Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] added out-of-place functions #3185

Merged
merged 3 commits into from
Jun 20, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 31 additions & 28 deletions src/cudamatrix/cu-kernels-ansi.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// 2013 Xiaohui Zhang
// 2013-2015 Guoguo Chen
// 2016-2018 Shiyin Kang
// 2019 Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
Expand Down Expand Up @@ -201,34 +202,6 @@ void cudaD_add_vec_vec(int Gr, int Bl, double alpha, double* v, const double* x,
const double* y, double beta, int dim);
// Single-precision add_vec_vec launcher declaration.
// Unlike the matrix launchers in this header, Gr and Bl are plain ints rather
// than dim3 (presumably the grid and block sizes of a 1-D launch -- confirm
// against the definition).  v is the only non-const pointer; x and y are
// read-only inputs.  How alpha and beta combine the operands is not visible
// from the prototype alone.
void cudaF_add_vec_vec(int Gr, int Bl, float alpha, float* v,
                       const float* x, const float* y, float beta, int dim);
void cudaD_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val,
MatrixDim d);
void cudaF_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val,
MatrixDim d);
void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
void cudaD_apply_exp_limited(dim3 Gr, dim3 Bl, double* mat, MatrixDim d,
double lower_limit, double upper_limit);
void cudaF_apply_exp_limited(dim3 Gr, dim3 Bl, float* mat, MatrixDim d,
float lower_limit, float upper_limit);
void cudaD_apply_exp_special(dim3 Gr, dim3 Bl, double* out, MatrixDim out_dim,
const double* in, int in_stride);
void cudaF_apply_exp_special(dim3 Gr, dim3 Bl, float* out, MatrixDim out_dim,
const float* in, int in_stride);
void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val,
MatrixDim d);
void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val,
MatrixDim d);
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
void cudaD_apply_log(dim3 Gr, dim3 Bl, double *mat, MatrixDim d);
void cudaF_apply_log(dim3 Gr, dim3 Bl, float *mat, MatrixDim d);
void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power,
bool include_sign, MatrixDim d);
void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power,
bool include_sign, MatrixDim d);
void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d);
void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d);
void cudaD_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data,
int num_blocks, const double *C_data,
int C_num_cols, int C_row_stride, int C_col_stride,
Expand Down Expand Up @@ -506,6 +479,36 @@ void cudaD_heaviside(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d,
int src_stride);
// Single-precision heaviside launcher declaration, out-of-place form:
// y is the writable destination, x the const source.  src_stride is passed
// separately from MatrixDim d (presumably the row stride of x, with d
// describing y -- TODO confirm against the kernel definition).
void cudaF_heaviside(dim3 Gr, dim3 Bl,
                     float* y, const float* x, MatrixDim d, int src_stride);
void cudaD_exp(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d,
int src_stride);
void cudaF_exp(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d,
int src_stride);
void cudaD_pow(dim3 Gr, dim3 Bl, double *y, const double *x, double power, MatrixDim d,
int src_stride);
void cudaF_pow(dim3 Gr, dim3 Bl, float *y, const float *x, float power, MatrixDim d,
int src_stride);
void cudaD_ceiling(dim3 Gr, dim3 Bl, double* y, const double* x, double ceiling_val,
MatrixDim dim, int src_stride);
void cudaF_ceiling(dim3 Gr, dim3 Bl, float* y, const float* x, float ceiling_val,
MatrixDim dim, int src_stride);
void cudaD_floor(dim3 Gr, dim3 Bl, double* y, const double* x, double floor_val,
MatrixDim dim, int src_stride);
void cudaF_floor(dim3 Gr, dim3 Bl, float* y, const float* x, float floor_val,
MatrixDim dim, int src_stride);
void cudaD_exp_limited(dim3 Gr, dim3 Bl, double* y, const double* x,
double lower_limit, double upper_limit, MatrixDim d, int src_stride);
void cudaF_exp_limited(dim3 Gr, dim3 Bl, float* y, const float* x,
float lower_limit, float upper_limit, MatrixDim d, int src_stride);
void cudaD_exp_special(dim3 Gr, dim3 Bl, double* y, const double* x,
MatrixDim d, int src_stride);
void cudaF_exp_special(dim3 Gr, dim3 Bl, float* y, const float* x,
MatrixDim d, int src_stride);
void cudaD_log(dim3 Gr, dim3 Bl, double* y, const double* x, MatrixDim d, int src_stride);
void cudaF_log(dim3 Gr, dim3 Bl, float* y, const float* x, MatrixDim d, int src_stride);
void cudaD_pow_abs(dim3 Gr, dim3 Bl, double* y, const double* x, double power,
bool include_sign, MatrixDim dim, int src_stride);
void cudaF_pow_abs(dim3 Gr, dim3 Bl, float* y, const float* x, float power,
bool include_sign, MatrixDim dim, int src_stride);
// int32 launcher declaration (no D/F precision prefix).  Takes a writable
// int32_cuda matrix pointer, a scalar int32_cuda value and the MatrixDim.
// Presumably adds `value` to the elements of `mat` in place -- confirm against
// the kernel definition.
void cuda_int32_add(dim3 Gr, dim3 Bl,
                    int32_cuda* mat, int32_cuda value, MatrixDim d);
void cuda_int32_set_const(dim3 Gr, dim3 Bl, int32_cuda *mat, int32_cuda value,
Expand Down
Loading