[Lang] GPU Sparse Matrix #5612

Closed · wants to merge 7 commits
41 changes: 41 additions & 0 deletions python/taichi/linalg/sparse_matrix.py
@@ -198,6 +198,47 @@ def build_from_ndarray(self, ndarray):
'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
)

def build_csr_cusparse(self, data, indices, indptr):
"""Build a csr format sparse matrix using cuSparse where the column indices
for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
Args:
data (ti.ndarray): CSR format data array of the matrix.
indices (ti.ndarray): CSR format index array of the matrix.
indptr (ti.ndarray): CSR format index pointer array of the matrix.
"""
if isinstance(data, Ndarray) and isinstance(
indices, Ndarray) and isinstance(indptr, Ndarray):
get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse(
self.matrix, indptr.arr, indices.arr, data.arr)
else:
raise TaichiRuntimeError(
'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
)

def spmv(self, x, y):
"""Sparse matrix-vector multiplication using cuSparse.
Args:
x (ti.ndarray): the vector to be multiplied.
y (ti.ndarray): the result of matrix-vector multiplication.
Example::
>>> x = ti.ndarray(shape=4, dtype=ti.f32)
>>> y = ti.ndarray(shape=4, dtype=ti.f32)
>>> A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
>>> A.build_csr_cusparse(value_csr, col_csr, row_csr)
>>> A.spmv(x, y)
"""
if not isinstance(x, Ndarray) or not isinstance(y, Ndarray):
raise TaichiRuntimeError(
'spmv only supports operands of type [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
)
if self.m != x.shape[0]:
raise TaichiRuntimeError(
f"Dimension mismatch between sparse matrix ({self.n}, {self.m}) and vector ({x.shape})"
)

self.matrix.spmv(get_runtime().prog, x.arr, y.arr)


class SparseMatrixBuilder:
"""A python wrap around sparse matrix builder.
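For reference, here is a minimal end-to-end sketch of the Python API added in this file. It assumes the CUDA backend and that the CSR arrays already describe a valid matrix; index arrays use i32 and values use f32, matching the cuSPARSE descriptors (CUSPARSE_INDEX_32I / CUDA_R_32F) created on the C++ side.

```python
import taichi as ti

ti.init(arch=ti.cuda)

n = 4
indptr = ti.ndarray(shape=n + 1, dtype=ti.i32)   # CSR row offsets, length n + 1
indices = ti.ndarray(shape=5, dtype=ti.i32)      # CSR column indices, length nnz
data = ti.ndarray(shape=5, dtype=ti.f32)         # CSR nonzero values, length nnz
# ... fill indptr / indices / data with a valid CSR description ...

A = ti.linalg.SparseMatrix(n=n, m=n, dtype=ti.f32)
A.build_csr_cusparse(data, indices, indptr)

x = ti.ndarray(shape=n, dtype=ti.f32)
y = ti.ndarray(shape=n, dtype=ti.f32)
A.spmv(x, y)  # y = A @ x, computed by cuSPARSE
```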
77 changes: 77 additions & 0 deletions taichi/program/sparse_matrix.cpp
@@ -162,6 +162,13 @@ std::unique_ptr<SparseMatrix> make_sparse_matrix(
storage_format);
}

std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
int cols,
DataType dt) {
return std::make_unique<CuSparseMatrix>(rows, cols, dt);
}

template <typename T>
void build_ndarray_template(SparseMatrix &sm,
intptr_t data_ptr,
@@ -191,5 +198,75 @@ void make_sparse_matrix_from_ndarray(Program *prog,
}
}

void CuSparseMatrix::build_csr(void *csr_ptr,
void *csr_indices_ptr,
void *csr_values_ptr,
int nnz) {
#if defined(TI_WITH_CUDA)
CUSPARSEDriver::get_instance().cpCreateCsr(
&matrix_, rows_, cols_, nnz, csr_ptr, csr_indices_ptr, csr_values_ptr,
CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
CUDA_R_32F);
#endif
}
CuSparseMatrix::~CuSparseMatrix() {
#if defined(TI_WITH_CUDA)
CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_);
#endif
}
void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
SparseMatrix &sm,
const Ndarray &row_offsets,
const Ndarray &col_indices,
const Ndarray &values) {
#if defined(TI_WITH_CUDA)
std::string sdtype = taichi::lang::data_type_name(sm.get_data_type());
if (!CUSPARSEDriver::get_instance().is_loaded()) {
bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
if (!load_success) {
TI_ERROR("Failed to load cusparse library!");
}
}
size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets);
size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices);
size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values);
int nnz = values.get_nelement();
sm.build_csr((void *)row_csr, (void *)col_csr, (void *)values_csr, nnz);
#endif
}

void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) {
#if defined(TI_WITH_CUDA)
size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
size_t dY = prog->get_ndarray_data_ptr_as_int(&y);

cusparseDnVecDescr_t vecX, vecY;
CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, cols_, (void *)dX,
CUDA_R_32F);
CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, rows_, (void *)dY,
CUDA_R_32F);

cusparseHandle_t cusparse_handle;
CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle);
float alpha = 1.0f, beta = 0.0f;
size_t bufferSize = 0;
CUSPARSEDriver::get_instance().cpSpMV_bufferSize(
cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
&beta, vecY, CUDA_R_32F, CUSPARSE_CSRMV_ALG1, &bufferSize);

void *dBuffer = NULL;
if (bufferSize > 0)
CUDADriver::get_instance().malloc(&dBuffer, bufferSize);
CUSPARSEDriver::get_instance().cpSpMV(
cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
&beta, vecY, CUDA_R_32F, CUSPARSE_CSRMV_ALG1, dBuffer);

CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX);
CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY);
CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle);
CUDADriver::get_instance().mem_free(dBuffer);
#endif
}

} // namespace lang
} // namespace taichi
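To make the CSR convention concrete: `build_csr` expects row offsets, column indices and values such that row `i` owns the slice `indptr[i]:indptr[i+1]` of both the index and value arrays, and `nnz` is simply the length of the value array (what `values.get_nelement()` returns above). A NumPy-only sketch with a made-up 4x4 matrix:

```python
import numpy as np

# CSR encoding of the 4x4 matrix
#   [[1, 0, 0, 2],
#    [0, 3, 0, 0],
#    [0, 0, 0, 0],
#    [4, 0, 5, 0]]
indptr = np.array([0, 2, 3, 3, 5], dtype=np.int32)   # row offsets, length rows + 1
indices = np.array([0, 3, 1, 0, 2], dtype=np.int32)  # column of each nonzero
data = np.array([1, 2, 3, 4, 5], dtype=np.float32)   # value of each nonzero

nnz = len(data)

# Reconstruct the dense matrix to verify the encoding.
dense = np.zeros((4, 4), dtype=np.float32)
for i in range(4):
    for k in range(indptr[i], indptr[i + 1]):
        dense[i, indices[k]] = data[k]
print(dense)
```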
46 changes: 41 additions & 5 deletions taichi/program/sparse_matrix.h
@@ -5,11 +5,11 @@
#include "taichi/ir/type_utils.h"
#include "taichi/program/ndarray.h"
#include "taichi/program/program.h"
#include "taichi/rhi/cuda/cuda_driver.h"

#include "Eigen/Sparse"

namespace taichi {
namespace lang {
TLANG_NAMESPACE_BEGIN

class SparseMatrix;

@@ -58,7 +58,16 @@ class SparseMatrix {
}
virtual ~SparseMatrix() = default;

virtual void build_triplets(void *triplets_adr){};
virtual void build_triplets(void *triplets_adr) {
TI_WARN("SparseMatrix::build_triplets is not implemented!");
};

virtual void build_csr(void *csr_ptr,
void *csr_indices_ptr,
void *csr_values_ptr,
int nnz) {
TI_WARN("SparseMatrix::build_csr is not implemented yet");
};

inline const int num_rows() const {
return rows_;
@@ -189,14 +198,41 @@ class EigenSparseMatrix : public SparseMatrix {
EigenMatrix matrix_;
};

class CuSparseMatrix : public SparseMatrix {
public:
explicit CuSparseMatrix(int rows, int cols, DataType dt)
: SparseMatrix(rows, cols, dt) {
}

virtual ~CuSparseMatrix();
void build_csr(void *csr_ptr,
void *csr_indices_ptr,
void *csr_values_ptr,
int nnz) override;

void spmv(Program *prog, const Ndarray &x, Ndarray &y);

private:
cusparseSpMatDescr_t matrix_;
};

std::unique_ptr<SparseMatrix> make_sparse_matrix(
int rows,
int cols,
DataType dt,
const std::string &storage_format);

std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
int cols,
DataType dt);

void make_sparse_matrix_from_ndarray(Program *prog,
SparseMatrix &sm,
const Ndarray &ndarray);
} // namespace lang
} // namespace taichi

void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
SparseMatrix &sm,
const Ndarray &row_offsets,
const Ndarray &col_indices,
const Ndarray &values);
TLANG_NAMESPACE_END
29 changes: 23 additions & 6 deletions taichi/python/export_lang.cpp
@@ -380,23 +380,37 @@ void export_lang(py::module &m) {
[](Program *program, int n, int m, uint64 max_num_entries,
DataType dtype, const std::string &storage_format) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
"SparseMatrix Builder only supports CPU for now.");
return SparseMatrixBuilder(n, m, max_num_entries, dtype,
storage_format);
})
.def("create_sparse_matrix",
[](Program *program, int n, int m, DataType dtype,
std::string storage_format) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
return make_sparse_matrix(n, m, dtype, storage_format);
TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
!arch_uses_cuda(program->config.arch),
"SparseMatrix only supports CPU and CUDA for now.");
if (arch_is_cpu(program->config.arch))
return make_sparse_matrix(n, m, dtype, storage_format);
else
return make_cu_sparse_matrix(n, m, dtype);
})
.def("make_sparse_matrix_from_ndarray",
[](Program *program, SparseMatrix &sm, const Ndarray &ndarray) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
!arch_uses_cuda(program->config.arch),
"SparseMatrix only supports CPU and CUDA for now.");
return make_sparse_matrix_from_ndarray(program, sm, ndarray);
})
.def("make_sparse_matrix_from_ndarray_cusparse",
[](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr,
const Ndarray &col_csr, const Ndarray &val_csr) {
TI_ERROR_IF(
!arch_uses_cuda(program->config.arch),
"SparseMatrix based on GPU only supports CUDA for now.");
return make_sparse_matrix_from_ndarray_cusparse(
program, sm, row_csr, col_csr, val_csr);
})
.def("no_activate",
[](Program *program, SNode *snode) {
// TODO(#2193): Also apply to @ti.func?
@@ -1171,6 +1185,9 @@ void export_lang(py::module &m) {
MAKE_SPARSE_MATRIX(64, ColMajor, d);
MAKE_SPARSE_MATRIX(64, RowMajor, d);

py::class_<CuSparseMatrix, SparseMatrix>(m, "CuSparseMatrix")
.def("spmv", &CuSparseMatrix::spmv);

py::class_<SparseSolver>(m, "SparseSolver")
.def("compute", &SparseSolver::compute)
.def("analyze_pattern", &SparseSolver::analyze_pattern)
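The bindings above make the backend follow the active arch: `create_sparse_matrix` returns an Eigen-backed matrix on CPU and a cuSPARSE-backed one on CUDA, while the cuSPARSE-only entry points reject non-CUDA archs. A rough sketch of what this looks like from Python under this PR (the `storage_format` argument only matters on the CPU path):

```python
import taichi as ti

# CPU: create_sparse_matrix dispatches to make_sparse_matrix (Eigen backend).
ti.init(arch=ti.cpu)
A_cpu = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)

# CUDA: the same front end dispatches to make_cu_sparse_matrix (cuSPARSE backend).
ti.init(arch=ti.cuda)
A_gpu = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
```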
4 changes: 4 additions & 0 deletions taichi/rhi/arch.cpp
@@ -48,6 +48,10 @@ bool arch_uses_llvm(Arch arch) {
arch == Arch::wasm);
}

bool arch_uses_cuda(Arch arch) {
return arch == Arch::cuda;
}

bool arch_is_gpu(Arch arch) {
return !arch_is_cpu(arch);
}
2 changes: 2 additions & 0 deletions taichi/rhi/arch.h
@@ -20,6 +20,8 @@ bool arch_is_cpu(Arch arch);

bool arch_uses_llvm(Arch arch);

bool arch_uses_cuda(Arch arch);

bool arch_is_gpu(Arch arch);

Arch host_arch();
78 changes: 60 additions & 18 deletions taichi/rhi/cuda/cuda_driver.cpp
@@ -20,25 +20,8 @@ bool CUDADriver::detected() {
}

CUDADriver::CUDADriver() {
disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0);
if (disabled_by_env_) {
TI_TRACE(
"CUDA driver disabled by environment variable \"TI_ENABLE_CUDA\".");
return;
}

#if defined(TI_PLATFORM_LINUX)
loader_ = std::make_unique<DynamicLoader>("libcuda.so");
#elif defined(TI_PLATFORM_WINDOWS)
loader_ = std::make_unique<DynamicLoader>("nvcuda.dll");
#else
static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
#endif

if (!loader_->loaded()) {
TI_WARN("CUDA driver not found.");
if (!load_lib("libcuda.so", "nvcuda.dll"))
return;
}

loader_->load_function("cuGetErrorName", get_error_name);
loader_->load_function("cuGetErrorString", get_error_string);
@@ -79,4 +62,63 @@ CUDADriver &CUDADriver::get_instance() {
return get_instance_without_context();
}

CUDADriverBase::CUDADriverBase() {
disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0);
if (disabled_by_env_) {
TI_TRACE("CUDA driver disabled by enviroment variable \"TI_ENABLE_CUDA\".");
return;
}
}

bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) {
#if defined(TI_PLATFORM_LINUX)
auto lib_name = lib_linux;
#elif defined(TI_PLATFORM_WINDOWS)
auto lib_name = lib_windows;
#else
static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
#endif

loader_ = std::make_unique<DynamicLoader>(lib_name);
if (!loader_->loaded()) {
TI_WARN("{} lib not found.", lib_name);
return false;
} else {
TI_TRACE("{} loaded!", lib_name);
return true;
}
}

CUSPARSEDriver::CUSPARSEDriver() {
}

CUSPARSEDriver &CUSPARSEDriver::get_instance() {
static CUSPARSEDriver *instance = new CUSPARSEDriver();
return *instance;
}

bool CUSPARSEDriver::load_cusparse() {
cusparse_loaded_ = load_lib("libcusparse.so", "cusparse64_11.dll");

if (!cusparse_loaded_) {
return false;
}
#define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \
name.set(loader_->load_function(#symbol_name)); \
name.set_lock(&lock_); \
name.set_names(#name, #symbol_name);
#include "taichi/rhi/cuda/cusparse_functions.inc.h"
#undef PER_CUSPARSE_FUNCTION
return cusparse_loaded_;
}

CUSOLVERDriver::CUSOLVERDriver() {
load_lib("libcusolver.so", "cusolver.dll");
}

CUSOLVERDriver &CUSOLVERDriver::get_instance() {
static CUSOLVERDriver *instance = new CUSOLVERDriver();
return *instance;
}

TLANG_NAMESPACE_END
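The refactor in this file moves the platform-specific library lookup into a shared `load_lib` helper so that cuSPARSE and cuSOLVER can be loaded lazily, with a missing library producing a warning rather than a hard failure. A rough Python/ctypes sketch of the same idea (library names taken from the diff, everything else illustrative):

```python
import ctypes
import platform

def load_lib(lib_linux: str, lib_windows: str):
    # Pick the platform-specific name, mirroring the TI_PLATFORM_* branches.
    name = lib_windows if platform.system() == "Windows" else lib_linux
    try:
        handle = ctypes.CDLL(name)  # plays the role of DynamicLoader
    except OSError:
        print(f"{name} lib not found.")
        return None
    print(f"{name} loaded!")
    return handle

# Loaded on demand, the first time a cuSPARSE-backed feature is used.
cusparse = load_lib("libcusparse.so", "cusparse64_11.dll")
```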