Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lang] Support sparse matrix on GPU #5185

Merged
merged 34 commits into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
87ca8bc
cusparse loaded
Hanke98 Mar 31, 2022
e3b1c7c
load cusolver
Hanke98 May 4, 2022
1b8a8c7
add driver base class
Hanke98 May 4, 2022
e53279a
update comments
Hanke98 May 4, 2022
4082d57
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 4, 2022
39e0563
make CUDADriver a derived class from CUDADriverBase
Hanke98 May 17, 2022
933589e
clean code
Hanke98 May 17, 2022
8d46826
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 17, 2022
222abef
clean code
Hanke98 May 17, 2022
f830583
Merge branch 'master' into gpu_sm
FantasyVR May 27, 2022
09c2ede
create sparse matrix using cusparse
FantasyVR May 27, 2022
2f4b0f1
spmv with bugs
FantasyVR May 27, 2022
64b19be
bug fix: int -> size_t
FantasyVR Jun 14, 2022
8952ef1
clean
FantasyVR Jun 14, 2022
d7035f1
add safe loader
FantasyVR Jun 16, 2022
214d56a
separate sparse matrix maker and spmv func
FantasyVR Jun 16, 2022
457a9f1
refactor
FantasyVR Jun 16, 2022
9ae6722
fix parameter bug
FantasyVR Jun 16, 2022
73ebb92
fix test bug
FantasyVR Jun 16, 2022
bc857d9
fix
FantasyVR Jun 16, 2022
f0333f7
Merge remote-tracking branch 'origin/master' into gpu_sm
FantasyVR Jun 16, 2022
ae7581c
fix merge conflicts
FantasyVR Jul 28, 2022
c0b395b
fix mac/windows failed tests
FantasyVR Jul 28, 2022
f60e675
fix
FantasyVR Jul 28, 2022
2e1edda
add tests for gpu sparse matrix
FantasyVR Jul 29, 2022
e060c3b
fix test
FantasyVR Jul 29, 2022
ea2dbf4
Merge branch 'master' into gpu_sm
FantasyVR Jul 29, 2022
8e9021c
fix
FantasyVR Aug 4, 2022
b7d1a70
fix cuMemAlloc_v2 bug and windows dll name bug
FantasyVR Aug 15, 2022
6fc1e75
fix ci
FantasyVR Aug 15, 2022
eada0f5
csr datatype checking
FantasyVR Aug 16, 2022
8f5ef40
Apply suggestions from code review
FantasyVR Aug 16, 2022
4c39e80
arch fix
FantasyVR Aug 16, 2022
2dc766a
format
FantasyVR Aug 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion python/taichi/linalg/sparse_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from taichi.lang.impl import get_runtime
from taichi.lang.matrix import Ndarray
from taichi.lang.util import warning
from taichi.types import annotations, f32
from taichi.types import annotations, f32, i32


class SparseMatrix:
Expand Down Expand Up @@ -198,6 +198,54 @@ def build_from_ndarray(self, ndarray):
'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
)

def build_csr_cusparse(self, data, indices, indptr):
    """Build a csr format sparse matrix using cuSparse where the column indices
    for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
    and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.

    Args:
        data (ti.ndarray): CSR format data array of the matrix.
        indices (ti.ndarray): CSR format index array of the matrix.
        indptr (ti.ndarray): CSR format index pointer array of the matrix.
    """
    # Guard clause 1: all three CSR arrays must be taichi ndarrays.
    if any(not isinstance(arr, Ndarray) for arr in (data, indices, indptr)):
        raise TaichiRuntimeError(
            'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray].'
        )
    # Guard clause 2: cuSparse build below is hard-coded to f32 values and
    # i32 index arrays, so reject anything else up front.
    if data.dtype != f32 or indices.dtype != i32 or indptr.dtype != i32:
        raise TaichiRuntimeError(
            'Sparse matrix only supports building from float32 data and int32 indices/indptr.'
        )
    # Hand the raw arrays to the native cuSparse builder. Note the argument
    # order expected by the runtime: row offsets, column indices, values.
    get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse(
        self.matrix, indptr.arr, indices.arr, data.arr)

def spmv(self, x, y):
    """Sparse matrix-vector multiplication using cuSparse.

    Computes ``y = A @ x`` on the GPU, where ``A`` is this sparse matrix.

    Args:
        x (ti.ndarray): the vector to be multiplied.
        y (ti.ndarray): the result of matrix-vector multiplication.

    Example::
        >>> x = ti.ndarray(shape=4, dtype=val_dt)
        >>> y = ti.ndarray(shape=4, dtype=val_dt)
        >>> A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
        >>> A.build_csr_cusparse(value_csr, col_csr, row_csr)
        >>> A.spmv(x, y)
    """
    if not isinstance(x, Ndarray) or not isinstance(y, Ndarray):
        raise TaichiRuntimeError(
            'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
        )
    # A is (n, m); x must have m entries for A @ x to be defined.
    if self.m != x.shape[0]:
        raise TaichiRuntimeError(
            f"Dimension mismatch between sparse matrix ({self.n}, {self.m}) and vector ({x.shape})"
        )

    # Delegate to the native CuSparseMatrix::spmv, passing raw ndarray
    # handles; the result is written in place into y.
    self.matrix.spmv(get_runtime().prog, x.arr, y.arr)


class SparseMatrixBuilder:
"""A python wrap around sparse matrix builder.
Expand Down
77 changes: 77 additions & 0 deletions taichi/program/sparse_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ std::unique_ptr<SparseMatrix> make_sparse_matrix(
storage_format);
}

// Factory for a cuSPARSE-backed sparse matrix, returned through the
// SparseMatrix base-class interface.
std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
                                                    int cols,
                                                    DataType dt) {
  // std::make_unique<CuSparseMatrix> already yields a unique_ptr that
  // converts implicitly to unique_ptr<SparseMatrix>; the extra
  // unique_ptr<SparseMatrix>(...) wrapper in the original was redundant.
  return std::make_unique<CuSparseMatrix>(rows, cols, dt);
}

template <typename T>
void build_ndarray_template(SparseMatrix &sm,
intptr_t data_ptr,
Expand Down Expand Up @@ -191,5 +198,75 @@ void make_sparse_matrix_from_ndarray(Program *prog,
}
}

// Builds the underlying cuSPARSE CSR descriptor (matrix_) from device
// pointers: `csr_ptr` holds the row offsets, `csr_indices_ptr` the column
// indices, `csr_values_ptr` the nonzero values, and `nnz` is the number of
// stored nonzeros. No-op when Taichi is built without CUDA support.
// (The original span contained stray review-UI text between #endif and the
// closing brace, which would not compile; removed.)
void CuSparseMatrix::build_csr(void *csr_ptr,
                               void *csr_indices_ptr,
                               void *csr_values_ptr,
                               int nnz) {
#if defined(TI_WITH_CUDA)
  // 32-bit zero-based indices and float32 values — matches the dtype
  // checks enforced on the Python side (build_csr_cusparse).
  CUSPARSEDriver::get_instance().cpCreateCsr(
      &matrix_, rows_, cols_, nnz, csr_ptr, csr_indices_ptr, csr_values_ptr,
      CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
      CUDA_R_32F);
#endif
}
// Releases the cuSPARSE descriptor created by build_csr().
CuSparseMatrix::~CuSparseMatrix() {
#if defined(TI_WITH_CUDA)
  // Guard against destroying a descriptor that was never created — the
  // original called cpDestroySpMat unconditionally even when build_csr()
  // had not run. NOTE(review): this relies on matrix_ being initialized to
  // nullptr in the class declaration; verify that initializer is present.
  if (matrix_ != nullptr) {
    CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_);
  }
#endif
}
// Builds `sm` (a CuSparseMatrix) from three device-resident ndarrays in CSR
// layout: row offsets, column indices, and nonzero values. Lazily loads the
// cuSPARSE shared library on first use. No-op without CUDA support.
void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
                                              SparseMatrix &sm,
                                              const Ndarray &row_offsets,
                                              const Ndarray &col_indices,
                                              const Ndarray &values) {
#if defined(TI_WITH_CUDA)
  // Removed unused local `sdtype` (data_type_name result was never read).
  if (!CUSPARSEDriver::get_instance().is_loaded()) {
    bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
    if (!load_success) {
      TI_ERROR("Failed to load cusparse library!");
    }
  }
  // Device addresses of the three CSR arrays, obtained as integers from the
  // runtime and reinterpreted as raw pointers for cuSPARSE.
  size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets);
  size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices);
  size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values);
  int nnz = values.get_nelement();
  sm.build_csr(reinterpret_cast<void *>(row_csr),
               reinterpret_cast<void *>(col_csr),
               reinterpret_cast<void *>(values_csr), nnz);
#endif
}

// Computes y = A * x on the GPU via cuSPARSE SpMV (CSR, float32).
// `x` must have cols_ entries and `y` rows_ entries; the result is written
// in place into y's device buffer. No-op without CUDA support.
void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) {
#if defined(TI_WITH_CUDA)
  // Raw device addresses of the input and output vectors.
  size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
  size_t dY = prog->get_ndarray_data_ptr_as_int(&y);

  // Wrap the device buffers in cuSPARSE dense-vector descriptors.
  cusparseDnVecDescr_t vecX;
  cusparseDnVecDescr_t vecY;
  CUSPARSEDriver::get_instance().cpCreateDnVec(
      &vecX, cols_, reinterpret_cast<void *>(dX), CUDA_R_32F);
  CUSPARSEDriver::get_instance().cpCreateDnVec(
      &vecY, rows_, reinterpret_cast<void *>(dY), CUDA_R_32F);

  cusparseHandle_t cusparse_handle;
  CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle);

  // y = alpha * A * x + beta * y, with alpha = 1 and beta = 0.
  float alpha = 1.0f, beta = 0.0f;

  // Query the scratch-buffer size required by this SpMV configuration.
  size_t bufferSize = 0;
  CUSPARSEDriver::get_instance().cpSpMV_bufferSize(
      cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
      &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize);

  void *dBuffer = nullptr;  // nullptr (not NULL) per modern C++ style
  if (bufferSize > 0)
    CUDADriver::get_instance().malloc(&dBuffer, bufferSize);
  CUSPARSEDriver::get_instance().cpSpMV(
      cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
      &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer);

  CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX);
  CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY);
  CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle);
  // Only free when a buffer was actually allocated; the original passed a
  // null pointer to mem_free when bufferSize == 0.
  if (dBuffer != nullptr)
    CUDADriver::get_instance().mem_free(dBuffer);
#endif
}

} // namespace lang
} // namespace taichi
38 changes: 37 additions & 1 deletion taichi/program/sparse_matrix.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include "taichi/rhi/cuda/cuda_driver.h"
#include "taichi/common/core.h"
#include "taichi/inc/constants.h"
#include "taichi/ir/type_utils.h"
Expand Down Expand Up @@ -58,7 +59,16 @@ class SparseMatrix {
}
virtual ~SparseMatrix() = default;

virtual void build_triplets(void *triplets_adr){};
virtual void build_triplets(void *triplets_adr) {
TI_NOT_IMPLEMENTED;
};

virtual void build_csr(void *csr_ptr,
void *csr_indices_ptr,
void *csr_values_ptr,
int nnz) {
TI_NOT_IMPLEMENTED;
};

inline const int num_rows() const {
return rows_;
Expand Down Expand Up @@ -189,14 +199,40 @@ class EigenSparseMatrix : public SparseMatrix {
EigenMatrix matrix_;
};

class CuSparseMatrix : public SparseMatrix {
public:
explicit CuSparseMatrix(int rows, int cols, DataType dt)
: SparseMatrix(rows, cols, dt) {
}

virtual ~CuSparseMatrix();
void build_csr(void *csr_ptr,
void *csr_indices_ptr,
void *csr_values_ptr,
int nnz) override;

void spmv(Program *prog, const Ndarray &x, Ndarray &y);

private:
cusparseSpMatDescr_t matrix_;
};

std::unique_ptr<SparseMatrix> make_sparse_matrix(
int rows,
int cols,
DataType dt,
const std::string &storage_format);
std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
int cols,
DataType dt);

void make_sparse_matrix_from_ndarray(Program *prog,
SparseMatrix &sm,
const Ndarray &ndarray);
void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
SparseMatrix &sm,
const Ndarray &row_offsets,
const Ndarray &col_indices,
const Ndarray &values);
} // namespace lang
} // namespace taichi
29 changes: 23 additions & 6 deletions taichi/python/export_lang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,23 +374,37 @@ void export_lang(py::module &m) {
[](Program *program, int n, int m, uint64 max_num_entries,
DataType dtype, const std::string &storage_format) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
"SparseMatrix Builder only supports CPU for now.");
return SparseMatrixBuilder(n, m, max_num_entries, dtype,
storage_format);
})
.def("create_sparse_matrix",
[](Program *program, int n, int m, DataType dtype,
std::string storage_format) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
return make_sparse_matrix(n, m, dtype, storage_format);
TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
!arch_is_cuda(program->config.arch),
"SparseMatrix only supports CPU and CUDA for now.");
if (arch_is_cpu(program->config.arch))
return make_sparse_matrix(n, m, dtype, storage_format);
else
return make_cu_sparse_matrix(n, m, dtype);
})
.def("make_sparse_matrix_from_ndarray",
[](Program *program, SparseMatrix &sm, const Ndarray &ndarray) {
TI_ERROR_IF(!arch_is_cpu(program->config.arch),
"SparseMatrix only supports CPU for now.");
TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
!arch_is_cuda(program->config.arch),
"SparseMatrix only supports CPU and CUDA for now.");
return make_sparse_matrix_from_ndarray(program, sm, ndarray);
})
.def("make_sparse_matrix_from_ndarray_cusparse",
[](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr,
const Ndarray &col_csr, const Ndarray &val_csr) {
TI_ERROR_IF(
!arch_is_cuda(program->config.arch),
"SparseMatrix based on GPU only supports CUDA for now.");
return make_sparse_matrix_from_ndarray_cusparse(
program, sm, row_csr, col_csr, val_csr);
})
.def("no_activate",
[](Program *program, SNode *snode) {
// TODO(#2193): Also apply to @ti.func?
Expand Down Expand Up @@ -1163,6 +1177,9 @@ void export_lang(py::module &m) {
MAKE_SPARSE_MATRIX(64, ColMajor, d);
MAKE_SPARSE_MATRIX(64, RowMajor, d);

py::class_<CuSparseMatrix>(m, "CuSparseMatrix")
.def("spmv", &CuSparseMatrix::spmv);

py::class_<SparseSolver>(m, "SparseSolver")
.def("compute", &SparseSolver::compute)
.def("analyze_pattern", &SparseSolver::analyze_pattern)
Expand Down
4 changes: 4 additions & 0 deletions taichi/rhi/arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ bool arch_is_cpu(Arch arch) {
}
}

// Returns true iff the given backend is the CUDA arch.
bool arch_is_cuda(Arch arch) {
  switch (arch) {
    case Arch::cuda:
      return true;
    default:
      return false;
  }
}

bool arch_uses_llvm(Arch arch) {
return (arch == Arch::x64 || arch == Arch::arm64 || arch == Arch::cuda ||
arch == Arch::wasm);
Expand Down
2 changes: 2 additions & 0 deletions taichi/rhi/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Arch arch_from_name(const std::string &arch);

bool arch_is_cpu(Arch arch);

bool arch_is_cuda(Arch arch);

bool arch_uses_llvm(Arch arch);

bool arch_is_gpu(Arch arch);
Expand Down
65 changes: 53 additions & 12 deletions taichi/rhi/cuda/cuda_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,39 @@ std::string get_cuda_error_message(uint32 err) {
return fmt::format("CUDA Error {}: {}", err_name_ptr, err_string_ptr);
}

bool CUDADriver::detected() {
return !disabled_by_env_ && cuda_version_valid_ && loader_->loaded();
}

CUDADriver::CUDADriver() {
// Reads the TI_ENABLE_CUDA environment variable (default: enabled) and
// records whether the CUDA driver is explicitly disabled.
CUDADriverBase::CUDADriverBase() {
  disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0);
  if (disabled_by_env_) {
    // Fixed typo in trace message: "enviroment" -> "environment".
    TI_TRACE(
        "CUDA driver disabled by environment variable \"TI_ENABLE_CUDA\".");
  }
}

// Loads the platform-appropriate shared library (lib_linux on Linux,
// lib_windows on Windows) into loader_. Returns true when the library was
// found and loaded, false otherwise (with a warning).
bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) {
#if defined(TI_PLATFORM_LINUX)
  auto lib_name = lib_linux;
#elif defined(TI_PLATFORM_WINDOWS)
  auto lib_name = lib_windows;
#else
  static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
#endif

  loader_ = std::make_unique<DynamicLoader>(lib_name);
  // Early return on failure instead of the original if/else.
  if (!loader_->loaded()) {
    TI_WARN("{} lib not found.", lib_name);
    return false;
  }
  TI_TRACE("{} loaded!", lib_name);
  return true;
}

// True when CUDA is usable: not disabled via TI_ENABLE_CUDA, the driver
// version check passed, and the shared library loaded successfully.
bool CUDADriver::detected() {
  if (disabled_by_env_)
    return false;
  if (!cuda_version_valid_)
    return false;
  return loader_->loaded();
}

CUDADriver::CUDADriver() {
if (!load_lib("libcuda.so", "nvcuda.dll"))
return;

loader_->load_function("cuGetErrorName", get_error_name);
loader_->load_function("cuGetErrorString", get_error_string);
Expand Down Expand Up @@ -79,4 +88,36 @@ CUDADriver &CUDADriver::get_instance() {
return get_instance_without_context();
}

// Default constructor is intentionally empty: the cuSPARSE library is
// loaded lazily through load_cusparse(), not at construction time.
CUSPARSEDriver::CUSPARSEDriver() {
}

// Returns the process-wide CUSPARSEDriver singleton.
// NOTE(review): the instance is heap-allocated and never freed —
// presumably to sidestep static-destruction-order issues; confirm this is
// intentional before "fixing" it to a Meyers singleton.
CUSPARSEDriver &CUSPARSEDriver::get_instance() {
  static CUSPARSEDriver *instance = new CUSPARSEDriver();
  return *instance;
}

// Dynamically loads the cuSPARSE shared library and resolves every entry
// point listed in cusparse_functions.inc.h. Returns true on success;
// records the result in cusparse_loaded_ so is_loaded() can report it.
bool CUSPARSEDriver::load_cusparse() {
  cusparse_loaded_ = load_lib("libcusparse.so", "cusparse64_11.dll");

  if (!cusparse_loaded_) {
    return false;
  }
// For each function in the .inc.h list: bind the dynamic symbol, attach the
// shared lock, and record both the wrapper and symbol names for diagnostics.
#define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \
  name.set(loader_->load_function(#symbol_name)); \
  name.set_lock(&lock_); \
  name.set_names(#name, #symbol_name);
#include "taichi/rhi/cuda/cusparse_functions.inc.h"
#undef PER_CUSPARSE_FUNCTION
  return cusparse_loaded_;
}

// Eagerly loads the cuSOLVER shared library at construction time. The
// return value is ignored; load_lib() already warns on failure.
// NOTE(review): the Windows name "cusolver.dll" lacks the versioned suffix
// used for cuSPARSE ("cusparse64_11.dll") — presumably it should be
// "cusolver64_11.dll"; verify against a Windows CUDA toolkit install.
CUSOLVERDriver::CUSOLVERDriver() {
  load_lib("libcusolver.so", "cusolver.dll");
}

// Returns the process-wide CUSOLVERDriver singleton.
// NOTE(review): heap-allocated and never freed, mirroring
// CUSPARSEDriver::get_instance(); presumably intentional — confirm.
CUSOLVERDriver &CUSOLVERDriver::get_instance() {
  static CUSOLVERDriver *instance = new CUSOLVERDriver();
  return *instance;
}

TLANG_NAMESPACE_END
Loading