diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py
index f04ddeb786340..e729052b05f05 100644
--- a/python/taichi/linalg/sparse_matrix.py
+++ b/python/taichi/linalg/sparse_matrix.py
@@ -6,7 +6,7 @@
 from taichi.lang.impl import get_runtime
 from taichi.lang.matrix import Ndarray
 from taichi.lang.util import warning
-from taichi.types import annotations, f32
+from taichi.types import annotations, f32, i32
 
 
 class SparseMatrix:
@@ -198,6 +198,54 @@ def build_from_ndarray(self, ndarray):
                 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
             )
 
+    def build_csr_cusparse(self, data, indices, indptr):
+        """Build a CSR-format sparse matrix using cuSparse, where the column indices
+        for row i are stored in ``indices[indptr[i]:indptr[i+1]]``
+        and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
+
+        Args:
+            data (ti.ndarray): CSR format data array of the matrix.
+            indices (ti.ndarray): CSR format index array of the matrix.
+            indptr (ti.ndarray): CSR format index pointer array of the matrix.
+        """
+        if not isinstance(data, Ndarray) or not isinstance(
+                indices, Ndarray) or not isinstance(indptr, Ndarray):
+            raise TaichiRuntimeError(
+                'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray].'
+            )
+        elif data.dtype != f32 or indices.dtype != i32 or indptr.dtype != i32:
+            raise TaichiRuntimeError(
+                'Sparse matrix only supports building from float32 data and int32 indices/indptr.'
+            )
+        else:
+            get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse(
+                self.matrix, indptr.arr, indices.arr, data.arr)
+
+    def spmv(self, x, y):
+        """Sparse matrix-vector multiplication (y = A @ x) using cuSparse.
+
+        Args:
+            x (ti.ndarray): the vector to be multiplied.
+            y (ti.ndarray): the result of the matrix-vector multiplication.
+
+        Example::
+            >>> x = ti.ndarray(shape=4, dtype=ti.f32)
+            >>> y = ti.ndarray(shape=4, dtype=ti.f32)
+            >>> A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
+            >>> A.build_csr_cusparse(value_csr, col_csr, row_csr)
+            >>> A.spmv(x, y)
+        """
+        if not isinstance(x, Ndarray) or not isinstance(y, Ndarray):
+            raise TaichiRuntimeError(
+                'Sparse matrix-vector multiplication only supports [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray] operands.'
+            )
+        if self.m != x.shape[0]:
+            raise TaichiRuntimeError(
+                f"Dimension mismatch between sparse matrix ({self.n}, {self.m}) and vector ({x.shape})"
+            )
+
+        self.matrix.spmv(get_runtime().prog, x.arr, y.arr)
+
 
 class SparseMatrixBuilder:
     """A python wrap around sparse matrix builder.
diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp
index 975d6b344f538..bdf646ee7594b 100644
--- a/taichi/program/sparse_matrix.cpp
+++ b/taichi/program/sparse_matrix.cpp
@@ -162,6 +162,13 @@ std::unique_ptr<SparseMatrix> make_sparse_matrix(
                                                  storage_format);
 }
 
+std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
+                                                    int cols,
+                                                    DataType dt) {
+  return std::unique_ptr<SparseMatrix>(
+      std::make_unique<CuSparseMatrix>(rows, cols, dt));
+}
+
 template <typename T>
 void build_ndarray_template(SparseMatrix &sm,
                             intptr_t data_ptr,
@@ -191,5 +198,75 @@ void make_sparse_matrix_from_ndarray(Program *prog,
   }
 }
 
+void CuSparseMatrix::build_csr(void *csr_ptr,
+                               void *csr_indices_ptr,
+                               void *csr_values_ptr,
+                               int nnz) {
+#if defined(TI_WITH_CUDA)
+  CUSPARSEDriver::get_instance().cpCreateCsr(
+      &matrix_, rows_, cols_, nnz, csr_ptr, csr_indices_ptr, csr_values_ptr,
+      CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
+      CUDA_R_32F);
+#endif
+}
+CuSparseMatrix::~CuSparseMatrix() {
+#if defined(TI_WITH_CUDA)
+  CUSPARSEDriver::get_instance().cpDestroySpMat(matrix_);
+#endif
+}
+void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
+                                              SparseMatrix &sm,
+                                              const Ndarray &row_offsets,
+                                              const Ndarray &col_indices,
+                                              const Ndarray &values) {
+#if defined(TI_WITH_CUDA)
+  std::string sdtype = taichi::lang::data_type_name(sm.get_data_type());
+  if (!CUSPARSEDriver::get_instance().is_loaded()) {
+    bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
+    if (!load_success) {
+      TI_ERROR("Failed to load cusparse library!");
+    }
+  }
+  size_t row_csr = prog->get_ndarray_data_ptr_as_int(&row_offsets);
+  size_t col_csr = prog->get_ndarray_data_ptr_as_int(&col_indices);
+  size_t values_csr = prog->get_ndarray_data_ptr_as_int(&values);
+  int nnz = values.get_nelement();
+  sm.build_csr((void *)row_csr, (void *)col_csr, (void *)values_csr, nnz);
+#endif
+}
+
+void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) {
+#if defined(TI_WITH_CUDA)
+  size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
+  size_t dY = prog->get_ndarray_data_ptr_as_int(&y);
+
+  cusparseDnVecDescr_t vecX, vecY;
+  CUSPARSEDriver::get_instance().cpCreateDnVec(&vecX, cols_, (void *)dX,
+                                               CUDA_R_32F);
+  CUSPARSEDriver::get_instance().cpCreateDnVec(&vecY, rows_, (void *)dY,
+                                               CUDA_R_32F);
+
+  cusparseHandle_t cusparse_handle;
+  CUSPARSEDriver::get_instance().cpCreate(&cusparse_handle);
+  float alpha = 1.0f, beta = 0.0f;
+  size_t bufferSize = 0;
+  CUSPARSEDriver::get_instance().cpSpMV_bufferSize(
+      cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
+      &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, &bufferSize);
+
+  void *dBuffer = NULL;
+  if (bufferSize > 0)
+    CUDADriver::get_instance().malloc(&dBuffer, bufferSize);
+  CUSPARSEDriver::get_instance().cpSpMV(
+      cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matrix_, vecX,
+      &beta, vecY, CUDA_R_32F, CUSPARSE_SPMV_CSR_ALG1, dBuffer);
+
+  CUSPARSEDriver::get_instance().cpDestroyDnVec(vecX);
+  CUSPARSEDriver::get_instance().cpDestroyDnVec(vecY);
+  CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle);
+  CUDADriver::get_instance().mem_free(dBuffer);
+#endif
+}
+
 }  // namespace lang
 }  // namespace taichi
diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h
index 9501fc2781469..be776045bad7d 100644
--- a/taichi/program/sparse_matrix.h
+++ b/taichi/program/sparse_matrix.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "taichi/rhi/cuda/cuda_driver.h"
 #include "taichi/common/core.h"
 #include "taichi/inc/constants.h"
 #include "taichi/ir/type_utils.h"
@@ -58,7 +59,16 @@ class SparseMatrix {
   }
   virtual ~SparseMatrix() = default;
 
-  virtual void build_triplets(void *triplets_adr){};
+  virtual void build_triplets(void *triplets_adr) {
+    TI_NOT_IMPLEMENTED;
+  };
+
+  virtual void build_csr(void *csr_ptr,
+                         void *csr_indices_ptr,
+                         void *csr_values_ptr,
+                         int nnz) {
+    TI_NOT_IMPLEMENTED;
+  };
 
   inline const int num_rows() const {
     return rows_;
@@ -189,14 +199,40 @@ class EigenSparseMatrix : public SparseMatrix {
   EigenMatrix matrix_;
 };
 
+class CuSparseMatrix : public SparseMatrix {
+ public:
+  explicit CuSparseMatrix(int rows, int cols, DataType dt)
+      : SparseMatrix(rows, cols, dt) {
+  }
+
+  virtual ~CuSparseMatrix();
+  void build_csr(void *csr_ptr,
+                 void *csr_indices_ptr,
+                 void *csr_values_ptr,
+                 int nnz) override;
+
+  void spmv(Program *prog, const Ndarray &x, Ndarray &y);
+
+ private:
+  cusparseSpMatDescr_t matrix_;
+};
+
 std::unique_ptr<SparseMatrix> make_sparse_matrix(
     int rows,
     int cols,
     DataType dt,
     const std::string &storage_format);
+std::unique_ptr<SparseMatrix> make_cu_sparse_matrix(int rows,
+                                                    int cols,
+                                                    DataType dt);
 void make_sparse_matrix_from_ndarray(Program *prog,
                                      SparseMatrix &sm,
                                      const Ndarray &ndarray);
+void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
+                                              SparseMatrix &sm,
+                                              const Ndarray &row_offsets,
+                                              const Ndarray &col_indices,
+                                              const Ndarray &values);
 
 }  // namespace lang
 }  // namespace taichi
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index 39a2149db656f..e2aaf88f8b4f4 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -380,23 +380,37 @@ void export_lang(py::module &m) {
           [](Program *program, int n, int m, uint64 max_num_entries,
              DataType dtype, const std::string &storage_format) {
             TI_ERROR_IF(!arch_is_cpu(program->config.arch),
-                        "SparseMatrix only supports CPU for now.");
+                        "SparseMatrix Builder only supports CPU for now.");
             return SparseMatrixBuilder(n, m, max_num_entries, dtype,
                                        storage_format);
           })
      .def("create_sparse_matrix",
           [](Program *program, int n, int m, DataType dtype,
             std::string storage_format) {
-            TI_ERROR_IF(!arch_is_cpu(program->config.arch),
-                        "SparseMatrix only supports CPU for now.");
-            return make_sparse_matrix(n, m, dtype, storage_format);
+            TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
+                            !arch_is_cuda(program->config.arch),
+                        "SparseMatrix only supports CPU and CUDA for now.");
+            if (arch_is_cpu(program->config.arch))
+              return make_sparse_matrix(n, m, dtype, storage_format);
+            else
+              return make_cu_sparse_matrix(n, m, dtype);
           })
      .def("make_sparse_matrix_from_ndarray",
           [](Program *program, SparseMatrix &sm, const Ndarray &ndarray) {
-            TI_ERROR_IF(!arch_is_cpu(program->config.arch),
-                        "SparseMatrix only supports CPU for now.");
+            TI_ERROR_IF(!arch_is_cpu(program->config.arch) &&
+                            !arch_is_cuda(program->config.arch),
+                        "SparseMatrix only supports CPU and CUDA for now.");
             return make_sparse_matrix_from_ndarray(program, sm, ndarray);
           })
+     .def("make_sparse_matrix_from_ndarray_cusparse",
+          [](Program *program, CuSparseMatrix &sm, const Ndarray &row_csr,
+             const Ndarray &col_csr, const Ndarray &val_csr) {
+            TI_ERROR_IF(
+                !arch_is_cuda(program->config.arch),
+                "SparseMatrix based on GPU only supports CUDA for now.");
+            return make_sparse_matrix_from_ndarray_cusparse(
+                program, sm, row_csr, col_csr, val_csr);
+          })
      .def("no_activate",
           [](Program *program, SNode *snode) {
             // TODO(#2193): Also apply to @ti.func?
@@ -1171,6 +1185,9 @@ void export_lang(py::module &m) {
   MAKE_SPARSE_MATRIX(64, ColMajor, d);
   MAKE_SPARSE_MATRIX(64, RowMajor, d);
 
+  py::class_<CuSparseMatrix, SparseMatrix>(m, "CuSparseMatrix")
+      .def("spmv", &CuSparseMatrix::spmv);
+
   py::class_<SparseSolver>(m, "SparseSolver")
       .def("compute", &SparseSolver::compute)
       .def("analyze_pattern", &SparseSolver::analyze_pattern)
diff --git a/taichi/rhi/arch.cpp b/taichi/rhi/arch.cpp
index a6eff634c84e5..bca52aefe49bd 100644
--- a/taichi/rhi/arch.cpp
+++ b/taichi/rhi/arch.cpp
@@ -43,6 +43,10 @@ bool arch_is_cpu(Arch arch) {
   }
 }
 
+bool arch_is_cuda(Arch arch) {
+  return arch == Arch::cuda;
+}
+
 bool arch_uses_llvm(Arch arch) {
   return (arch == Arch::x64 || arch == Arch::arm64 || arch == Arch::cuda ||
           arch == Arch::wasm);
diff --git a/taichi/rhi/arch.h b/taichi/rhi/arch.h
index 2d7cffde8950f..47e74ef3acbb0 100644
--- a/taichi/rhi/arch.h
+++ b/taichi/rhi/arch.h
@@ -18,6 +18,8 @@ Arch arch_from_name(const std::string &arch);
 
 bool arch_is_cpu(Arch arch);
 
+bool arch_is_cuda(Arch arch);
+
 bool arch_uses_llvm(Arch arch);
 
 bool arch_is_gpu(Arch arch);
diff --git a/taichi/rhi/cuda/cuda_driver.cpp b/taichi/rhi/cuda/cuda_driver.cpp
index ffa7653dab5ee..f882b75cd5a62 100644
--- a/taichi/rhi/cuda/cuda_driver.cpp
+++ b/taichi/rhi/cuda/cuda_driver.cpp
@@ -15,30 +15,39 @@ std::string get_cuda_error_message(uint32 err) {
   return fmt::format("CUDA Error {}: {}", err_name_ptr, err_string_ptr);
 }
 
-bool CUDADriver::detected() {
-  return !disabled_by_env_ && cuda_version_valid_ && loader_->loaded();
-}
-
-CUDADriver::CUDADriver() {
+CUDADriverBase::CUDADriverBase() {
   disabled_by_env_ = (get_environ_config("TI_ENABLE_CUDA", 1) == 0);
   if (disabled_by_env_) {
-    TI_TRACE(
-        "CUDA driver disabled by environment variable \"TI_ENABLE_CUDA\".");
-    return;
+    TI_TRACE("CUDA driver disabled by environment variable \"TI_ENABLE_CUDA\".");
   }
+}
 
+bool CUDADriverBase::load_lib(std::string lib_linux, std::string lib_windows) {
 #if defined(TI_PLATFORM_LINUX)
-  loader_ = std::make_unique<DynamicLoader>("libcuda.so");
+  auto lib_name = lib_linux;
 #elif defined(TI_PLATFORM_WINDOWS)
-  loader_ = std::make_unique<DynamicLoader>("nvcuda.dll");
+  auto lib_name = lib_windows;
 #else
   static_assert(false, "Taichi CUDA driver supports only Windows and Linux.");
 #endif
 
+  loader_ = std::make_unique<DynamicLoader>(lib_name);
   if (!loader_->loaded()) {
-    TI_WARN("CUDA driver not found.");
-    return;
+    TI_WARN("{} lib not found.", lib_name);
+    return false;
+  } else {
+    TI_TRACE("{} loaded!", lib_name);
+    return true;
   }
+}
+
+bool CUDADriver::detected() {
+  return !disabled_by_env_ && cuda_version_valid_ && loader_->loaded();
+}
+
+CUDADriver::CUDADriver() {
+  if (!load_lib("libcuda.so", "nvcuda.dll"))
+    return;
 
   loader_->load_function("cuGetErrorName", get_error_name);
   loader_->load_function("cuGetErrorString", get_error_string);
@@ -79,4 +88,36 @@ CUDADriver &CUDADriver::get_instance() {
   return get_instance_without_context();
 }
 
+CUSPARSEDriver::CUSPARSEDriver() {
+}
+
+CUSPARSEDriver &CUSPARSEDriver::get_instance() {
+  static CUSPARSEDriver *instance = new CUSPARSEDriver();
+  return *instance;
+}
+
+bool CUSPARSEDriver::load_cusparse() {
+  cusparse_loaded_ = load_lib("libcusparse.so", "cusparse64_11.dll");
+
+  if (!cusparse_loaded_) {
+    return false;
+  }
+#define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \
+  name.set(loader_->load_function(#symbol_name));     \
+  name.set_lock(&lock_);                              \
+  name.set_names(#name, #symbol_name);
+#include "taichi/rhi/cuda/cusparse_functions.inc.h"
+#undef PER_CUSPARSE_FUNCTION
+  return cusparse_loaded_;
+}
+
+CUSOLVERDriver::CUSOLVERDriver() {
+  load_lib("libcusolver.so", "cusolver.dll");
+}
+
+CUSOLVERDriver &CUSOLVERDriver::get_instance() {
+  static CUSOLVERDriver *instance = new CUSOLVERDriver();
+  return *instance;
+}
+
 TLANG_NAMESPACE_END
diff --git a/taichi/rhi/cuda/cuda_driver.h b/taichi/rhi/cuda/cuda_driver.h
index 35bd0f2105d2b..25491bbb44ee0 100644
--- a/taichi/rhi/cuda/cuda_driver.h
+++ b/taichi/rhi/cuda/cuda_driver.h
@@ -95,7 +95,20 @@ class CUDADriverFunction {
   std::mutex *driver_lock_{nullptr};
 };
 
-class CUDADriver {
+class CUDADriverBase {
+ public:
+  ~CUDADriverBase() = default;
+
+ protected:
+  std::unique_ptr<DynamicLoader> loader_;
+  CUDADriverBase();
+
+  bool load_lib(std::string lib_linux, std::string lib_windows);
+
+  bool disabled_by_env_{false};
+};
+
+class CUDADriver : protected CUDADriverBase {
  public:
 #define PER_CUDA_FUNCTION(name, symbol_name, ...) \
   CUDADriverFunction<__VA_ARGS__> name;
@@ -110,8 +123,6 @@ class CUDADriver {
 
   bool detected();
 
-  ~CUDADriver() = default;
-
   static CUDADriver &get_instance();
 
   static CUDADriver &get_instance_without_context();
@@ -119,12 +130,39 @@ class CUDADriver {
  private:
   CUDADriver();
 
-  std::unique_ptr<DynamicLoader> loader_;
-
   std::mutex lock_;
 
-  bool disabled_by_env_{false};
   bool cuda_version_valid_{false};
 };
 
+class CUSPARSEDriver : protected CUDADriverBase {
+ public:
+  static CUSPARSEDriver &get_instance();
+
+#define PER_CUSPARSE_FUNCTION(name, symbol_name, ...) \
+  CUDADriverFunction<__VA_ARGS__> name;
+#include "taichi/rhi/cuda/cusparse_functions.inc.h"
+#undef PER_CUSPARSE_FUNCTION
+
+  bool load_cusparse();
+
+  inline bool is_loaded() {
+    return cusparse_loaded_;
+  }
+
+ private:
+  CUSPARSEDriver();
+  std::mutex lock_;
+  bool cusparse_loaded_{false};
+};
+
+class CUSOLVERDriver : protected CUDADriverBase {
+ public:
+  // TODO: Add cusolver function APIs
+  static CUSOLVERDriver &get_instance();
+
+ private:
+  CUSOLVERDriver();
+};
+
 TLANG_NAMESPACE_END
diff --git a/taichi/rhi/cuda/cuda_types.h b/taichi/rhi/cuda/cuda_types.h
index 2652335b4e7c7..3e4d36ec1c526 100644
--- a/taichi/rhi/cuda/cuda_types.h
+++ b/taichi/rhi/cuda/cuda_types.h
@@ -3,6 +3,7 @@
 #if defined(TI_WITH_CUDA_TOOLKIT)
 
 #include <cuda.h>
+#include <cusparse.h>
 
 #else
 
@@ -434,3 +435,68 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
 #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
 
 #endif
+
+// copy from cusparse.h
+struct cusparseContext;
+typedef struct cusparseContext *cusparseHandle_t;
+
+struct cusparseDnVecDescr;
+struct cusparseSpMatDescr;
+typedef struct cusparseDnVecDescr *cusparseDnVecDescr_t;
+typedef struct cusparseSpMatDescr *cusparseSpMatDescr_t;
+typedef enum {
+  CUSPARSE_INDEX_16U = 1,  ///< 16-bit unsigned integer for matrix/vector
+                           ///< indices
+  CUSPARSE_INDEX_32I = 2,  ///< 32-bit signed integer for matrix/vector indices
+  CUSPARSE_INDEX_64I = 3   ///< 64-bit signed integer for matrix/vector indices
+} cusparseIndexType_t;
+
+typedef enum {
+  CUSPARSE_INDEX_BASE_ZERO = 0,
+  CUSPARSE_INDEX_BASE_ONE = 1
+} cusparseIndexBase_t;
+
+typedef enum cudaDataType_t {
+  CUDA_R_16F = 2,   /* real as a half */
+  CUDA_C_16F = 6,   /* complex as a pair of half numbers */
+  CUDA_R_16BF = 14, /* real as a nv_bfloat16 */
+  CUDA_C_16BF = 15, /* complex as a pair of nv_bfloat16 numbers */
+  CUDA_R_32F = 0,   /* real as a float */
+  CUDA_C_32F = 4,   /* complex as a pair of float numbers */
+  CUDA_R_64F = 1,   /* real as a double */
+  CUDA_C_64F = 5,   /* complex as a pair of double numbers */
+  CUDA_R_4I = 16,   /* real as a signed 4-bit int */
+  CUDA_C_4I = 17,   /* complex as a pair of signed 4-bit int numbers */
+  CUDA_R_4U = 18,   /* real as a unsigned 4-bit int */
+  CUDA_C_4U = 19,   /* complex as a pair of unsigned 4-bit int numbers */
+  CUDA_R_8I = 3,    /* real as a signed 8-bit int */
+  CUDA_C_8I = 7,    /* complex as a pair of signed 8-bit int numbers */
+  CUDA_R_8U = 8,    /* real as a unsigned 8-bit int */
+  CUDA_C_8U = 9,    /* complex as a pair of unsigned 8-bit int numbers */
+  CUDA_R_16I = 20,  /* real as a signed 16-bit int */
+  CUDA_C_16I = 21,  /* complex as a pair of signed 16-bit int numbers */
+  CUDA_R_16U = 22,  /* real as a unsigned 16-bit int */
+  CUDA_C_16U = 23,  /* complex as a pair of unsigned 16-bit int numbers */
+  CUDA_R_32I = 10,  /* real as a signed 32-bit int */
+  CUDA_C_32I = 11,  /* complex as a pair of signed 32-bit int numbers */
+  CUDA_R_32U = 12,  /* real as a unsigned 32-bit int */
+  CUDA_C_32U = 13,  /* complex as a pair of unsigned 32-bit int numbers */
+  CUDA_R_64I = 24,  /* real as a signed 64-bit int */
+  CUDA_C_64I = 25,  /* complex as a pair of signed 64-bit int numbers */
+  CUDA_R_64U = 26,  /* real as a unsigned 64-bit int */
+  CUDA_C_64U = 27   /* complex as a pair of unsigned 64-bit int numbers */
+} cudaDataType;
+
+typedef enum {
+  CUSPARSE_OPERATION_NON_TRANSPOSE = 0,
+  CUSPARSE_OPERATION_TRANSPOSE = 1,
+  CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE = 2
+} cusparseOperation_t;
+
+typedef enum {
+  CUSPARSE_SPMV_ALG_DEFAULT = 0,
+  CUSPARSE_SPMV_COO_ALG1 = 1,
+  CUSPARSE_SPMV_CSR_ALG1 = 2,
+  CUSPARSE_SPMV_CSR_ALG2 = 3,
+  CUSPARSE_SPMV_COO_ALG2 = 4
+} cusparseSpMVAlg_t;
diff --git a/taichi/rhi/cuda/cusparse_functions.inc.h b/taichi/rhi/cuda/cusparse_functions.inc.h
new file mode 100644
index 0000000000000..37df588ebdb66
--- /dev/null
+++ b/taichi/rhi/cuda/cusparse_functions.inc.h
@@ -0,0 +1,18 @@
+// clang-format off
+
+// cusparse setup
+PER_CUSPARSE_FUNCTION(cpCreate, cusparseCreate, cusparseHandle_t *);
+PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t);
+
+// cusparse sparse matrix description
+PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t *, int, int, int, void *, void *, void *, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType);
+PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t *, int, int, int, void *, void *, void *, cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType);
+PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t);
+
+// cusparse dense vector description
+PER_CUSPARSE_FUNCTION(cpCreateDnVec, cusparseCreateDnVec, cusparseDnVecDescr_t *, int, void *, cudaDataType);
+PER_CUSPARSE_FUNCTION(cpDestroyDnVec, cusparseDestroyDnVec, cusparseDnVecDescr_t);
+
+// cusparse sparse matrix-vector multiplication
+PER_CUSPARSE_FUNCTION(cpSpMV_bufferSize, cusparseSpMV_bufferSize, cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, cusparseDnVecDescr_t, const void *, cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, size_t *);
+PER_CUSPARSE_FUNCTION(cpSpMV, cusparseSpMV, cusparseHandle_t, cusparseOperation_t, const void *, cusparseSpMatDescr_t, cusparseDnVecDescr_t, const void *, cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, void *);
diff --git a/tests/python/test_sparse_matrix.py b/tests/python/test_sparse_matrix.py
index d4f6b320c8484..69a0d66f5958d 100644
--- a/tests/python/test_sparse_matrix.py
+++ b/tests/python/test_sparse_matrix.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 import taichi as ti
@@ -374,3 +375,42 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(),
     for i in range(n):
         for j in range(m):
             assert C[i, j] == GT[i][j]
+
+
+@test_utils.test(arch=ti.cuda)
+def test_gpu_sparse_matrix():
+    h_row_csr = np.asarray([0, 3, 4, 7, 9], dtype=np.int32)
+    h_col_csr = np.asarray([0, 2, 3, 1, 0, 2, 3, 1, 3], dtype=np.int32)
+    h_value_csr = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
+                             dtype=np.float32)
+    h_X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
+    h_Y = np.asarray([19.0, 8.0, 51.0, 52.0], dtype=np.float32)
+
+    # Taichi ndarrays holding the CSR arrays of the sparse matrix A
+    idx_dt = ti.int32
+    val_dt = ti.f32
+    row_csr = ti.ndarray(shape=5, dtype=idx_dt)
+    col_csr = ti.ndarray(shape=9, dtype=idx_dt)
+    value_csr = ti.ndarray(shape=9, dtype=val_dt)
+    # Dense vector x
+    X = ti.ndarray(shape=4, dtype=val_dt)
+    # Result of A @ x
+    Y = ti.ndarray(shape=4, dtype=val_dt)
+
+    # Initialize the CSR arrays and vectors from the NumPy arrays
+    row_csr.from_numpy(h_row_csr)
+    col_csr.from_numpy(h_col_csr)
+    value_csr.from_numpy(h_value_csr)
+    X.from_numpy(h_X)
+    Y.fill(0.0)
+
+    # Define the CSR matrix A
+    A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.f32)
+
+    # Build the CSR matrix A from the Taichi ndarrays
+    A.build_csr_cusparse(value_csr, col_csr, row_csr)
+
+    # Compute Y = A @ X
+    A.spmv(X, Y)
+    for i in range(4):
+        assert Y[i] == h_Y[i]