diff --git a/misc/test_build_cusm_from_coo.py b/misc/test_build_cusm_from_coo.py
index 7512a136dd364..17b46a4384f93 100644
--- a/misc/test_build_cusm_from_coo.py
+++ b/misc/test_build_cusm_from_coo.py
@@ -24,7 +24,7 @@
 y.fill(0.0)
 
 A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.float32)
-A.build_coo(d_coo_val, d_coo_col, d_coo_row)
+A.build_coo(d_coo_row, d_coo_col, d_coo_val)
 
 A.spmv(x, y)
diff --git a/misc/test_coo_cusolver.py b/misc/test_coo_cusolver.py
new file mode 100644
index 0000000000000..7570f8cf53bb1
--- /dev/null
+++ b/misc/test_coo_cusolver.py
@@ -0,0 +1,63 @@
+import numpy as np
+from scipy.sparse import coo_matrix
+
+import taichi as ti
+
+ti.init(arch=ti.cuda)
+
+
+@ti.kernel
+def init_b(b: ti.types.ndarray(), nrows: ti.i32):
+    for i in range(nrows):
+        b[i] = 1.0 + i / nrows
+
+
+@ti.kernel
+def print_x(x: ti.types.ndarray(), ncols: ti.i32):
+    for i in range(ncols):
+        print(x[i], end=' ')
+    print()
+
+
+"""
+Generate a random positive definite matrix: A @ A.T is almost surely PD.
+Reference: https://stackoverflow.com/questions/619335/a-simple-algorithm-for-generating-positive-semidefinite-matrices
+"""
+matrixSize = 10
+A = np.random.rand(matrixSize, matrixSize)
+A_psd = np.dot(A, A.transpose())
+
+A_raw_coo = coo_matrix(A_psd)
+nrows, ncols = A_raw_coo.shape
+nnz = A_raw_coo.nnz
+
+A_csr = A_raw_coo.tocsr()
+b = ti.ndarray(shape=nrows, dtype=ti.f32)
+init_b(b, nrows)
+
+print(">> solve Ax = b using cuSolver ......... ")
+A_coo = A_csr.tocoo()
+d_row_coo = ti.ndarray(shape=nnz, dtype=ti.i32)
+d_col_coo = ti.ndarray(shape=nnz, dtype=ti.i32)
+d_val_coo = ti.ndarray(shape=nnz, dtype=ti.f32)
+d_row_coo.from_numpy(A_coo.row)
+d_col_coo.from_numpy(A_coo.col)
+d_val_coo.from_numpy(A_coo.data)
+
+A_ti = ti.linalg.SparseMatrix(n=nrows, m=ncols, dtype=ti.float32)
+A_ti.build_coo(d_row_coo, d_col_coo, d_val_coo)
+x_ti = ti.ndarray(shape=ncols, dtype=ti.float32)
+solver = ti.linalg.SparseSolver()
+solver.solve_cu(A_ti, b, x_ti)
+ti.sync()
+print_x(x_ti, ncols)
+ti.sync()
+
+print(">> solve Ax = b using NumPy ......... ")
+b_np = b.to_numpy()
+x_np = np.linalg.solve(A_psd, b_np)
+print(x_np)
+
+print(
+    f"Are the solutions identical?: {np.allclose(x_ti.to_numpy(), x_np, atol=1e-1)}"
+)
diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py
index 0472b69a5b2e0..eac2896644af6 100644
--- a/python/taichi/linalg/sparse_matrix.py
+++ b/python/taichi/linalg/sparse_matrix.py
@@ -198,7 +198,7 @@ def build_from_ndarray(self, ndarray):
                 'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
             )
 
-    def build_coo(self, row_indices, col_indices, data):
+    def build_coo(self, row_coo, col_coo, value_coo):
         """Build a CSR format sparse matrix from COO format inputs.
 
         Args:
@@ -209,18 +209,18 @@ def build_coo(self, row_indices, col_indices, data):
         Raises:
             TaichiRuntimeError: If the inputs are not ``ti.ndarray`` or the datatypes of the ndarray are not correct.
         """
-        if not isinstance(data, Ndarray) or not isinstance(
-                col_indices, Ndarray) or not isinstance(row_indices, Ndarray):
+        if not isinstance(row_coo, Ndarray) or not isinstance(
+                col_coo, Ndarray) or not isinstance(value_coo, Ndarray):
             raise TaichiRuntimeError(
                 'Sparse matrix only supports COO format building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray].'
             )
-        elif data.dtype != f32 or col_indices.dtype != i32 or row_indices.dtype != i32:
+        elif value_coo.dtype != f32 or row_coo.dtype != i32 or col_coo.dtype != i32:
             raise TaichiRuntimeError(
                 'Sparse matrix only supports COO format building from float32 data and int32 row/col indices.'
             )
         else:
             get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse(
-                self.matrix, row_indices.arr, col_indices.arr, data.arr)
+                self.matrix, row_coo.arr, col_coo.arr, value_coo.arr)
 
     def spmv(self, x, y):
         """Sparse matrix-vector multiplication using cuSparse.
diff --git a/python/taichi/linalg/sparse_solver.py b/python/taichi/linalg/sparse_solver.py
index 021df4b9e8fb7..3ff15420b36b4 100644
--- a/python/taichi/linalg/sparse_solver.py
+++ b/python/taichi/linalg/sparse_solver.py
@@ -3,7 +3,9 @@
 from taichi._lib import core as _ti_core
 from taichi.lang.exception import TaichiRuntimeError
 from taichi.lang.field import Field
-from taichi.linalg import SparseMatrix
+from taichi.lang.impl import get_runtime
+from taichi.lang.matrix import Ndarray
+from taichi.linalg.sparse_matrix import SparseMatrix
 from taichi.types.primitive_types import f32
 
 
@@ -21,9 +23,13 @@
         solver_ordering = ['AMD', 'COLAMD']
         if solver_type in solver_type_list and ordering in solver_ordering:
             taichi_arch = taichi.lang.impl.get_runtime().prog.config.arch
-            assert taichi_arch == _ti_core.Arch.x64 or taichi_arch == _ti_core.Arch.arm64, "SparseSolver only supports CPU for now."
-            self.solver = _ti_core.make_sparse_solver(dtype, solver_type,
-                                                      ordering)
+            assert taichi_arch == _ti_core.Arch.x64 or taichi_arch == _ti_core.Arch.arm64 or taichi_arch == _ti_core.Arch.cuda, "SparseSolver only supports CPU and CUDA for now."
+            if taichi_arch == _ti_core.Arch.cuda:
+                self.solver = _ti_core.make_cusparse_solver(
+                    dtype, solver_type, ordering)
+            else:
+                self.solver = _ti_core.make_sparse_solver(
+                    dtype, solver_type, ordering)
         else:
             raise TaichiRuntimeError(
                 f"The solver type {solver_type} with {ordering} is not supported for now. Only {solver_type_list} with {solver_ordering} are supported."
@@ -84,6 +90,16 @@ def solve(self, b):  # pylint: disable=R1710
             f"The parameter type: {type(b)} is not supported in linear solvers for now."
         )
 
+    def solve_cu(self, sparse_matrix, b, x):
+        if isinstance(sparse_matrix, SparseMatrix) and isinstance(
+                b, Ndarray) and isinstance(x, Ndarray):
+            self.solver.solve_cu(get_runtime().prog, sparse_matrix.matrix,
+                                 b.arr, x.arr)
+        else:
+            raise TaichiRuntimeError(
+                f"The parameter types: {type(sparse_matrix)}, {type(b)} and {type(x)} are not supported in linear solvers for now."
+            )
+
     def info(self):
         """Check if the linear systems are solved successfully.
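Note on the two Python changes above: `build_coo` now takes the three COO arrays in (row, col, value) order, matching the renamed parameters `row_coo`, `col_coo`, `value_coo`; the old test passed them as (value, col, row). A minimal sketch of the corrected calling convention, assuming a CUDA-capable build (the variable names are illustrative, not part of the patch):

```python
import numpy as np
from scipy.sparse import coo_matrix

import taichi as ti

ti.init(arch=ti.cuda)

# A 4x4 matrix with three non-zeros, expressed as (row, col, value) triplets.
dense = np.array([[1, 0, 0, 0],
                  [0, 2, 0, 0],
                  [0, 0, 0, 3],
                  [0, 0, 0, 0]], dtype=np.float32)
coo = coo_matrix(dense)  # coo.row/coo.col are int32 here, coo.data is float32

d_row = ti.ndarray(shape=coo.nnz, dtype=ti.i32)
d_col = ti.ndarray(shape=coo.nnz, dtype=ti.i32)
d_val = ti.ndarray(shape=coo.nnz, dtype=ti.f32)
d_row.from_numpy(coo.row)
d_col.from_numpy(coo.col)
d_val.from_numpy(coo.data)

A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.float32)
# Row indices first, then column indices, then values.
A.build_coo(d_row, d_col, d_val)  # converted to CSR internally via cuSPARSE
```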
diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp
index 5d170fbd0360a..3e8096a98d72f 100644
--- a/taichi/program/sparse_matrix.cpp
+++ b/taichi/program/sparse_matrix.cpp
@@ -217,8 +217,8 @@ void CuSparseMatrix::build_csr_from_coo(void *coo_row_ptr,
       coo_values_ptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
       CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
   CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle);
-  // TODO: not sure if this array should be deleted now.
-  CUDADriver::get_instance().mem_free(csr_row_offset_ptr);
+  // TODO: free csr_row_offset_ptr when the matrix is destroyed; the CSR
+  // descriptor created above still references this buffer.
+  // CUDADriver::get_instance().mem_free(csr_row_offset_ptr);
 #endif
 }
 
@@ -229,23 +229,16 @@ CuSparseMatrix::~CuSparseMatrix() {
 }
 
 void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
                                               SparseMatrix &sm,
-                                              const Ndarray &row_indices,
-                                              const Ndarray &col_indices,
-                                              const Ndarray &values) {
+                                              const Ndarray &row_coo,
+                                              const Ndarray &col_coo,
+                                              const Ndarray &val_coo) {
 #if defined(TI_WITH_CUDA)
-  std::string sdtype = taichi::lang::data_type_name(sm.get_data_type());
-  if (!CUSPARSEDriver::get_instance().is_loaded()) {
-    bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
-    if (!load_success) {
-      TI_ERROR("Failed to load cusparse library!");
-    }
-  }
-  size_t row_coo = prog->get_ndarray_data_ptr_as_int(&row_indices);
-  size_t col_coo = prog->get_ndarray_data_ptr_as_int(&col_indices);
-  size_t values_coo = prog->get_ndarray_data_ptr_as_int(&values);
-  int nnz = values.get_nelement();
-  sm.build_csr_from_coo((void *)row_coo, (void *)col_coo, (void *)values_coo,
-                        nnz);
+  size_t coo_row_ptr = prog->get_ndarray_data_ptr_as_int(&row_coo);
+  size_t coo_col_ptr = prog->get_ndarray_data_ptr_as_int(&col_coo);
+  size_t coo_val_ptr = prog->get_ndarray_data_ptr_as_int(&val_coo);
+  int nnz = val_coo.get_nelement();
+  sm.build_csr_from_coo((void *)coo_row_ptr, (void *)coo_col_ptr,
+                        (void *)coo_val_ptr, nnz);
 #endif
 }
diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h
index 777b12876f7b1..53d262919cb06 100644
--- a/taichi/program/sparse_matrix.h
+++ b/taichi/program/sparse_matrix.h
@@ -202,6 +202,14 @@ class CuSparseMatrix : public SparseMatrix {
  public:
   explicit CuSparseMatrix(int rows, int cols, DataType dt)
       : SparseMatrix(rows, cols, dt) {
+#if defined(TI_WITH_CUDA)
+    if (!CUSPARSEDriver::get_instance().is_loaded()) {
+      bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
+      if (!load_success) {
+        TI_ERROR("Failed to load cusparse library!");
+      }
+    }
+#endif
   }
   virtual ~CuSparseMatrix();
diff --git a/taichi/program/sparse_solver.cpp b/taichi/program/sparse_solver.cpp
index 4743a511ced50..bb4ea6c784e57 100644
--- a/taichi/program/sparse_solver.cpp
+++ b/taichi/program/sparse_solver.cpp
@@ -67,6 +67,189 @@ bool EigenSparseSolver::info() {
   return solver_.info() == Eigen::Success;
 }
 
+CuSparseSolver::CuSparseSolver() {
+#if defined(TI_WITH_CUDA)
+  if (!CUSPARSEDriver::get_instance().is_loaded()) {
+    bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
+    if (!load_success) {
+      TI_ERROR("Failed to load cusparse library!");
+    }
+  }
+  if (!CUSOLVERDriver::get_instance().is_loaded()) {
+    bool load_success = CUSOLVERDriver::get_instance().load_cusolver();
+    if (!load_success) {
+      TI_ERROR("Failed to load cusolver library!");
+    }
+  }
+#endif
+}
+
+void CuSparseSolver::solve_cu(Program *prog,
+                              const SparseMatrix &sm,
+                              const Ndarray &b,
+                              Ndarray &x) {
+#ifdef TI_WITH_CUDA
+  cusparseHandle_t cusparseHandle = NULL;
+  CUSPARSEDriver::get_instance().cpCreate(&cusparseHandle);
+  cusolverSpHandle_t handle = NULL;
+  CUSOLVERDriver::get_instance().csSpCreate(&handle);
+
+  int major_version, minor_version, patch_level;
+  CUSOLVERDriver::get_instance().csGetProperty(MAJOR_VERSION, &major_version);
+  CUSOLVERDriver::get_instance().csGetProperty(MINOR_VERSION, &minor_version);
+  CUSOLVERDriver::get_instance().csGetProperty(PATCH_LEVEL, &patch_level);
+  printf("Cusolver version: %d.%d.%d\n", major_version, minor_version,
+         patch_level);
+
+  // extract the CSR arrays of A from the cuSPARSE matrix descriptor
+  const cusparseSpMatDescr_t *A =
+      (const cusparseSpMatDescr_t *)(sm.get_matrix());
+  size_t nrows = 0, ncols = 0, nnz = 0;
+  void *drow_offsets = NULL, *dcol_indices = NULL, *dvalues = NULL;
+  cusparseIndexType_t csrRowOffsetsType, csrColIndType;
+  cusparseIndexBase_t idxBase;
+  cudaDataType valueType;
+  CUSPARSEDriver::get_instance().cpCsrGet(
+      *A, &nrows, &ncols, &nnz, &drow_offsets, &dcol_indices, &dvalues,
+      &csrRowOffsetsType, &csrColIndType, &idxBase, &valueType);
+
+  size_t db = prog->get_ndarray_data_ptr_as_int(&b);
+  size_t dx = prog->get_ndarray_data_ptr_as_int(&x);
+
+  float *h_z = (float *)malloc(sizeof(float) * ncols);
+  float *h_x = (float *)malloc(sizeof(float) * ncols);
+  float *h_b = (float *)malloc(sizeof(float) * nrows);
+  float *h_Qb = (float *)malloc(sizeof(float) * nrows);
+
+  int *h_Q = (int *)malloc(sizeof(int) * ncols);
+  int *hrow_offsets_B = (int *)malloc(sizeof(int) * (nrows + 1));
+  int *hcol_indices_B = (int *)malloc(sizeof(int) * nnz);
+  float *h_val_B = (float *)malloc(sizeof(float) * nnz);
+  int *h_mapBfromA = (int *)malloc(sizeof(int) * nnz);
+
+  assert(NULL != h_z);
+  assert(NULL != h_x);
+  assert(NULL != h_b);
+  assert(NULL != h_Qb);
+  assert(NULL != h_Q);
+  assert(NULL != hrow_offsets_B);
+  assert(NULL != hcol_indices_B);
+  assert(NULL != h_val_B);
+  assert(NULL != h_mapBfromA);
+
+  int *hrow_offsets = NULL, *hcol_indices = NULL;
+  hrow_offsets = (int *)malloc(sizeof(int) * (nrows + 1));
+  hcol_indices = (int *)malloc(sizeof(int) * nnz);
+  assert(NULL != hrow_offsets);
+  assert(NULL != hcol_indices);
+  // Attention: drow_offsets belongs to the CSR descriptor and is not freed
+  // here or at any other place.
+  CUDADriver::get_instance().memcpy_device_to_host(
+      (void *)hrow_offsets, drow_offsets, sizeof(int) * (nrows + 1));
+  CUDADriver::get_instance().memcpy_device_to_host(
+      (void *)hcol_indices, dcol_indices, sizeof(int) * nnz);
+
+  // configure the matrix descriptor of A
+  cusparseMatDescr_t descrA = NULL;
+  CUSPARSEDriver::get_instance().cpCreateMatDescr(&descrA);
+  CUSPARSEDriver::get_instance().cpSetMatType(descrA,
+                                              CUSPARSE_MATRIX_TYPE_GENERAL);
+  CUSPARSEDriver::get_instance().cpSetMatIndexBase(descrA,
+                                                   CUSPARSE_INDEX_BASE_ZERO);
+
+  // step 1: verify that A has a symmetric pattern
+  int issym = 0;
+  CUSOLVERDriver::get_instance().csSpXcsrissymHost(
+      handle, nrows, nnz, descrA, (void *)hrow_offsets,
+      (void *)(hrow_offsets + 1), (void *)hcol_indices, &issym);
+  if (!issym) {
+    TI_ERROR("A has no symmetric pattern, please use LU or QR!");
+    return;
+  }
+
+  // step 2: reorder the matrix to minimize zero fill-in
+  CUSOLVERDriver::get_instance().csSpXcsrsymrcmHost(
+      handle, nrows, nnz, descrA, (void *)hrow_offsets, (void *)hcol_indices,
+      (void *)h_Q);  // symrcm method
+
+  // step 3: B = A(Q, Q)
+  memcpy(hrow_offsets_B, hrow_offsets, sizeof(int) * (nrows + 1));
+  memcpy(hcol_indices_B, hcol_indices, sizeof(int) * nnz);
+
+  size_t size_perm;
+  CUSOLVERDriver::get_instance().csSpXcsrperm_bufferSizeHost(
+      handle, nrows, ncols, nnz, descrA, (void *)hrow_offsets_B,
+      (void *)hcol_indices_B, (void *)h_Q, (void *)h_Q, &size_perm);
+  void *buffer_cpu = (void *)malloc(sizeof(char) * size_perm);
+  for (int j = 0; j < nnz; j++)
+    h_mapBfromA[j] = j;
+
+  CUSOLVERDriver::get_instance().csSpXcsrpermHost(
+      handle, nrows, ncols, nnz, descrA, (void *)hrow_offsets_B,
+      (void *)hcol_indices_B, (void *)h_Q, (void *)h_Q, (void *)h_mapBfromA,
+      (void *)buffer_cpu);
+
+  float *h_val_A = (float *)malloc(sizeof(float) * nnz);
+  CUDADriver::get_instance().memcpy_device_to_host(
+      (void *)h_val_A, (void *)dvalues, sizeof(float) * nnz);
+  for (int j = 0; j < nnz; j++)
+    h_val_B[j] = h_val_A[h_mapBfromA[j]];
+
+  CUDADriver::get_instance().memcpy_device_to_host((void *)h_b, (void *)db,
+                                                   sizeof(float) * nrows);
+  for (int row = 0; row < nrows; row++) {
+    h_Qb[row] = h_b[h_Q[row]];
+  }
+
+  // step 4: solve B*z = Q*b
+  float tol = 1e-6;
+  int reorder = 1;
+  int singularity = 0; /* -1 if A is invertible under tol. */
+  // use Cholesky decomposition as default
+  CUSOLVERDriver::get_instance().csSpScsrlsvcholHost(
+      handle, nrows, nnz, descrA, (void *)h_val_B, (void *)hrow_offsets_B,
+      (void *)hcol_indices_B, (void *)h_Qb, tol, reorder, (void *)h_z,
+      &singularity);
+  if (singularity >= 0) {
+    TI_ERROR("A is a singular matrix!");
+    return;
+  }
+
+  // step 5: Q*x = z
+  for (int row = 0; row < nrows; row++)
+    h_x[h_Q[row]] = h_z[row];
+
+  CUDADriver::get_instance().memcpy_host_to_device((void *)dx, (void *)h_x,
+                                                   sizeof(float) * ncols);
+
+  CUSOLVERDriver::get_instance().csSpDestory(handle);
+  CUSPARSEDriver::get_instance().cpDestroy(cusparseHandle);
+
+  if (hrow_offsets != NULL)
+    free(hrow_offsets);
+  if (hcol_indices != NULL)
+    free(hcol_indices);
+  if (hrow_offsets_B != NULL)
+    free(hrow_offsets_B);
+  if (hcol_indices_B != NULL)
+    free(hcol_indices_B);
+  if (h_Q != NULL)
+    free(h_Q);
+  if (h_mapBfromA != NULL)
+    free(h_mapBfromA);
+  if (h_z != NULL)
+    free(h_z);
+  if (h_b != NULL)
+    free(h_b);
+  if (h_Qb != NULL)
+    free(h_Qb);
+  if (h_x != NULL)
+    free(h_x);
+  if (buffer_cpu != NULL)
+    free(buffer_cpu);
+  if (h_val_A != NULL)
+    free(h_val_A);
+  if (h_val_B != NULL)
+    free(h_val_B);
+#endif
+}
+
 std::unique_ptr<SparseSolver> make_sparse_solver(DataType dt,
                                                  const std::string &solver_type,
                                                  const std::string &ordering) {
@@ -95,5 +278,11 @@ std::unique_ptr<SparseSolver> make_sparse_solver(DataType dt,
   TI_ERROR("Not supported sparse solver type: {}", solver_type);
 }
 
+std::unique_ptr<SparseSolver> make_cusparse_solver(
+    DataType dt,
+    const std::string &solver_type,
+    const std::string &ordering) {
+  return std::make_unique<CuSparseSolver>();
+}
 }  // namespace lang
 }  // namespace taichi
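For readers unfamiliar with the cuSOLVER host API, `solve_cu` above follows the same recipe as NVIDIA's `cuSolverSp_LinearSolver` sample: check that A has a symmetric pattern, compute a fill-reducing symrcm permutation Q, form B = A(Q, Q), solve B·z = Q·b with a host-side Cholesky factorization, then scatter x[Q] = z. A rough SciPy analogue of those five steps, for illustration only (`reverse_cuthill_mckee` is the same reordering that `cusolverSpXcsrsymrcmHost` implements, and `spsolve` stands in for the Cholesky solve):

```python
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import reverse_cuthill_mckee
from scipy.sparse.linalg import spsolve


def solve_like_solve_cu(A: csr_matrix, b: np.ndarray) -> np.ndarray:
    # step 1: a Cholesky-type solve needs a symmetric pattern (this check is
    # slightly stronger than cusolverSpXcsrissymHost's, which compares only
    # the sparsity patterns, not the values)
    assert (A != A.T).nnz == 0, "A has no symmetric pattern, use LU or QR"
    # step 2: fill-reducing reordering (symmetric reverse Cuthill-McKee)
    q = reverse_cuthill_mckee(A, symmetric_mode=True)
    # step 3: B = A(Q, Q)
    B = A[q, :][:, q]
    # step 4: solve B * z = Q * b
    z = spsolve(B.tocsc(), b[q])
    # step 5: undo the permutation, x[Q] = z
    x = np.empty_like(z)
    x[q] = z
    return x
```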
diff --git a/taichi/program/sparse_solver.h b/taichi/program/sparse_solver.h
index e156e826a0db1..460bdce0a6cea 100644
--- a/taichi/program/sparse_solver.h
+++ b/taichi/program/sparse_solver.h
@@ -1,6 +1,8 @@
 #pragma once
 
 #include "taichi/ir/type.h"
+#include "taichi/rhi/cuda/cuda_driver.h"
+#include "taichi/program/program.h"
 
 #include "sparse_matrix.h"
 
@@ -14,6 +16,10 @@ class SparseSolver {
   virtual void analyze_pattern(const SparseMatrix &sm) = 0;
   virtual void factorize(const SparseMatrix &sm) = 0;
   virtual Eigen::VectorXf solve(const Eigen::Ref<const Eigen::VectorXf> &b) = 0;
+  virtual void solve_cu(Program *prog,
+                        const SparseMatrix &sm,
+                        const Ndarray &b,
+                        Ndarray &x) = 0;
   virtual bool info() = 0;
 };
 
@@ -28,12 +34,46 @@ class EigenSparseSolver : public SparseSolver {
   void analyze_pattern(const SparseMatrix &sm) override;
   void factorize(const SparseMatrix &sm) override;
   Eigen::VectorXf solve(const Eigen::Ref<const Eigen::VectorXf> &b) override;
+  void solve_cu(Program *prog,
+                const SparseMatrix &sm,
+                const Ndarray &b,
+                Ndarray &x) override {
+    // CUDA-only entry point; no-op on the Eigen (CPU) backend.
+  }
   bool info() override;
 };
 
+class CuSparseSolver : public SparseSolver {
+ public:
+  CuSparseSolver();
+  ~CuSparseSolver() override = default;
+  bool compute(const SparseMatrix &sm) override {
+    TI_NOT_IMPLEMENTED;
+  }
+  void analyze_pattern(const SparseMatrix &sm) override {
+    TI_NOT_IMPLEMENTED;
+  }
+  void factorize(const SparseMatrix &sm) override {
+    TI_NOT_IMPLEMENTED;
+  }
+  Eigen::VectorXf solve(const Eigen::Ref<const Eigen::VectorXf> &b) override {
+    TI_NOT_IMPLEMENTED;
+  }
+  void solve_cu(Program *prog,
+                const SparseMatrix &sm,
+                const Ndarray &b,
+                Ndarray &x) override;
+  bool info() override {
+    TI_NOT_IMPLEMENTED;
+  }
+};
+
 std::unique_ptr<SparseSolver> make_sparse_solver(DataType dt,
                                                  const std::string &solver_type,
                                                  const std::string &ordering);
 
+std::unique_ptr<SparseSolver> make_cusparse_solver(
+    DataType dt,
+    const std::string &solver_type,
+    const std::string &ordering);
 }  // namespace lang
 }  // namespace taichi
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index 0ceb503cec27d..e33a7979c88dd 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -1191,7 +1191,8 @@ void export_lang(py::module &m) {
   MAKE_SPARSE_MATRIX(64, ColMajor, d);
   MAKE_SPARSE_MATRIX(64, RowMajor, d);
 
-  py::class_<CuSparseMatrix>(m, "CuSparseMatrix")
+  py::class_<CuSparseMatrix, SparseMatrix>(m, "CuSparseMatrix")
+      .def(py::init<int, int, DataType>())
       .def("spmv", &CuSparseMatrix::spmv);
 
   py::class_<SparseSolver>(m, "SparseSolver")
@@ -1199,9 +1200,11 @@ void export_lang(py::module &m) {
       .def("compute", &SparseSolver::compute)
      .def("analyze_pattern", &SparseSolver::analyze_pattern)
       .def("factorize", &SparseSolver::factorize)
       .def("solve", &SparseSolver::solve)
+      .def("solve_cu", &SparseSolver::solve_cu)
       .def("info", &SparseSolver::info);
 
   m.def("make_sparse_solver", &make_sparse_solver);
+  m.def("make_cusparse_solver", &make_cusparse_solver);
 
   // Mesh Class
   // Mesh related.
diff --git a/taichi/rhi/cuda/cuda_driver.cpp b/taichi/rhi/cuda/cuda_driver.cpp
index f882b75cd5a62..f266801d00a9b 100644
--- a/taichi/rhi/cuda/cuda_driver.cpp
+++ b/taichi/rhi/cuda/cuda_driver.cpp
@@ -112,7 +112,6 @@ bool CUSPARSEDriver::load_cusparse() {
 }
 
 CUSOLVERDriver::CUSOLVERDriver() {
-  load_lib("libcusolver.so", "cusolver.dll");
 }
 
 CUSOLVERDriver &CUSOLVERDriver::get_instance() {
@@ -120,4 +119,17 @@ CUSOLVERDriver &CUSOLVERDriver::get_instance() {
   return *instance;
 }
 
+bool CUSOLVERDriver::load_cusolver() {
+  cusolver_loaded_ = load_lib("libcusolver.so", "cusolver64_11.dll");
+  if (!cusolver_loaded_) {
+    return false;
+  }
+#define PER_CUSOLVER_FUNCTION(name, symbol_name, ...) \
+  name.set(loader_->load_function(#symbol_name));     \
+  name.set_lock(&lock_);                              \
+  name.set_names(#name, #symbol_name);
+#include "taichi/rhi/cuda/cusolver_functions.inc.h"
+#undef PER_CUSOLVER_FUNCTION
+  return cusolver_loaded_;
+}
 TLANG_NAMESPACE_END
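With the driver changes above, both cuSPARSE and cuSOLVER are now loaded lazily: the `CuSparseMatrix` and `CuSparseSolver` constructors call `load_cusparse()`/`load_cusolver()` on first use instead of the driver loading the library in its own constructor. From Python, the backend is selected by the active arch; a sketch of the resulting call pattern (this mirrors the new `sparse_solver.py` logic; on CUDA only `solve_cu` is implemented, the Eigen entry points still raise `TI_NOT_IMPLEMENTED`):

```python
import taichi as ti

# CPU arch: SparseSolver() constructs the Eigen-backed solver, as before.
ti.init(arch=ti.cpu)
cpu_solver = ti.linalg.SparseSolver(dtype=ti.f32, solver_type="LLT")

# CUDA arch: SparseSolver() now routes to make_cusparse_solver; use
# solver.solve_cu(A, b, x) with ti.ndarray b/x rather than solve(b).
ti.reset()
ti.init(arch=ti.cuda)
gpu_solver = ti.linalg.SparseSolver()
```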
diff --git a/taichi/rhi/cuda/cuda_driver.h b/taichi/rhi/cuda/cuda_driver.h
index 25491bbb44ee0..13fdf03480e50 100644
--- a/taichi/rhi/cuda/cuda_driver.h
+++ b/taichi/rhi/cuda/cuda_driver.h
@@ -161,8 +161,21 @@ class CUSOLVERDriver : protected CUDADriverBase {
   // TODO: Add cusolver function APIs
   static CUSOLVERDriver &get_instance();
 
+#define PER_CUSOLVER_FUNCTION(name, symbol_name, ...) \
+  CUDADriverFunction<__VA_ARGS__> name;
+#include "taichi/rhi/cuda/cusolver_functions.inc.h"
+#undef PER_CUSOLVER_FUNCTION
+
+  bool load_cusolver();
+
+  inline bool is_loaded() {
+    return cusolver_loaded_;
+  }
+
  private:
   CUSOLVERDriver();
+  std::mutex lock_;
+  bool cusolver_loaded_{false};
 };
 
 TLANG_NAMESPACE_END
diff --git a/taichi/rhi/cuda/cuda_types.h b/taichi/rhi/cuda/cuda_types.h
index 3e4d36ec1c526..64a369f5a76b5 100644
--- a/taichi/rhi/cuda/cuda_types.h
+++ b/taichi/rhi/cuda/cuda_types.h
@@ -434,12 +434,13 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
  */
 #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
 
-#endif
-
 // copy from cusparse.h
 struct cusparseContext;
 typedef struct cusparseContext *cusparseHandle_t;
 
+struct cusparseMatDescr;
+typedef struct cusparseMatDescr *cusparseMatDescr_t;
+
 struct cusparseDnVecDescr;
 struct cusparseSpMatDescr;
 typedef struct cusparseDnVecDescr *cusparseDnVecDescr_t;
@@ -500,3 +501,31 @@ typedef enum {
   CUSPARSE_SPMV_CSR_ALG2 = 3,
   CUSPARSE_SPMV_COO_ALG2 = 4
 } cusparseSpMVAlg_t;
+
+typedef enum {
+  CUSPARSE_MATRIX_TYPE_GENERAL = 0,
+  CUSPARSE_MATRIX_TYPE_SYMMETRIC = 1,
+  CUSPARSE_MATRIX_TYPE_HERMITIAN = 2,
+  CUSPARSE_MATRIX_TYPE_TRIANGULAR = 3
+} cusparseMatrixType_t;
+
+typedef enum {
+  CUSPARSE_FILL_MODE_LOWER = 0,
+  CUSPARSE_FILL_MODE_UPPER = 1
+} cusparseFillMode_t;
+
+typedef enum {
+  CUSPARSE_DIAG_TYPE_NON_UNIT = 0,
+  CUSPARSE_DIAG_TYPE_UNIT = 1
+} cusparseDiagType_t;
+
+// copy from cusolver.h
+typedef enum libraryPropertyType_t {
+  MAJOR_VERSION,
+  MINOR_VERSION,
+  PATCH_LEVEL
+} libraryPropertyType;
+
+struct cusolverSpContext;
+typedef struct cusolverSpContext *cusolverSpHandle_t;
+#endif
diff --git a/taichi/rhi/cuda/cusolver_functions.inc.h b/taichi/rhi/cuda/cusolver_functions.inc.h
new file mode 100644
index 0000000000000..bc9672e4f745c
--- /dev/null
+++ b/taichi/rhi/cuda/cusolver_functions.inc.h
@@ -0,0 +1,14 @@
+// clang-format off
+
+// cusolver functions
+PER_CUSOLVER_FUNCTION(csGetProperty, cusolverGetProperty, libraryPropertyType, void *);
+
+
+// cusolver functions for solving A*x = b
+PER_CUSOLVER_FUNCTION(csSpCreate, cusolverSpCreate, cusolverSpHandle_t *);
+PER_CUSOLVER_FUNCTION(csSpDestory, cusolverSpDestroy, cusolverSpHandle_t);
+PER_CUSOLVER_FUNCTION(csSpXcsrissymHost, cusolverSpXcsrissymHost, cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const void *, const void *, const void *, void *);
+PER_CUSOLVER_FUNCTION(csSpXcsrsymrcmHost, cusolverSpXcsrsymrcmHost, cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const void *, const void *, void *);
+PER_CUSOLVER_FUNCTION(csSpXcsrperm_bufferSizeHost, cusolverSpXcsrperm_bufferSizeHost, cusolverSpHandle_t, int, int, int, const cusparseMatDescr_t, void *, void *, const void *, const void *, size_t *);
+PER_CUSOLVER_FUNCTION(csSpXcsrpermHost, cusolverSpXcsrpermHost, cusolverSpHandle_t, int, int, int, const cusparseMatDescr_t, void *, void *, const void *, const void *, void *, void *);
+PER_CUSOLVER_FUNCTION(csSpScsrlsvcholHost, cusolverSpScsrlsvcholHost, cusolverSpHandle_t, int, int, const cusparseMatDescr_t, const void *, const void *, const void *, const void *, float, int, void *, void *);
diff --git a/taichi/rhi/cuda/cusparse_functions.inc.h b/taichi/rhi/cuda/cusparse_functions.inc.h
index dc8e2fa02e361..7476b83c09473 100644
--- a/taichi/rhi/cuda/cusparse_functions.inc.h
+++ b/taichi/rhi/cuda/cusparse_functions.inc.h
@@ -8,6 +8,10 @@ PER_CUSPARSE_FUNCTION(cpDestroy, cusparseDestroy, cusparseHandle_t);
 PER_CUSPARSE_FUNCTION(cpCreateCoo, cusparseCreateCoo, cusparseSpMatDescr_t*, int, int, int, void*, void*, void*, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType);
 PER_CUSPARSE_FUNCTION(cpCreateCsr, cusparseCreateCsr, cusparseSpMatDescr_t*, int, int, int, void*, void*, void*, cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType);
 PER_CUSPARSE_FUNCTION(cpCoo2Csr, cusparseXcoo2csr, cusparseHandle_t, const void*, int, int, void*, cusparseIndexBase_t);
+PER_CUSPARSE_FUNCTION(cpCsrGet, cusparseCsrGet, cusparseSpMatDescr_t, size_t*, size_t*, size_t*, void**, void**, void**, cusparseIndexType_t*, cusparseIndexType_t*, cusparseIndexBase_t*, cudaDataType*);
+PER_CUSPARSE_FUNCTION(cpCreateMatDescr, cusparseCreateMatDescr, cusparseMatDescr_t*);
+PER_CUSPARSE_FUNCTION(cpSetMatType, cusparseSetMatType, cusparseMatDescr_t, cusparseMatrixType_t);
+PER_CUSPARSE_FUNCTION(cpSetMatIndexBase, cusparseSetMatIndexBase, cusparseMatDescr_t, cusparseIndexBase_t);
 PER_CUSPARSE_FUNCTION(cpDestroySpMat, cusparseDestroySpMat, cusparseSpMatDescr_t);
 
 // cusparse dense vector description
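A final note on the new test's matrix generator: `A_psd = A @ A.T` is symmetric positive semidefinite by construction and, for a random dense `A`, almost surely positive definite, which is what the Cholesky path (`csSpScsrlsvcholHost`) requires; otherwise `solve_cu` reports a singular matrix. A quick NumPy check for that precondition (an illustrative helper, not part of the patch):

```python
import numpy as np


def is_spd(M: np.ndarray) -> bool:
    """Return True if M is symmetric positive definite."""
    if not np.allclose(M, M.T):
        return False
    try:
        np.linalg.cholesky(M)  # raises LinAlgError iff M is not PD
        return True
    except np.linalg.LinAlgError:
        return False


A = np.random.rand(10, 10)
assert is_spd(np.dot(A, A.T))  # same construction as test_coo_cusolver.py
```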