Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lang] Support linear system solving on GPU with cuSolver #5860

Merged
merged 20 commits into from
Aug 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion misc/test_build_cusm_from_coo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
y.fill(0.0)

A = ti.linalg.SparseMatrix(n=4, m=4, dtype=ti.float32)
A.build_coo(d_coo_val, d_coo_col, d_coo_row)
A.build_coo(d_coo_row, d_coo_col, d_coo_val)

A.spmv(x, y)

Expand Down
63 changes: 63 additions & 0 deletions misc/test_coo_cusolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
from scipy.sparse import coo_matrix

import taichi as ti

ti.init(arch=ti.cuda)


@ti.kernel
def init_b(b: ti.types.ndarray(), nrows: ti.i32):
for i in range(nrows):
b[i] = 1.0 + i / nrows


@ti.kernel
def print_x(x: ti.types.ndarray(), ncols: ti.i32):
for i in range(ncols):
print(x[i], end=' ')
print()


"""
Generate a positive definite matrix with a given number of rows and columns.
Reference: https://stackoverflow.com/questions/619335/a-simple-algorithm-for-generating-positive-semidefinite-matrices
"""
matrixSize = 10
A = np.random.rand(matrixSize, matrixSize)
A_psd = np.dot(A, A.transpose())

A_raw_coo = coo_matrix(A_psd)
nrows, ncols = A_raw_coo.shape
nnz = A_raw_coo.nnz

A_csr = A_raw_coo.tocsr()
b = ti.ndarray(shape=nrows, dtype=ti.f32)
init_b(b, nrows)

print(">> solve Ax = b using Cusolver ......... ")
A_coo = A_csr.tocoo()
d_row_coo = ti.ndarray(shape=nnz, dtype=ti.i32)
d_col_coo = ti.ndarray(shape=nnz, dtype=ti.i32)
d_val_coo = ti.ndarray(shape=nnz, dtype=ti.f32)
d_row_coo.from_numpy(A_coo.row)
d_col_coo.from_numpy(A_coo.col)
d_val_coo.from_numpy(A_coo.data)

A_ti = ti.linalg.SparseMatrix(n=nrows, m=ncols, dtype=ti.float32)
A_ti.build_coo(d_row_coo, d_col_coo, d_val_coo)
x_ti = ti.ndarray(shape=ncols, dtype=ti.float32)
solver = ti.linalg.SparseSolver()
solver.solve_cu(A_ti, b, x_ti)
FantasyVR marked this conversation as resolved.
Show resolved Hide resolved
ti.sync()
print_x(x_ti, ncols)
ti.sync()

print(">> solve Ax = b using Numpy ......... ")
b_np = b.to_numpy()
x_np = np.linalg.solve(A_psd, b_np)
print(x_np)

print(
f"The solution is identical?: {np.allclose(x_ti.to_numpy(), x_np, atol=1e-1)}"
)
10 changes: 5 additions & 5 deletions python/taichi/linalg/sparse_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def build_from_ndarray(self, ndarray):
'Sparse matrix only supports building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray]'
)

def build_coo(self, row_indices, col_indices, data):
def build_coo(self, row_coo, col_coo, value_coo):
"""Build a CSR format sparse matrix from COO format inputs.

Args:
Expand All @@ -209,18 +209,18 @@ def build_coo(self, row_indices, col_indices, data):
Raises:
TaichiRuntimeError: If the inputs are not ``ti.ndarray`` or the datatypes of the ndarray are not correct.
"""
if not isinstance(data, Ndarray) or not isinstance(
col_indices, Ndarray) or not isinstance(row_indices, Ndarray):
if not isinstance(row_coo, Ndarray) or not isinstance(
col_coo, Ndarray) or not isinstance(value_coo, Ndarray):
raise TaichiRuntimeError(
'Sparse matrix only supports COO format building from [ti.ndarray, ti.Vector.ndarray, ti.Matrix.ndarray].'
)
elif data.dtype != f32 or col_indices.dtype != i32 or row_indices.dtype != i32:
elif value_coo.dtype != f32 or row_coo.dtype != i32 or col_coo.dtype != i32:
raise TaichiRuntimeError(
'Sparse matrix only supports COO fromat building from float32 data and int32 row/col indices.'
)
else:
get_runtime().prog.make_sparse_matrix_from_ndarray_cusparse(
self.matrix, row_indices.arr, col_indices.arr, data.arr)
self.matrix, row_coo.arr, col_coo.arr, value_coo.arr)

def spmv(self, x, y):
"""Sparse matrix-vector multiplication using cuSparse.
Expand Down
24 changes: 20 additions & 4 deletions python/taichi/linalg/sparse_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from taichi._lib import core as _ti_core
from taichi.lang.exception import TaichiRuntimeError
from taichi.lang.field import Field
from taichi.linalg import SparseMatrix
from taichi.lang.impl import get_runtime
from taichi.lang.matrix import Ndarray
from taichi.linalg.sparse_matrix import SparseMatrix
from taichi.types.primitive_types import f32


Expand All @@ -21,9 +23,13 @@ def __init__(self, dtype=f32, solver_type="LLT", ordering="AMD"):
solver_ordering = ['AMD', 'COLAMD']
if solver_type in solver_type_list and ordering in solver_ordering:
taichi_arch = taichi.lang.impl.get_runtime().prog.config.arch
assert taichi_arch == _ti_core.Arch.x64 or taichi_arch == _ti_core.Arch.arm64, "SparseSolver only supports CPU for now."
self.solver = _ti_core.make_sparse_solver(dtype, solver_type,
ordering)
assert taichi_arch == _ti_core.Arch.x64 or taichi_arch == _ti_core.Arch.arm64 or taichi_arch == _ti_core.Arch.cuda, "SparseSolver only supports CPU and CUDA for now."
if taichi_arch == _ti_core.Arch.cuda:
self.solver = _ti_core.make_cusparse_solver(
dtype, solver_type, ordering)
else:
self.solver = _ti_core.make_sparse_solver(
dtype, solver_type, ordering)
else:
raise TaichiRuntimeError(
f"The solver type {solver_type} with {ordering} is not supported for now. Only {solver_type_list} with {solver_ordering} are supported."
Expand Down Expand Up @@ -84,6 +90,16 @@ def solve(self, b): # pylint: disable=R1710
f"The parameter type: {type(b)} is not supported in linear solvers for now."
)

def solve_cu(self, sparse_matrix, b, x):
if isinstance(sparse_matrix, SparseMatrix) and isinstance(
b, Ndarray) and isinstance(x, Ndarray):
self.solver.solve_cu(get_runtime().prog, sparse_matrix.matrix,
b.arr, x.arr)
else:
raise TaichiRuntimeError(
f"The parameter type: {type(sparse_matrix)}, {type(b)} and {type(x)} is not supported in linear solvers for now."
)

def info(self):
"""Check if the linear systems are solved successfully.

Expand Down
29 changes: 11 additions & 18 deletions taichi/program/sparse_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ void CuSparseMatrix::build_csr_from_coo(void *coo_row_ptr,
coo_values_ptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F);
CUSPARSEDriver::get_instance().cpDestroy(cusparse_handle);
// TODO: not sure if this array should be deleted now.
CUDADriver::get_instance().mem_free(csr_row_offset_ptr);
// TODO: free csr_row_offset_ptr
// CUDADriver::get_instance().mem_free(csr_row_offset_ptr);
#endif
}

Expand All @@ -229,23 +229,16 @@ CuSparseMatrix::~CuSparseMatrix() {
}
void make_sparse_matrix_from_ndarray_cusparse(Program *prog,
SparseMatrix &sm,
const Ndarray &row_indices,
const Ndarray &col_indices,
const Ndarray &values) {
const Ndarray &row_coo,
const Ndarray &col_coo,
const Ndarray &val_coo) {
#if defined(TI_WITH_CUDA)
std::string sdtype = taichi::lang::data_type_name(sm.get_data_type());
if (!CUSPARSEDriver::get_instance().is_loaded()) {
bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
if (!load_success) {
TI_ERROR("Failed to load cusparse library!");
}
}
size_t row_coo = prog->get_ndarray_data_ptr_as_int(&row_indices);
size_t col_coo = prog->get_ndarray_data_ptr_as_int(&col_indices);
size_t values_coo = prog->get_ndarray_data_ptr_as_int(&values);
int nnz = values.get_nelement();
sm.build_csr_from_coo((void *)row_coo, (void *)col_coo, (void *)values_coo,
nnz);
size_t coo_row_ptr = prog->get_ndarray_data_ptr_as_int(&row_coo);
size_t coo_col_ptr = prog->get_ndarray_data_ptr_as_int(&col_coo);
size_t coo_val_ptr = prog->get_ndarray_data_ptr_as_int(&val_coo);
int nnz = val_coo.get_nelement();
sm.build_csr_from_coo((void *)coo_row_ptr, (void *)coo_col_ptr,
(void *)coo_val_ptr, nnz);
#endif
}

Expand Down
8 changes: 8 additions & 0 deletions taichi/program/sparse_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,14 @@ class CuSparseMatrix : public SparseMatrix {
public:
explicit CuSparseMatrix(int rows, int cols, DataType dt)
: SparseMatrix(rows, cols, dt) {
#if defined(TI_WITH_CUDA)
if (!CUSPARSEDriver::get_instance().is_loaded()) {
bool load_success = CUSPARSEDriver::get_instance().load_cusparse();
if (!load_success) {
TI_ERROR("Failed to load cusparse library!");
}
}
#endif
}

virtual ~CuSparseMatrix();
Expand Down
Loading