From 2953a018aaec284892897098ae30fc954f3d2939 Mon Sep 17 00:00:00 2001 From: pengyu <6712304+FantasyVR@users.noreply.github.com> Date: Mon, 14 Nov 2022 12:10:43 +0800 Subject: [PATCH] [lang] Build sparse matrix using ndarray (#6563) Issue: #2906 ### Brief Summary Replace the array storing triplets in sparse matrix builder with ndarray. It unifies the sparse matrix builder creation on the CPU and GPU. Co-authored-by: Yi Xu --- python/taichi/lang/kernel_impl.py | 2 +- python/taichi/linalg/sparse_matrix.py | 4 ++ taichi/program/sparse_matrix.cpp | 49 +++++++++---------- taichi/program/sparse_matrix.h | 11 +++-- taichi/python/export_lang.cpp | 8 +-- .../llvm/runtime_module/internal_functions.h | 22 ++++----- tests/python/test_sparse_linear_solver.py | 39 ++++----------- 7 files changed, 58 insertions(+), 77 deletions(-) diff --git a/python/taichi/lang/kernel_impl.py b/python/taichi/lang/kernel_impl.py index e16fefecd6f62..f8644c27b946b 100644 --- a/python/taichi/lang/kernel_impl.py +++ b/python/taichi/lang/kernel_impl.py @@ -680,7 +680,7 @@ def func__(*args): elif isinstance(needed, sparse_matrix_builder): # Pass only the base pointer of the ti.types.sparse_matrix_builder() argument launch_ctx.set_arg_uint(actual_argument_slot, - v._get_addr()) + v._get_ndarray_addr()) elif isinstance(needed, ndarray_type.NdarrayType) and isinstance( v, taichi.lang._ndarray.Ndarray): diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 034d3805954a1..6224a9822b5b0 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py @@ -260,6 +260,10 @@ def _get_addr(self): """Get the address of the sparse matrix""" return self.ptr.get_addr() + def _get_ndarray_addr(self): + """Get the address of the ndarray""" + return self.ptr.get_ndarray_data_ptr() + def print_triplets(self): """Print the triplets stored in the builder""" self.ptr.print_triplets() diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index bf096ac57528e..cfe157a935550 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -79,53 +79,48 @@ SparseMatrixBuilder::SparseMatrixBuilder(int rows, int cols, int max_num_triplets, DataType dtype, - const std::string &storage_format) + const std::string &storage_format, + Program *prog) : rows_(rows), cols_(cols), max_num_triplets_(max_num_triplets), dtype_(dtype), - storage_format_(storage_format) { + storage_format_(storage_format), + prog_(prog) { auto element_size = data_type_size(dtype); TI_ASSERT((element_size == 4 || element_size == 8)); - data_base_ptr_ = - std::make_unique(max_num_triplets_ * 3 * element_size); + ndarray_data_base_ptr_ = std::make_unique( + prog_, dtype_, std::vector{3 * (int)max_num_triplets_ + 1}); } -template -void SparseMatrixBuilder::print_template() { +void SparseMatrixBuilder::print_triplets() { + num_triplets_ = ndarray_data_base_ptr_->read_int(std::vector{0}); fmt::print("n={}, m={}, num_triplets={} (max={})\n", rows_, cols_, num_triplets_, max_num_triplets_); - T *data = reinterpret_cast(data_base_ptr_.get()); - for (int64 i = 0; i < num_triplets_; i++) { - fmt::print("({}, {}) val={}\n", ((G *)data)[i * 3], ((G *)data)[i * 3 + 1], - taichi_union_cast(data[i * 3 + 2])); + for (int i = 0; i < num_triplets_; i++) { + auto idx = 3 * i + 1; + auto row = ndarray_data_base_ptr_->read_int(std::vector{idx}); + auto col = ndarray_data_base_ptr_->read_int(std::vector{idx + 1}); + auto val = ndarray_data_base_ptr_->read_float(std::vector{idx + 2}); + fmt::print("[{}, {}] = {}\n", row, col, val); } - fmt::print("\n"); } -void SparseMatrixBuilder::print_triplets() { - auto element_size = data_type_size(dtype_); - switch (element_size) { - case 4: - print_template(); - break; - case 8: - print_template(); - break; - default: - TI_ERROR("Unsupported sparse matrix data type!"); - break; - } +intptr_t SparseMatrixBuilder::get_ndarray_data_ptr() const { + return prog_->get_ndarray_data_ptr_as_int(ndarray_data_base_ptr_.get()); } template void SparseMatrixBuilder::build_template(std::unique_ptr &m) { using V = Eigen::Triplet; std::vector triplets; - T *data = reinterpret_cast(data_base_ptr_.get()); + auto ptr = get_ndarray_data_ptr(); + G *data = reinterpret_cast(ptr); + num_triplets_ = data[0]; + data += 1; for (int i = 0; i < num_triplets_; i++) { - triplets.push_back(V(((G *)data)[i * 3], ((G *)data)[i * 3 + 1], - taichi_union_cast(data[i * 3 + 2]))); + triplets.push_back( + V(data[i * 3], data[i * 3 + 1], taichi_union_cast(data[i * 3 + 2]))); } m->build_triplets(static_cast(&triplets)); clear(); diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 781f694f3a68f..4b976d6f176da 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -19,30 +19,31 @@ class SparseMatrixBuilder { int cols, int max_num_triplets, DataType dtype, - const std::string &storage_format); + const std::string &storage_format, + Program *prog); void print_triplets(); + intptr_t get_ndarray_data_ptr() const; + std::unique_ptr build(); void clear(); private: - template - void print_template(); - template void build_template(std::unique_ptr &); private: uint64 num_triplets_{0}; - std::unique_ptr data_base_ptr_{nullptr}; + std::unique_ptr ndarray_data_base_ptr_{nullptr}; int rows_{0}; int cols_{0}; uint64 max_num_triplets_{0}; bool built_{false}; DataType dtype_{PrimitiveType::f32}; std::string storage_format_{"col_major"}; + Program *prog_{nullptr}; }; class SparseMatrix { diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 774894b1f78a9..7887ee4f78414 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -398,10 +398,11 @@ void export_lang(py::module &m) { .def("create_sparse_matrix_builder", [](Program *program, int n, int m, uint64 max_num_entries, DataType dtype, const std::string &storage_format) { - TI_ERROR_IF(!arch_is_cpu(program->this_thread_config().arch), - "SparseMatrix Builder only supports CPU for now."); + TI_ERROR_IF(!arch_is_cpu(program->this_thread_config().arch) && + !arch_is_cuda(program->this_thread_config().arch), + "SparseMatrix only supports CPU and CUDA for now."); return SparseMatrixBuilder(n, m, max_num_entries, dtype, - storage_format); + storage_format, program); }) .def("create_sparse_matrix", [](Program *program, int n, int m, DataType dtype, @@ -1204,6 +1205,7 @@ void export_lang(py::module &m) { // Sparse Matrix py::class_(m, "SparseMatrixBuilder") .def("print_triplets", &SparseMatrixBuilder::print_triplets) + .def("get_ndarray_data_ptr", &SparseMatrixBuilder::get_ndarray_data_ptr) .def("build", &SparseMatrixBuilder::build) .def("get_addr", [](SparseMatrixBuilder *mat) { return uint64(mat); }); diff --git a/taichi/runtime/llvm/runtime_module/internal_functions.h b/taichi/runtime/llvm/runtime_module/internal_functions.h index 89c23fd998877..ad66fa7cba3e1 100644 --- a/taichi/runtime/llvm/runtime_module/internal_functions.h +++ b/taichi/runtime/llvm/runtime_module/internal_functions.h @@ -9,15 +9,15 @@ } \ } while (0) -#define ATOMIC_INSERT(T) \ - do { \ - auto base_ptr = (int64 *)base_ptr_; \ - int64 *num_triplets = base_ptr; \ - auto data_base_ptr = *(T **)(base_ptr + 1); \ - auto triplet_id = atomic_add_i64(num_triplets, 1); \ - data_base_ptr[triplet_id * 3] = i; \ - data_base_ptr[triplet_id * 3 + 1] = j; \ - data_base_ptr[triplet_id * 3 + 2] = taichi_union_cast(value); \ +#define ATOMIC_INSERT(T) \ + do { \ + auto base_ptr = reinterpret_cast(base_ptr_); \ + int##T *num_triplets = base_ptr; \ + auto data_base_ptr = base_ptr + 1; \ + auto triplet_id = atomic_add_i##T(num_triplets, 1); \ + data_base_ptr[triplet_id * 3] = i; \ + data_base_ptr[triplet_id * 3 + 1] = j; \ + data_base_ptr[triplet_id * 3 + 2] = taichi_union_cast(value); \ } while (0); i32 do_nothing(RuntimeContext *context) { @@ -36,7 +36,7 @@ i32 insert_triplet_f32(RuntimeContext *context, int i, int j, float value) { - ATOMIC_INSERT(int32); + ATOMIC_INSERT(32); return 0; } @@ -45,7 +45,7 @@ i32 insert_triplet_f64(RuntimeContext *context, int i, int j, float64 value) { - ATOMIC_INSERT(int64); + ATOMIC_INSERT(64); return 0; } diff --git a/tests/python/test_sparse_linear_solver.py b/tests/python/test_sparse_linear_solver.py index f31e7bd7df988..cd48f0581b3af 100644 --- a/tests/python/test_sparse_linear_solver.py +++ b/tests/python/test_sparse_linear_solver.py @@ -4,40 +4,16 @@ import taichi as ti from tests import test_utils -""" -The symmetric positive definite matrix is created in matlab using the following script: - A = diag([1,2,3,4]); - OrthM = [1 0 1 0; -1 -2 0 1; 0 1 -1 0; 0, 1, 0 1]; - U = orth(OrthM); - Aarray = U * A * U'; - b = [1,2,3,4]'; - res = inv(A) * b; -""" # yapf: disable - -Aarray = np.array([[ - 2.73999501130921, 0.518002544441220, 0.745119303009342, 0.0508907745638859 -], [0.518002544441220, 1.45111665837647, 0.757997555750432, 0.290885785873098], - [ - 0.745119303009342, 0.757997555750432, 2.96711176987733, - -0.518002544441220 - ], - [ - 0.0508907745638859, 0.290885785873098, - -0.518002544441220, 2.84177656043698 - ]]) - -res = np.array([ - -0.0754984396447588, 0.469972700892492, 1.18527357933586, 1.57686870529319 -]) - @pytest.mark.parametrize("dtype", [ti.f32]) @pytest.mark.parametrize("solver_type", ["LLT", "LDLT", "LU"]) @pytest.mark.parametrize("ordering", ["AMD", "COLAMD"]) @test_utils.test(arch=ti.cpu) def test_sparse_LLT_solver(dtype, solver_type, ordering): - n = 4 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + n = 10 + A = np.random.rand(n, n) + A_psd = np.dot(A, A.transpose()) + Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=300) b = ti.field(ti.f32, shape=n) @ti.kernel @@ -48,16 +24,19 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), for i in range(n): b[i] = i + 1 - fill(Abuilder, Aarray, b) + fill(Abuilder, A_psd, b) A = Abuilder.build() + print(A) solver = ti.linalg.SparseSolver(dtype=dtype, solver_type=solver_type, ordering=ordering) solver.analyze_pattern(A) solver.factorize(A) x = solver.solve(b) + + res = np.linalg.solve(A_psd, b.to_numpy()) for i in range(n): - assert x[i] == test_utils.approx(res[i]) + assert x[i] == test_utils.approx(res[i], rel=1.0) @test_utils.test(arch=ti.cuda)