From 3fd67cacd32c47a33a63fb1a7fe2287169ccedf8 Mon Sep 17 00:00:00 2001
From: pengyu <6712304+FantasyVR@users.noreply.github.com>
Date: Tue, 22 Nov 2022 10:17:16 +0800
Subject: [PATCH] [lang] Add spmv and sparse linear solver using ndarray as
 vector (#6651)

Issue: #2906

Previously, an ndarray could not be used as a vector in spmv (sparse
matrix-vector multiplication) or sparse linear-solve operations on the CPU
backend. In this PR, the ndarray's data buffer is
[mapped](https://eigen.tuxfamily.org/dox/classEigen_1_1Map.html) as an Eigen
vector, which Eigen then uses directly in the computation. In addition, the
implicit mass-spring example is updated to use ndarrays, so it now runs on
both the CPU and GPU backends.
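To make the mapping concrete: `Eigen::Map` wraps an existing buffer as an Eigen
vector without copying, so the product can be written straight into the
ndarray's memory. Below is a minimal standalone sketch of this idea (plain
Eigen, not Taichi code; the matrix values and buffer names are made up for
illustration):

```cpp
#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <vector>

int main() {
  // A 3x3 sparse matrix built from triplets (illustrative values).
  std::vector<Eigen::Triplet<float>> triplets = {
      {0, 0, 2.0f}, {1, 1, 3.0f}, {2, 0, 1.0f}};
  Eigen::SparseMatrix<float> A(3, 3);
  A.setFromTriplets(triplets.begin(), triplets.end());

  // Raw buffers standing in for an ndarray's CPU memory.
  float x_buf[3] = {1.0f, 2.0f, 3.0f};
  float y_buf[3] = {0.0f, 0.0f, 0.0f};

  // Map the raw pointers as Eigen vectors: no copy is made, and the
  // product A * x is written directly into y_buf.
  Eigen::Map<Eigen::VectorXf>(y_buf, 3) =
      A * Eigen::Map<const Eigen::VectorXf>(x_buf, 3);
  return 0;
}
```

Because the map aliases the buffer, no copy back is needed after the product
completes.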
---
 .../simulation/implicit_mass_spring.py    | 26 +++++++++++++--
 taichi/program/sparse_matrix.cpp          | 32 +++++++++++++++++-
 taichi/program/sparse_matrix.h            |  6 ++--
 taichi/program/sparse_solver.cpp          | 31 +++++++++++++++++
 taichi/program/sparse_solver.h            | 15 +++++++--
 taichi/python/export_lang.cpp             |  1 +
 tests/python/test_sparse_linear_solver.py | 33 +++++++++++++++++++
 tests/python/test_sparse_matrix.py        | 28 ++++++++++++++++
 8 files changed, 163 insertions(+), 9 deletions(-)

diff --git a/python/taichi/examples/simulation/implicit_mass_spring.py b/python/taichi/examples/simulation/implicit_mass_spring.py
index a9cfc743d1907..293fd283b59b3 100644
--- a/python/taichi/examples/simulation/implicit_mass_spring.py
+++ b/python/taichi/examples/simulation/implicit_mass_spring.py
@@ -179,9 +179,29 @@ def update(self, h):
         A = self.M - h * D - h**2 * K
 
-        vel = self.vel.to_numpy().reshape(2 * self.NV)
-        force = self.force.to_numpy().reshape(2 * self.NV)
-        b = (force + h * K @ vel) * h
+        vel = ti.ndarray(ti.f32, 2 * self.NV)
+        force = ti.ndarray(ti.f32, 2 * self.NV)
+
+        @ti.kernel
+        def copy_to(des: ti.types.ndarray(), source: ti.template()):
+            for i in range(self.NV):
+                des[2 * i] = source[i][0]
+                des[2 * i + 1] = source[i][1]
+
+        copy_to(vel, self.vel)
+        copy_to(force, self.force)
+
+        @ti.kernel
+        def compute_b(b: ti.types.ndarray(), f: ti.types.ndarray(),
+                      Kv: ti.types.ndarray(), h: ti.f32):
+            for i in range(2 * self.NV):
+                b[i] = (f[i] + Kv[i] * h) * h
+
+        # b = (force + h * K @ vel) * h
+        b = ti.ndarray(ti.f32, 2 * self.NV)
+        Kv = K @ vel
+        compute_b(b, force, Kv, h)
+
         # Sparse solver
         solver = ti.linalg.SparseSolver(solver_type="LDLT")
         solver.analyze_pattern(A)
diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp
index 5ed15f6b79618..718ecbf3b887e 100644
--- a/taichi/program/sparse_matrix.cpp
+++ b/taichi/program/sparse_matrix.cpp
@@ -25,6 +25,11 @@
   }                                                                    \
 }
 
+#define INSTANTIATE_SPMV(type, storage)                               \
+  template void                                                       \
+  EigenSparseMatrix<Eigen::SparseMatrix<type, Eigen::storage>>::spmv( \
+      Program *prog, const Ndarray &x, const Ndarray &y);
+
 namespace {
 using Pair = std::pair<std::string, std::string>;
 struct key_hash {
@@ -226,6 +231,31 @@ void EigenSparseMatrix<EigenMatrix>::build_triplets(void *triplets_adr) {
   }
 }
 
+template <class EigenMatrix>
+void EigenSparseMatrix<EigenMatrix>::spmv(Program *prog,
+                                          const Ndarray &x,
+                                          const Ndarray &y) {
+  size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
+  size_t dY = prog->get_ndarray_data_ptr_as_int(&y);
+  std::string sdtype = taichi::lang::data_type_name(dtype_);
+  if (sdtype == "f32") {
+    Eigen::Map<Eigen::VectorXf>((float *)dY, cols_) =
+        matrix_.template cast<float>() *
+        Eigen::Map<Eigen::VectorXf>((float *)dX, cols_);
+  } else if (sdtype == "f64") {
+    Eigen::Map<Eigen::VectorXd>((double *)dY, cols_) =
+        matrix_.template cast<double>() *
+        Eigen::Map<Eigen::VectorXd>((double *)dX, cols_);
+  } else {
+    TI_ERROR("Unsupported sparse matrix data type {}!", sdtype);
+  }
+}
+
+INSTANTIATE_SPMV(float32, ColMajor)
+INSTANTIATE_SPMV(float32, RowMajor)
+INSTANTIATE_SPMV(float64, ColMajor)
+INSTANTIATE_SPMV(float64, RowMajor)
+
 std::unique_ptr<SparseMatrix> make_sparse_matrix(
     int rows,
     int cols,
@@ -652,7 +682,7 @@ std::unique_ptr<SparseMatrix> CuSparseMatrix::transpose() const {
 #endif
 }
 
-void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, Ndarray &y) {
+void CuSparseMatrix::spmv(Program *prog, const Ndarray &x, const Ndarray &y) {
 #if defined(TI_WITH_CUDA)
   size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
   size_t dY = prog->get_ndarray_data_ptr_as_int(&y);
diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h
index 6031e402441cb..3708840e2ee9e 100644
--- a/taichi/program/sparse_matrix.h
+++ b/taichi/program/sparse_matrix.h
@@ -87,7 +87,7 @@ class SparseMatrix {
     return nullptr;
   }
 
-  inline DataType get_data_type() {
+  inline const DataType get_data_type() const {
     return dtype_;
   }
 
@@ -197,6 +197,8 @@ class EigenSparseMatrix : public SparseMatrix {
     return matrix_ * b;
  }
 
+  void spmv(Program *prog, const Ndarray &x, const Ndarray &y);
+
  private:
   EigenMatrix matrix_;
 };
@@ -265,7 +267,7 @@ class CuSparseMatrix : public SparseMatrix {
                           void *coo_values_ptr,
                           int nnz) override;
 
-  void spmv(Program *prog, const Ndarray &x, Ndarray &y);
+  void spmv(Program *prog, const Ndarray &x, const Ndarray &y);
 
   const void *get_matrix() const override {
     return &matrix_;
diff --git a/taichi/program/sparse_solver.cpp b/taichi/program/sparse_solver.cpp
index 1d3f5ba4ff211..5a55add667983 100644
--- a/taichi/program/sparse_solver.cpp
+++ b/taichi/program/sparse_solver.cpp
@@ -14,6 +14,14 @@
   }                                                                    \
 }
 
+#define INSTANTIATE_SOLVER(dt, type, order)                                \
+  using dt##type##order =                                                  \
+      Eigen::Simplicial##type<Eigen::SparseMatrix<dt>, Eigen::Lower,       \
+                              Eigen::order##Ordering<int>>;                \
+  template void                                                            \
+  EigenSparseSolver<dt##type##order, Eigen::SparseMatrix<dt>>::solve_rf(   \
+      Program *prog, const SparseMatrix &sm, const Ndarray &b, Ndarray &x);
+
 using Triplets = std::tuple<std::string, std::string, std::string>;
 namespace {
 struct key_hash {
@@ -34,6 +42,9 @@ namespace taichi::lang {
 template <class EigenSolver, class EigenMatrix>
 bool EigenSparseSolver<EigenSolver, EigenMatrix>::compute(
     const SparseMatrix &sm) {
+  if (!is_initialized_) {
+    SparseSolver::init_solver(sm.num_rows(), sm.num_cols(), sm.get_data_type());
+  }
   GET_EM(sm);
   solver_.compute(*mat);
   if (solver_.info() != Eigen::Success) {
@@ -44,6 +55,9 @@ bool EigenSparseSolver<EigenSolver, EigenMatrix>::compute(
 template <class EigenSolver, class EigenMatrix>
 void EigenSparseSolver<EigenSolver, EigenMatrix>::analyze_pattern(
     const SparseMatrix &sm) {
+  if (!is_initialized_) {
+    SparseSolver::init_solver(sm.num_rows(), sm.num_cols(), sm.get_data_type());
+  }
   GET_EM(sm);
   solver_.analyzePattern(*mat);
 }
@@ -66,6 +80,23 @@ bool EigenSparseSolver<EigenSolver, EigenMatrix>::info() {
   return solver_.info() == Eigen::Success;
 }
 
+template <class EigenSolver, class EigenMatrix>
+void EigenSparseSolver<EigenSolver, EigenMatrix>::solve_rf(
+    Program *prog,
+    const SparseMatrix &sm,
+    const Ndarray &b,
+    Ndarray &x) {
+  size_t db = prog->get_ndarray_data_ptr_as_int(&b);
+  size_t dX = prog->get_ndarray_data_ptr_as_int(&x);
+  Eigen::Map<Eigen::VectorXf>((float *)dX, rows_) =
+      solver_.solve(Eigen::Map<Eigen::VectorXf>((float *)db, cols_));
+}
+
+INSTANTIATE_SOLVER(float32, LLT, COLAMD)
+INSTANTIATE_SOLVER(float32, LDLT, COLAMD)
+INSTANTIATE_SOLVER(float32, LLT, AMD)
+INSTANTIATE_SOLVER(float32, LDLT, AMD)
+
 CuSparseSolver::CuSparseSolver() {
 #if defined(TI_WITH_CUDA)
   if (!CUSPARSEDriver::get_instance().is_loaded()) {
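As background for `solve_rf` above, here is a minimal standalone sketch of the
same Eigen pattern: factorize a small SPD system with `SimplicialLLT`, then
solve through mapped buffers so the solution lands directly in the output
array. The matrix values and buffer names are illustrative, not taken from
Taichi:

```cpp
#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <vector>

int main() {
  // Small SPD system A * x = b (values chosen for illustration).
  std::vector<Eigen::Triplet<float>> triplets = {
      {0, 0, 4.0f}, {0, 1, 1.0f}, {1, 0, 1.0f}, {1, 1, 3.0f}};
  Eigen::SparseMatrix<float> A(2, 2);
  A.setFromTriplets(triplets.begin(), triplets.end());

  Eigen::SimplicialLLT<Eigen::SparseMatrix<float>> solver;
  solver.analyzePattern(A);  // symbolic factorization
  solver.factorize(A);       // numeric factorization

  // Buffers standing in for the b and x ndarrays.
  float b_buf[2] = {1.0f, 2.0f};
  float x_buf[2] = {0.0f, 0.0f};

  // Solve directly through the maps; x_buf receives the solution.
  Eigen::Map<Eigen::VectorXf>(x_buf, 2) =
      solver.solve(Eigen::Map<const Eigen::VectorXf>(b_buf, 2));
  return 0;
}
```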
diff --git a/taichi/program/sparse_solver.h b/taichi/program/sparse_solver.h
index 917fccd0f2917..876452cf93161 100644
--- a/taichi/program/sparse_solver.h
+++ b/taichi/program/sparse_solver.h
@@ -9,8 +9,19 @@ namespace taichi::lang {
 
 class SparseSolver {
+ protected:
+  int rows_{0};
+  int cols_{0};
+  DataType dtype_{PrimitiveType::f32};
+  bool is_initialized_{false};
+
  public:
   virtual ~SparseSolver() = default;
+  void init_solver(const int rows, const int cols, const DataType dtype) {
+    rows_ = rows;
+    cols_ = cols;
+    dtype_ = dtype;
+  }
   virtual bool compute(const SparseMatrix &sm) = 0;
   virtual void analyze_pattern(const SparseMatrix &sm) = 0;
   virtual void factorize(const SparseMatrix &sm) = 0;
@@ -46,9 +57,7 @@ class EigenSparseSolver : public SparseSolver {
   void solve_rf(Program *prog,
                 const SparseMatrix &sm,
                 const Ndarray &b,
-                Ndarray &x) override {
-    TI_NOT_IMPLEMENTED;
-  };
+                Ndarray &x) override;
   bool info() override;
 };
 
diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp
index 4e131e2b30837..4c49ee6c0cb79 100644
--- a/taichi/python/export_lang.cpp
+++ b/taichi/python/export_lang.cpp
@@ -1219,6 +1219,7 @@ void export_lang(py::module &m) {
       .def(float##TYPE() * py::self)                                        \
       .def(py::self *py::self)                                              \
       .def("matmul", &EigenSparseMatrix<STORAGE##TYPE##EigenMatrix>::matmul) \
+      .def("spmv", &EigenSparseMatrix<STORAGE##TYPE##EigenMatrix>::spmv)    \
       .def("transpose",                                                     \
            &EigenSparseMatrix<STORAGE##TYPE##EigenMatrix>::transpose)       \
       .def("get_element",                                                   \
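The `.def("spmv", ...)` added above is the standard pybind11 member-function
binding. A minimal self-contained sketch of that pattern follows; `ToyMatrix`,
its `spmv`, and the module name `toy_module` are hypothetical stand-ins, not
Taichi types:

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Simplified stand-in for a sparse-matrix class (illustrative only).
struct ToyMatrix {
  float scale{2.0f};
  // spmv-like method: returns scale * x, standing in for y = A * x.
  float spmv(float x) const { return scale * x; }
};

PYBIND11_MODULE(toy_module, m) {
  py::class_<ToyMatrix>(m, "ToyMatrix")
      .def(py::init<>())
      // Expose the method by member-function pointer, as in export_lang.cpp.
      .def("spmv", &ToyMatrix::spmv);
}
```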
diff --git a/tests/python/test_sparse_linear_solver.py b/tests/python/test_sparse_linear_solver.py
index 772cfc2382d75..b0e79b66d3bd1 100644
--- a/tests/python/test_sparse_linear_solver.py
+++ b/tests/python/test_sparse_linear_solver.py
@@ -38,6 +38,39 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(),
         assert x[i] == test_utils.approx(res[i], rel=1.0)
 
 
+@pytest.mark.parametrize("dtype", [ti.f32])
+@pytest.mark.parametrize("solver_type", ["LLT", "LDLT", "LU"])
+@pytest.mark.parametrize("ordering", ["AMD", "COLAMD"])
+@test_utils.test(arch=ti.cpu)
+def test_sparse_solver_ndarray_vector(dtype, solver_type, ordering):
+    n = 10
+    A = np.random.rand(n, n)
+    A_psd = np.dot(A, A.transpose())
+    Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=300)
+    b = ti.ndarray(ti.f32, shape=n)
+
+    @ti.kernel
+    def fill(Abuilder: ti.types.sparse_matrix_builder(),
+             InputArray: ti.types.ndarray(), b: ti.types.ndarray()):
+        for i, j in ti.ndrange(n, n):
+            Abuilder[i, j] += InputArray[i, j]
+        for i in range(n):
+            b[i] = i + 1
+
+    fill(Abuilder, A_psd, b)
+    A = Abuilder.build()
+    solver = ti.linalg.SparseSolver(dtype=dtype,
+                                    solver_type=solver_type,
+                                    ordering=ordering)
+    solver.analyze_pattern(A)
+    solver.factorize(A)
+    x = solver.solve(b)
+
+    res = np.linalg.solve(A_psd, b.to_numpy())
+    for i in range(n):
+        assert x[i] == test_utils.approx(res[i], rel=1.0)
+
+
 @test_utils.test(arch=ti.cuda)
 def test_gpu_sparse_solver():
     from scipy.sparse import coo_matrix
diff --git a/tests/python/test_sparse_matrix.py b/tests/python/test_sparse_matrix.py
index ce90d4616e15a..bcf0ed72311fc 100644
--- a/tests/python/test_sparse_matrix.py
+++ b/tests/python/test_sparse_matrix.py
@@ -376,6 +376,34 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(),
             assert C[i, j] == GT[i][j]
 
 
+@pytest.mark.parametrize('dtype, storage_format', [(ti.f32, 'col_major'),
+                                                   (ti.f32, 'row_major'),
+                                                   (ti.f64, 'col_major'),
+                                                   (ti.f64, 'row_major')])
+@test_utils.test(arch=ti.cpu)
+def test_sparse_matrix_ndarray_vector_multiplication(dtype, storage_format):
+    n = 2
+    Abuilder = ti.linalg.SparseMatrixBuilder(n,
+                                             n,
+                                             max_num_triplets=100,
+                                             dtype=dtype,
+                                             storage_format=storage_format)
+    x = ti.ndarray(dtype, n)
+
+    @ti.kernel
+    def fill(Abuilder: ti.types.sparse_matrix_builder()):
+        for i, j in ti.ndrange(n, n):
+            Abuilder[i, j] += i + j
+
+    fill(Abuilder)
+    x.fill(1.0)
+    A = Abuilder.build()
+    res = A @ x
+    res_n = res.to_numpy()
+    assert res_n[0] == 1.0
+    assert res_n[1] == 3.0
+
+
 @test_utils.test(arch=ti.cuda)
 def test_gpu_sparse_matrix():
     import numpy as np