From b514b3429dde76be524ee9d982d367b30c56cf67 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 13:17:42 -0500 Subject: [PATCH 1/8] fixing spectral APIs --- cpp/include/raft/linalg/detail/lanczos.hpp | 142 ++++++++---------- cpp/include/raft/linalg/lanczos.hpp | 49 +++--- .../detail/modularity_maximization.hpp | 19 ++- .../raft/spectral/detail/partition.hpp | 24 +-- .../raft/spectral/detail/spectral_util.cuh | 7 +- cpp/include/raft/spectral/eigen_solvers.hpp | 65 ++++---- ...atrix_wrappers.cuh => matrix_wrappers.hpp} | 2 + .../raft/spectral/modularity_maximization.hpp | 4 +- 8 files changed, 155 insertions(+), 157 deletions(-) rename cpp/include/raft/spectral/{detail/matrix_wrappers.cuh => matrix_wrappers.hpp} (99%) diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index a2b7751a05..6d51936e95 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -33,11 +33,7 @@ #include namespace raft { - -using namespace matrix; -using namespace linalg::detail; - -namespace spectral { +namespace linalg { namespace detail { // curandGeneratorNormalX @@ -87,7 +83,7 @@ inline curandStatus_t curandGenerateNormalX( */ template int performLanczosIteration(handle_t const& handle, - sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t* iter, index_type_t maxIter, value_type_t shift, @@ -696,11 +692,6 @@ static int lanczosRestart(handle_t const& handle, return 0; } -} // namespace detail -} // namespace spectral - -namespace detail { - /** * @brief Compute smallest eigenvectors of symmetric matrix * Computes eigenvalues and eigenvectors that are least @@ -751,26 +742,25 @@ namespace detail { * @return error flag. */ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* shift, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* shift, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -993,20 +983,19 @@ int computeSmallestEigenvectors(handle_t const& handle, } template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + 
spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { - using namespace raft::spectral::detail; - // Matrix dimension index_type_t n = A.nrows_; @@ -1024,8 +1013,8 @@ int computeSmallestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos method index_type_t effIter; @@ -1097,25 +1086,24 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. */ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -1342,17 +1330,18 @@ int computeLargestEigenvectors(handle_t const& handle, } template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { // Matrix dimension index_type_t n = A.nrows_; @@ -1371,8 +1360,8 @@ int computeLargestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos 
method index_type_t effIter; @@ -1398,4 +1387,5 @@ int computeLargestEigenvectors(handle_t const& handle, } } // namespace detail +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/linalg/lanczos.hpp b/cpp/include/raft/linalg/lanczos.hpp index e7d965f810..21b65158fc 100644 --- a/cpp/include/raft/linalg/lanczos.hpp +++ b/cpp/include/raft/linalg/lanczos.hpp @@ -17,8 +17,10 @@ #pragma once #include "detail/lanczos.hpp" +#include namespace raft { +namespace linalg { // ========================================================= // Eigensolver @@ -62,17 +64,18 @@ namespace raft { * @return error flag. */ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { return detail::computeSmallestEigenvectors(handle, A, @@ -125,17 +128,18 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. */ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { return detail::computeLargestEigenvectors(handle, A, @@ -150,4 +154,5 @@ int computeLargestEigenvectors(handle_t const& handle, seed); } +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp index a55dfbe67f..6bb3dca920 100644 --- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp @@ -26,9 +26,11 @@ #include +#include #include #include #include +#include #ifdef COLLECT_TIME_STATISTICS #include @@ -52,9 +54,6 @@ namespace raft { namespace spectral { namespace detail { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral modularity_maximization // ========================================================= @@ -83,7 +82,7 @@ using namespace linalg; template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -105,7 +104,7 @@ std::tuple modularity_maximization( // Compute eigenvectors of Modularity Matrix // Initialize Modularity 
Matrix - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; auto eigen_config = eigen_solver.get_config(); auto nEigVecs = eigen_config.n_eigVecs; @@ -142,7 +141,7 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) @@ -157,14 +156,14 @@ void analyzeModularity(handle_t const& handle, auto stream = handle.get_stream(); // Device memory - vector_t part_i(handle, n); - vector_t Bx(handle, n); + raft::spectral::matrix::vector_t part_i(handle, n); + raft::spectral::matrix::vector_t Bx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY(linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Modularity - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; // Initialize output modularity = 0; diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp index b7c811d5a5..775b37d118 100644 --- a/cpp/include/raft/spectral/detail/partition.hpp +++ b/cpp/include/raft/spectral/detail/partition.hpp @@ -25,6 +25,7 @@ #include +#include #include #include #include @@ -33,9 +34,6 @@ namespace raft { namespace spectral { namespace detail { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral partitioner // ========================================================= @@ -63,13 +61,14 @@ using namespace linalg; * @return statistics: number of eigensolver iterations, . 
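 *
 * Editorial sketch (not part of this patch) of consuming the returned
 * statistics; the tuple ordering shown is an assumption inferred from the
 * "@return" note above, not a documented guarantee:
 * @code{.cpp}
 *   auto stats     = partition(handle, csr_m, eig_solver, cluster_solver,
 *                              clusters, eigVals, eigVecs);
 *   auto eig_iters = std::get<0>(stats);  // eigensolver iterations taken
 * @endcode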
*/ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); @@ -132,7 +131,7 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, @@ -153,7 +152,8 @@ void analyzePartition(handle_t const& handle, vector_t Lx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Laplacian /// sparse_matrix_t A{handle, graph}; diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 6b57566a73..c7a0f0c5ef 100644 --- a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -204,9 +205,9 @@ bool construct_indicator(handle_t const& handle, weight_t& clustersize, weight_t& partStats, vertex_t const* __restrict__ clusters, - vector_t& part_i, - vector_t& Bx, - laplacian_matrix_t const& B) + raft::spectral::matrix::vector_t& part_i, + raft::spectral::matrix::vector_t& Bx, + raft::spectral::matrix::laplacian_matrix_t const& B) { auto stream = handle.get_stream(); auto cublas_h = handle.get_cublas_handle(); diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index 192dc15a6b..b6e25814e6 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -16,12 +16,11 @@ #pragma once #include +#include namespace raft { namespace spectral { -using namespace matrix; - // aggregate of control params for Eigen Solver: // template @@ -47,47 +46,49 @@ struct lanczos_solver_t { { } - index_type_t solve_smallest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* __restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_smallest_eigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeSmallestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeSmallestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } - index_type_t solve_largest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* 
__restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_largest_eigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeLargestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeLargestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh b/cpp/include/raft/spectral/matrix_wrappers.hpp similarity index 99% rename from cpp/include/raft/spectral/detail/matrix_wrappers.cuh rename to cpp/include/raft/spectral/matrix_wrappers.hpp index d86dc21135..95940ceb2a 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -34,6 +34,7 @@ #define IDX(i, j, lda) ((i) + (j) * (lda)) namespace raft { +namespace spectral { namespace matrix { using size_type = int; // for now; TODO: move it in appropriate header @@ -444,4 +445,5 @@ struct modularity_matrix_t : laplacian_matrix_t { }; } // namespace matrix +} // namespace spectral } // namespace raft diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 466851c74f..460e2cf26f 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -51,7 +51,7 @@ namespace spectral { template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -74,7 +74,7 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) From 3575dd673c573114160814dfc4db96fac87d62ef Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 9 Feb 2022 14:04:08 -0500 Subject: [PATCH 2/8] Fixing API --- cpp/include/raft/cluster/detail/kmeans.cuh | 12 +- cpp/include/raft/linalg/detail/lanczos.hpp | 2 +- .../raft/spectral/detail/matrix_wrappers.cuh | 451 ++++++++++++++++++ cpp/include/raft/spectral/eigen_solvers.hpp | 4 +- cpp/include/raft/spectral/matrix_wrappers.hpp | 406 +--------------- cpp/include/raft/spectral/partition.hpp | 17 +- 6 files changed, 475 insertions(+), 417 deletions(-) create mode 100644 cpp/include/raft/spectral/detail/matrix_wrappers.cuh diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 5f1a0e137d..039ac8854a 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -32,8 +32,8 @@ #include #include #include -#include #include +#include namespace raft { namespace cluster { @@ -948,8 +948,6 @@ int kmeans(handle_t const& handle, index_type_t& iters, unsigned long long seed = 123456) { - using namespace matrix; - // Check that parameters are valid RAFT_EXPECTS(n > 0, "invalid parameter (n<1)"); RAFT_EXPECTS(d > 0, "invalid parameter (d<1)"); @@ -958,10 +956,10 @@ int kmeans(handle_t const& handle, RAFT_EXPECTS(maxiter >= 0, "invalid parameter (maxiter<0)"); // Allocate memory - vector_t clusterSizes(handle, k); - vector_t centroids(handle, d * k); - vector_t work(handle, n * max(k, d)); - vector_t work_int(handle, 2 * d * n); + raft::spectral::matrix::vector_t clusterSizes(handle, k); + raft::spectral::matrix::vector_t centroids(handle, d * k); + raft::spectral::matrix::vector_t work(handle, n * max(k, d)); + raft::spectral::matrix::vector_t work_int(handle, 2 * d * n); // Perform k-means return kmeans(handle, diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index 6d51936e95..9fa0d79875 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -29,8 +29,8 @@ #include #include #include -#include #include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh new file mode 100644 index 0000000000..b4a2ed175f --- /dev/null +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include + +// ========================================================= +// Useful macros +// ========================================================= + +// Get index of matrix entry +#define IDX(i, j, lda) ((i) + (j) * (lda)) + +namespace raft { +namespace spectral { +namespace matrix { +namespace detail { + +using size_type = int; // for now; TODO: move it in appropriate header + +// Apply diagonal matrix to vector: +// +template +static __global__ void diagmv(IndexType_ n, + ValueType_ alpha, + const ValueType_* __restrict__ D, + const ValueType_* __restrict__ x, + ValueType_* __restrict__ y) +{ + IndexType_ i = threadIdx.x + blockIdx.x * blockDim.x; + while (i < n) { + y[i] += alpha * D[i] * x[i]; + i += blockDim.x * gridDim.x; + } +} + +// specifies type of algorithm used +// for SpMv: +// +enum struct sparse_mv_alg_t : int { + SPARSE_MV_UNDEFINED = -1, + SPARSE_MV_ALG_DEFAULT, // generic, for any sparse matrix + SPARSE_MV_ALG1, // typical for CSR + SPARSE_MV_ALG2 // may provide better performamce for irregular sparse matrices +}; + +// Vector "view"-like aggregate for linear algebra purposes +// +template +struct vector_view_t { + value_type* buffer_; + size_type size_; + + vector_view_t(value_type* buffer, size_type sz) : buffer_(buffer), size_(sz) {} + + vector_view_t(vector_view_t&& other) : buffer_(other.raw()), size_(other.size()) {} + + vector_view_t& operator=(vector_view_t&& other) + { + buffer_ = other.raw(); + size_ = other.size(); + } +}; + +template +class vector_t { + public: + vector_t(handle_t const& raft_handle, size_type sz) + : buffer_(sz, raft_handle.get_stream()), thrust_policy(raft_handle.get_thrust_policy()) + { + } + + size_type size(void) const { return buffer_.size(); } + + value_type* raw(void) { return buffer_.data(); } + + value_type const* raw(void) const { return buffer_.data(); } + + value_type nrm1() const + { + return thrust::reduce(thrust_policy, + buffer_.data(), + buffer_.data() + buffer_.size(), + value_type{0}, + [] __device__(auto left, auto right) { + auto abs_left = left > 0 ? left : -left; + auto abs_right = right > 0 ? 
right : -right; + return abs_left + abs_right; + }); + } + + void fill(value_type value) + { + thrust::fill_n(thrust_policy, buffer_.data(), buffer_.size(), value); + } + + private: + using thrust_exec_policy_t = + thrust::detail::execute_with_allocator, + thrust::cuda_cub::execute_on_stream_base>; + rmm::device_uvector buffer_; + const thrust_exec_policy_t thrust_policy; +}; + +template +struct sparse_matrix_t { + sparse_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const ncols, + index_type const nnz) + : handle_(raft_handle), + row_offsets_(row_offsets), + col_indices_(col_indices), + values_(values), + nrows_(nrows), + ncols_(ncols), + nnz_(nnz) + { + } + + sparse_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : handle_(raft_handle), + row_offsets_(row_offsets), + col_indices_(col_indices), + values_(values), + nrows_(nrows), + ncols_(nrows), + nnz_(nnz) + { + } + + template + sparse_matrix_t(handle_t const& raft_handle, CSRView const& csr_view) + : handle_(raft_handle), + row_offsets_(csr_view.offsets), + col_indices_(csr_view.indices), + values_(csr_view.edge_data), + nrows_(csr_view.number_of_vertices), + ncols_(csr_view.number_of_vertices), + nnz_(csr_view.number_of_edges) + { + } + + virtual ~sparse_matrix_t(void) = + default; // virtual because used as base for following matrix types + + // y = alpha*A*x + beta*y + //(Note: removed const-ness of x, because CUDA 11 SpMV + // descriptor creation works with non-const, and const-casting + // down is dangerous) + // + virtual void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const + { + using namespace sparse; + + RAFT_EXPECTS(x != nullptr, "Null x buffer."); + RAFT_EXPECTS(y != nullptr, "Null y buffer."); + + auto cusparse_h = handle_.get_cusparse_handle(); + auto stream = handle_.get_stream(); + + cusparseOperation_t trans = transpose ? CUSPARSE_OPERATION_TRANSPOSE : // transpose + CUSPARSE_OPERATION_NON_TRANSPOSE; // non-transpose + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + auto size_x = transpose ? nrows_ : ncols_; + auto size_y = transpose ? ncols_ : nrows_; + + cusparseSpMVAlg_t spmv_alg = translate_algorithm(alg); + + // create descriptors: + //(below casts are necessary, because + // cusparseCreateCsr(...) 
takes non-const + // void*; the casts should be harmless) + // + cusparseSpMatDescr_t matA; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, + nrows_, + ncols_, + nnz_, + const_cast(row_offsets_), + const_cast(col_indices_), + const_cast(values_))); + + cusparseDnVecDescr_t vecX; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); + + cusparseDnVecDescr_t vecY; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); + + // get (scratch) external device buffer size: + // + size_t bufferSize; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( + cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); + + // allocate external buffer: + // + vector_t external_buffer(handle_, bufferSize); + + // finally perform SpMV: + // + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( + cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); + + // free descriptors: + //(TODO: maybe wrap them in a RAII struct?) + // + RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecY)); + RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); + cusparseMatDescr_t descr = 0; + RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); + if (symmetric) { + RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_SYMMETRIC)); + } else { + RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); + } + RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, + trans, + nrows_, + ncols_, + nnz_, + &alpha, + descr, + values_, + row_offsets_, + col_indices_, + x, + &beta, + y, + stream)); + RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); +#endif + } + + handle_t const& get_handle(void) const { return handle_; } + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const + { + switch (alg) { + case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_CSRMV_ALG1; + case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_CSRMV_ALG2; + default: return CUSPARSE_MV_ALG_DEFAULT; + } + } +#endif + + // private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence, + // aggregate + + handle_t const& handle_; + index_type const* row_offsets_; + index_type const* col_indices_; + value_type const* values_; + index_type const nrows_; + index_type const ncols_; + index_type const nnz_; +}; + +template +struct laplacian_matrix_t : sparse_matrix_t { + laplacian_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : sparse_matrix_t( + raft_handle, row_offsets, col_indices, values, nrows, nnz), + diagonal_(raft_handle, nrows) + { + vector_t ones{raft_handle, nrows}; + ones.fill(1.0); + sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); + } + + laplacian_matrix_t(handle_t const& raft_handle, + sparse_matrix_t const& csr_m) + : sparse_matrix_t(raft_handle, + csr_m.row_offsets_, + csr_m.col_indices_, + csr_m.values_, + csr_m.nrows_, + csr_m.nnz_), + diagonal_(raft_handle, csr_m.nrows_) + { + vector_t ones{raft_handle, csr_m.nrows_}; + ones.fill(1.0); + sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); 
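+    // (editorial note) mv() computes y = alpha*A*x + beta*y, so with x = ones,
+    // alpha = 1 and beta = 0 the call above stores the row sums of A (the
+    // weighted vertex degrees) in diagonal_; the Laplacian mv() below then
+    // applies D via diagmv() and subtracts A*x with a second SpMV.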
+ } + + // y = alpha*A*x + beta*y + // + void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const override + { + constexpr int BLOCK_SIZE = 1024; + auto n = sparse_matrix_t::nrows_; + + auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); + auto stream = sparse_matrix_t::get_handle().get_stream(); + + // scales y by beta: + // + if (beta == 0) { + CUDA_TRY(cudaMemsetAsync(y, 0, n * sizeof(value_type), stream)); + } else if (beta != 1) { + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasscal(cublas_h, n, &beta, y, 1, stream)); + } + + // Apply diagonal matrix + // + dim3 gridDim{std::min((n + BLOCK_SIZE - 1) / BLOCK_SIZE, 65535), 1, 1}; + + dim3 blockDim{BLOCK_SIZE, 1, 1}; + diagmv<<>>(n, alpha, diagonal_.raw(), x, y); + RAFT_CHECK_CUDA(stream); + + // Apply adjacency matrix + // + sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); + } + + vector_t diagonal_; +}; + +template +struct modularity_matrix_t : laplacian_matrix_t { + modularity_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : laplacian_matrix_t( + raft_handle, row_offsets, col_indices, values, nrows, nnz) + { + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + } + + modularity_matrix_t(handle_t const& raft_handle, + sparse_matrix_t const& csr_m) + : laplacian_matrix_t(raft_handle, csr_m) + { + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + } + + // y = alpha*A*x + beta*y + // + void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const override + { + auto n = sparse_matrix_t::nrows_; + + auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); + auto stream = sparse_matrix_t::get_handle().get_stream(); + + // y = A*x + // + sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); + value_type dot_res; + + // gamma = d'*x + // + // Cublas::dot(this->n, D.raw(), 1, x, 1, &dot_res); + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublasdot(cublas_h, + n, + laplacian_matrix_t::diagonal_.raw(), + 1, + x, + 1, + &dot_res, + stream)); + + // y = y -(gamma/edge_sum)*d + // + value_type gamma_ = -dot_res / edge_sum_; + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublasaxpy(cublas_h, + n, + &gamma_, + laplacian_matrix_t::diagonal_.raw(), + 1, + y, + 1, + stream)); + } + + value_type edge_sum_; +}; + +} // namespace detail +} // namespace matrix +} // namespace spectral +} // namespace raft diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index b6e25814e6..0033dbeea9 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -48,7 +48,7 @@ struct lanczos_solver_t { index_type_t solve_smallest_eigenvectors( handle_t const& handle, - spectral::matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { @@ -71,7 +71,7 @@ struct lanczos_solver_t { index_type_t solve_largest_eigenvectors( handle_t const& handle, - 
spectral::matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 95940ceb2a..9454d0c7ff 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -15,17 +15,10 @@ */ #pragma once -#include #include -#include -#include +#include #include -#include -#include - -#include - // ========================================================= // Useful macros // ========================================================= @@ -39,410 +32,25 @@ namespace matrix { using size_type = int; // for now; TODO: move it in appropriate header -// Apply diagonal matrix to vector: -// -template -static __global__ void diagmv(IndexType_ n, - ValueType_ alpha, - const ValueType_* __restrict__ D, - const ValueType_* __restrict__ x, - ValueType_* __restrict__ y) -{ - IndexType_ i = threadIdx.x + blockIdx.x * blockDim.x; - while (i < n) { - y[i] += alpha * D[i] * x[i]; - i += blockDim.x * gridDim.x; - } -} - // specifies type of algorithm used // for SpMv: // -enum struct sparse_mv_alg_t : int { - SPARSE_MV_UNDEFINED = -1, - SPARSE_MV_ALG_DEFAULT, // generic, for any sparse matrix - SPARSE_MV_ALG1, // typical for CSR - SPARSE_MV_ALG2 // may provide better performamce for irregular sparse matrices -}; +using sparse_mv_alt_t = detail::sparse_mv_alg_t; // Vector "view"-like aggregate for linear algebra purposes // template -struct vector_view_t { - value_type* buffer_; - size_type size_; - - vector_view_t(value_type* buffer, size_type sz) : buffer_(buffer), size_(sz) {} - - vector_view_t(vector_view_t&& other) : buffer_(other.raw()), size_(other.size()) {} - - vector_view_t& operator=(vector_view_t&& other) - { - buffer_ = other.raw(); - size_ = other.size(); - } -}; +using vector_view_t = detail::vector_view_t; template -class vector_t { - public: - vector_t(handle_t const& raft_handle, size_type sz) - : buffer_(sz, raft_handle.get_stream()), thrust_policy(raft_handle.get_thrust_policy()) - { - } - - size_type size(void) const { return buffer_.size(); } - - value_type* raw(void) { return buffer_.data(); } - - value_type const* raw(void) const { return buffer_.data(); } - - value_type nrm1() const - { - return thrust::reduce(thrust_policy, - buffer_.data(), - buffer_.data() + buffer_.size(), - value_type{0}, - [] __device__(auto left, auto right) { - auto abs_left = left > 0 ? left : -left; - auto abs_right = right > 0 ? 
right : -right; - return abs_left + abs_right; - }); - } - - void fill(value_type value) - { - thrust::fill_n(thrust_policy, buffer_.data(), buffer_.size(), value); - } - - private: - using thrust_exec_policy_t = - thrust::detail::execute_with_allocator, - thrust::cuda_cub::execute_on_stream_base>; - rmm::device_uvector buffer_; - const thrust_exec_policy_t thrust_policy; -}; +using vector_t = detail::vector_t; template -struct sparse_matrix_t { - sparse_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const ncols, - index_type const nnz) - : handle_(raft_handle), - row_offsets_(row_offsets), - col_indices_(col_indices), - values_(values), - nrows_(nrows), - ncols_(ncols), - nnz_(nnz) - { - } - - sparse_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : handle_(raft_handle), - row_offsets_(row_offsets), - col_indices_(col_indices), - values_(values), - nrows_(nrows), - ncols_(nrows), - nnz_(nnz) - { - } - - template - sparse_matrix_t(handle_t const& raft_handle, CSRView const& csr_view) - : handle_(raft_handle), - row_offsets_(csr_view.offsets), - col_indices_(csr_view.indices), - values_(csr_view.edge_data), - nrows_(csr_view.number_of_vertices), - ncols_(csr_view.number_of_vertices), - nnz_(csr_view.number_of_edges) - { - } - - virtual ~sparse_matrix_t(void) = - default; // virtual because used as base for following matrix types - - // y = alpha*A*x + beta*y - //(Note: removed const-ness of x, because CUDA 11 SpMV - // descriptor creation works with non-const, and const-casting - // down is dangerous) - // - virtual void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const - { - using namespace sparse; - - RAFT_EXPECTS(x != nullptr, "Null x buffer."); - RAFT_EXPECTS(y != nullptr, "Null y buffer."); - - auto cusparse_h = handle_.get_cusparse_handle(); - auto stream = handle_.get_stream(); - - cusparseOperation_t trans = transpose ? CUSPARSE_OPERATION_TRANSPOSE : // transpose - CUSPARSE_OPERATION_NON_TRANSPOSE; // non-transpose - -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - auto size_x = transpose ? nrows_ : ncols_; - auto size_y = transpose ? ncols_ : nrows_; - - cusparseSpMVAlg_t spmv_alg = translate_algorithm(alg); - - // create descriptors: - //(below casts are necessary, because - // cusparseCreateCsr(...) 
takes non-const - // void*; the casts should be harmless) - // - cusparseSpMatDescr_t matA; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, - nrows_, - ncols_, - nnz_, - const_cast(row_offsets_), - const_cast(col_indices_), - const_cast(values_))); - - cusparseDnVecDescr_t vecX; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); - - cusparseDnVecDescr_t vecY; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); - - // get (scratch) external device buffer size: - // - size_t bufferSize; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( - cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); - - // allocate external buffer: - // - vector_t external_buffer(handle_, bufferSize); - - // finally perform SpMV: - // - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( - cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); - - // free descriptors: - //(TODO: maybe wrap them in a RAII struct?) - // - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecY)); - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); -#else - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); - cusparseMatDescr_t descr = 0; - RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); - if (symmetric) { - RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_SYMMETRIC)); - } else { - RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); - } - RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, - trans, - nrows_, - ncols_, - nnz_, - &alpha, - descr, - values_, - row_offsets_, - col_indices_, - x, - &beta, - y, - stream)); - RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); -#endif - } - - handle_t const& get_handle(void) const { return handle_; } - -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const - { - switch (alg) { - case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_CSRMV_ALG1; - case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_CSRMV_ALG2; - default: return CUSPARSE_MV_ALG_DEFAULT; - } - } -#endif - - // private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence, - // aggregate - - handle_t const& handle_; - index_type const* row_offsets_; - index_type const* col_indices_; - value_type const* values_; - index_type const nrows_; - index_type const ncols_; - index_type const nnz_; -}; - +using sparse_matrix_t = sparse_matrix_t; template -struct laplacian_matrix_t : sparse_matrix_t { - laplacian_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : sparse_matrix_t( - raft_handle, row_offsets, col_indices, values, nrows, nnz), - diagonal_(raft_handle, nrows) - { - vector_t ones{raft_handle, nrows}; - ones.fill(1.0); - sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); - } - - laplacian_matrix_t(handle_t const& raft_handle, - sparse_matrix_t const& csr_m) - : sparse_matrix_t(raft_handle, - csr_m.row_offsets_, - csr_m.col_indices_, - csr_m.values_, - csr_m.nrows_, - csr_m.nnz_), - diagonal_(raft_handle, csr_m.nrows_) - { - vector_t ones{raft_handle, csr_m.nrows_}; - ones.fill(1.0); - 
sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); - } - - // y = alpha*A*x + beta*y - // - void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const override - { - constexpr int BLOCK_SIZE = 1024; - auto n = sparse_matrix_t::nrows_; - - auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); - auto stream = sparse_matrix_t::get_handle().get_stream(); - - // scales y by beta: - // - if (beta == 0) { - CUDA_TRY(cudaMemsetAsync(y, 0, n * sizeof(value_type), stream)); - } else if (beta != 1) { - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasscal(cublas_h, n, &beta, y, 1, stream)); - } - - // Apply diagonal matrix - // - dim3 gridDim{std::min((n + BLOCK_SIZE - 1) / BLOCK_SIZE, 65535), 1, 1}; - - dim3 blockDim{BLOCK_SIZE, 1, 1}; - diagmv<<>>(n, alpha, diagonal_.raw(), x, y); - RAFT_CHECK_CUDA(stream); - - // Apply adjacency matrix - // - sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); - } - - vector_t diagonal_; -}; - +using laplacian_matrix_t : detail::laplacian_matrix_t; template -struct modularity_matrix_t : laplacian_matrix_t { - modularity_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : laplacian_matrix_t( - raft_handle, row_offsets, col_indices, values, nrows, nnz) - { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); - } - - modularity_matrix_t(handle_t const& raft_handle, - sparse_matrix_t const& csr_m) - : laplacian_matrix_t(raft_handle, csr_m) - { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); - } - - // y = alpha*A*x + beta*y - // - void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const override - { - auto n = sparse_matrix_t::nrows_; - - auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); - auto stream = sparse_matrix_t::get_handle().get_stream(); - - // y = A*x - // - sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); - value_type dot_res; - - // gamma = d'*x - // - // Cublas::dot(this->n, D.raw(), 1, x, 1, &dot_res); - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasdot(cublas_h, - n, - laplacian_matrix_t::diagonal_.raw(), - 1, - x, - 1, - &dot_res, - stream)); - - // y = y -(gamma/edge_sum)*d - // - value_type gamma_ = -dot_res / edge_sum_; - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasaxpy(cublas_h, - n, - &gamma_, - laplacian_matrix_t::diagonal_.raw(), - 1, - y, - 1, - stream)); - } - - value_type edge_sum_; -}; +using modularity_matrix_t = modularity_matrix_t; } // namespace matrix } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 597ef530a2..4b772c08ee 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -49,13 +49,14 @@ namespace spectral { * @return statistics: number of eigensolver iterations, . 
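 *
 * A minimal usage sketch (editorial addition, not part of this patch). The
 * eigen-solver config/type names follow eigen_solvers.hpp; the cluster
 * solver and all device buffers are assumptions for illustration only:
 * @code{.cpp}
 *   raft::handle_t handle;
 *   // device CSR arrays (offsets, indices, values) assumed already populated
 *   raft::spectral::matrix::sparse_matrix_t<int, double> csr_m{
 *     handle, offsets, indices, values, n_vertices, nnz};
 *   raft::spectral::eigen_solver_config_t<int, double> cfg{
 *     n_clusters, max_iter, restart_iter, tol};
 *   raft::spectral::lanczos_solver_t<int, double> eig_solver{cfg};
 *   // cluster_solver: any type satisfying the ClusterSolver interface,
 *   // e.g. the k-means based solver used elsewhere in this repo
 *   auto stats = raft::spectral::partition(
 *     handle, csr_m, eig_solver, cluster_solver, clusters, eigVals, eigVecs);
 * @endcode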
*/ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { return detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); @@ -81,7 +82,7 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, From 04e668bd45941b952fa29a77cf4d00fd90b28e8b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:27:12 -0500 Subject: [PATCH 3/8] Updates --- cpp/include/raft/spectral/matrix_wrappers.hpp | 13 +++++-------- .../raft/spectral/modularity_maximization.hpp | 8 +++++--- cpp/include/raft/spectral/partition.hpp | 5 +++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 9454d0c7ff..479634c9b7 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -15,17 +15,12 @@ */ #pragma once -#include #include -#include // ========================================================= // Useful macros // ========================================================= -// Get index of matrix entry -#define IDX(i, j, lda) ((i) + (j) * (lda)) - namespace raft { namespace spectral { namespace matrix { @@ -46,11 +41,13 @@ template using vector_t = detail::vector_t; template -using sparse_matrix_t = sparse_matrix_t; +using sparse_matrix_t = detail::sparse_matrix_t; + template -using laplacian_matrix_t : detail::laplacian_matrix_t; +using laplacian_matrix_t = detail::laplacian_matrix_t; + template -using modularity_matrix_t = modularity_matrix_t; +using modularity_matrix_t = detail::modularity_matrix_t; } // namespace matrix } // namespace spectral diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 460e2cf26f..e67be767a2 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -58,8 +58,9 @@ std::tuple modularity_maximization( weight_t* eigVals, weight_t* eigVecs) { - return detail::modularity_maximization( - handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); + return raft::spectral::detail:: + modularity_maximization( + handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } //=================================================== // Analysis of graph partition @@ -79,7 +80,8 @@ void analyzeModularity(handle_t const& handle, vertex_t const* __restrict__ clusters, weight_t& modularity) { - detail::analyzeModularity(handle, csr_m, nClusters, clusters, modularity); + raft::spectral::detail::analyzeModularity( + handle, csr_m, nClusters, clusters, modularity); } } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 4b772c08ee..f62773a958 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -58,7 
+58,7 @@ std::tuple partition( weight_t* eigVals, weight_t* eigVecs) { - return detail::partition( + return raft::spectral::detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } @@ -88,7 +88,8 @@ void analyzePartition(handle_t const& handle, weight_t& edgeCut, weight_t& cost) { - detail::analyzePartition(handle, csr_m, nClusters, clusters, edgeCut, cost); + raft::spectral::detail::analyzePartition( + handle, csr_m, nClusters, clusters, edgeCut, cost); } } // namespace spectral From 82ca1adb9e76e202e75ea33f7e8fdb842380f599 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:34:07 -0500 Subject: [PATCH 4/8] Updates --- cpp/test/spectral_matrix.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/spectral_matrix.cu index 652aa61451..5d0768a729 100644 --- a/cpp/test/spectral_matrix.cu +++ b/cpp/test/spectral_matrix.cu @@ -19,9 +19,11 @@ #include #include -#include +#include namespace raft { +namespace spectral { +namespace matrix { namespace { template struct csr_view_t { @@ -34,7 +36,6 @@ struct csr_view_t { } // namespace TEST(Raft, SpectralMatrices) { - using namespace matrix; using index_type = int; using value_type = double; @@ -75,4 +76,6 @@ TEST(Raft, SpectralMatrices) EXPECT_ANY_THROW(cnstr_mm2()); // because of nullptr ptr args } +} // namespace matrix +} // namespace spectral } // namespace raft From 21fc5b507fea9b2460c478d31e4b8fd50e2b4237 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:59:27 -0500 Subject: [PATCH 5/8] iDarn typo --- cpp/include/raft/spectral/matrix_wrappers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 479634c9b7..237f1275fd 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -30,7 +30,7 @@ using size_type = int; // for now; TODO: move it in appropriate header // specifies type of algorithm used // for SpMv: // -using sparse_mv_alt_t = detail::sparse_mv_alg_t; +using sparse_mv_alg_t = detail::sparse_mv_alg_t; // Vector "view"-like aggregate for linear algebra purposes // From d1cc186301fa3c64b50dcc7030d2eb1b87ff7dae Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 15:54:45 -0500 Subject: [PATCH 6/8] Adding buffers back for renumbering --- cpp/include/raft/mr/device/buffer.hpp | 70 ++++++++++++++++++++ cpp/include/raft/mr/host/buffer.hpp | 85 +++++++++++++++++++++++++ cpp/test/CMakeLists.txt | 2 + cpp/test/mr/device/buffer.cpp | 92 +++++++++++++++++++++++++++ cpp/test/mr/host/buffer.cpp | 70 ++++++++++++++++++++ 5 files changed, 319 insertions(+) create mode 100644 cpp/include/raft/mr/device/buffer.hpp create mode 100644 cpp/include/raft/mr/host/buffer.hpp create mode 100644 cpp/test/mr/device/buffer.cpp create mode 100644 cpp/test/mr/host/buffer.cpp diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp new file mode 100644 index 0000000000..aee3cba046 --- /dev/null +++ b/cpp/include/raft/mr/device/buffer.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include + +namespace raft { +namespace mr { +namespace device { + +/** + * @brief RAII object owning a contiguous typed device buffer. The passed in + * allocator supports asynchronous allocation and deallocation so this + * can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(..., cudaStream_t stream) { + * ... + * raft::mr::device::buffer temp(stream, 0); + * ... + * temp.resize(n); + * kernelA<<>>(...,temp.data(),...); + * kernelB<<>>(...,temp.data(),...); + * temp.release(); + * ... + * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } +}; // class buffer + +}; // namespace device +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp new file mode 100644 index 0000000000..de9468add8 --- /dev/null +++ b/cpp/include/raft/mr/host/buffer.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include +#include + +namespace raft { +namespace mr { +namespace host { + +/** + * @brief RAII object owning a contigous typed host buffer (aka pinned memory). + * The passed in allocator supports asynchronus allocation and + * deallocation so this can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { + * ... + * raft::mr::host::buffer temp(stream, 0); + * ... + * temp.resize(n); + * raft::copy(temp.data(), in_d, temp.size()); + * ... + * raft::copy(out_d, temp.data(), temp.size()); + * temp.release(stream); + * ... 
+ * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, const device::buffer& other) + : buffer_base(alloc, other.get_stream(), other.size()) + { + if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } + } + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } + + reference operator[](size_type pos) { return data_[pos]; } + + const_reference operator[](size_type pos) const { return data_[pos]; } + + private: + using buffer_base::data_; +}; + +}; // namespace host +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index fda60e1cb0..9f5ca95e93 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -65,6 +65,8 @@ add_executable(test_raft test/matrix/math.cu test/matrix/matrix.cu test/matrix/linewise_op.cu + test/mr/host/buffer.cpp + test/mr/device/buffer.cpp test/mst.cu test/random/rng.cu test/random/rng_int.cu diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp new file mode 100644 index 0000000000..324e9b9e4b --- /dev/null +++ b/cpp/test/mr/device/buffer.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index fda60e1cb0..9f5ca95e93 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -65,6 +65,8 @@ add_executable(test_raft
   test/matrix/math.cu
   test/matrix/matrix.cu
   test/matrix/linewise_op.cu
+  test/mr/host/buffer.cpp
+  test/mr/device/buffer.cpp
   test/mst.cu
   test/random/rng.cu
   test/random/rng_int.cu
diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp
new file mode 100644
index 0000000000..324e9b9e4b
--- /dev/null
+++ b/cpp/test/mr/device/buffer.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <raft/cudart_utils.h>
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/limiting_resource_adaptor.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+namespace raft {
+namespace mr {
+namespace device {
+
+TEST(Raft, DeviceBufferAlloc)
+{
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // no allocation at construction
+  rmm::device_uvector<char> buff(0, stream);
+  ASSERT_EQ(0, buff.size());
+  // explicit allocation after construction
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // resizing to a smaller buffer size
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // explicit deallocation
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  // repeat the resize/release cycle to verify the buffer can be reused
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+TEST(Raft, DeviceBufferZeroResize)
+{
+  // Create a limiting_resource_adaptor to track allocations
+  auto curr_mr =
+    dynamic_cast<rmm::mr::cuda_memory_resource*>(rmm::mr::get_current_device_resource());
+  auto limit_mr =
+    std::make_shared<rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource>>(curr_mr,
+                                                                                        1000);
+
+  rmm::mr::set_current_device_resource(limit_mr.get());
+
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // allocation of 10 bytes at construction
+  rmm::device_uvector<char> buff(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // resizing down to an empty buffer
+  buff.resize(0, stream);
+  ASSERT_EQ(0, buff.size());
+  // growing the buffer again
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // explicit deallocation
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+
+  // Verify that no memory is left allocated (this was previously not the case)
+  ASSERT_EQ(0, limit_mr->get_allocated_bytes());
+
+  rmm::mr::set_current_device_resource(curr_mr);
+
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+}  // namespace device
+}  // namespace mr
+}  // namespace raft
\ No newline at end of file
diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp
new file mode 100644
index 0000000000..c174b269da
--- /dev/null
+++ b/cpp/test/mr/host/buffer.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <raft/cudart_utils.h>
+#include <raft/mr/host/buffer.hpp>
+
+namespace raft {
+namespace mr {
+namespace host {
+
+TEST(Raft, HostBuffer)
+{
+  auto alloc = std::make_shared<default_allocator>();
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // no allocation at construction
+  buffer<char> buff(alloc, stream);
+  ASSERT_EQ(0, buff.size());
+  // explicit allocation after construction
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // resizing to a smaller buffer size
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // explicit deallocation
+  buff.release(stream);
+  ASSERT_EQ(0, buff.size());
+  // use these methods without the explicit stream parameter
+  buff.resize(20);
+  ASSERT_EQ(20, buff.size());
+  buff.resize(10);
+  ASSERT_EQ(10, buff.size());
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+TEST(Raft, DeviceToHostBuffer)
+{
+  auto d_alloc = std::make_shared<device::default_allocator>();
+  auto h_alloc = std::make_shared<default_allocator>();
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  device::buffer<char> d_buff(d_alloc, stream, 32);
+  RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream));
+  buffer<char> h_buff(h_alloc, d_buff);
+  ASSERT_EQ(d_buff.size(), h_buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+}  // namespace host
+}  // namespace mr
+}  // namespace raft
\ No newline at end of file
From ae27571ea85616e47cfe51c374a2a8b5c9cf6173 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Wed, 9 Feb 2022 16:22:05 -0500
Subject: [PATCH 7/8] buffer base

---
 cpp/include/raft/mr/buffer_base.hpp | 211 ++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100644 cpp/include/raft/mr/buffer_base.hpp

diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp
new file mode 100644
index 0000000000..447d40e8ae
--- /dev/null
+++ b/cpp/include/raft/mr/buffer_base.hpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <raft/cudart_utils.h>
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace raft {
+ namespace mr {
+
+/**
+ * @brief Base for all RAII-based owning of temporary memory allocations. This
+ *        class should ideally not be used by users directly, but instead via
+ *        the child classes `device::buffer` and `host::buffer`.
+ *
+ * @tparam T          data type
+ * @tparam AllocatorT The underlying allocator object
+ */
+  template <typename T, typename AllocatorT>
+  class buffer_base {
+   public:
+    using size_type       = std::size_t;
+    using value_type      = T;
+    using iterator        = value_type*;
+    using const_iterator  = const value_type*;
+    using reference       = T&;
+    using const_reference = const T&;
+
+    buffer_base() = delete;
+
+    buffer_base(const buffer_base& other) = delete;
+
+    buffer_base& operator=(const buffer_base& other) = delete;
+
+    /**
+     * @brief Main ctor
+     *
+     * @param[in] allocator asynchronous allocator used for managing buffer life
+     * @param[in] stream    cuda stream on which this buffer's allocation and
+     *                      deallocation operations are queued
+     * @param[in] n         size of the buffer (in number of elements)
+     */
+    buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
+      : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
+    {
+      if (capacity_ > 0) {
+        data_ =
+          static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
+        RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
+      }
+    }
+
+    ~buffer_base() { release(); }
+
+    value_type* data() { return data_; }
+
+    const value_type* data() const { return data_; }
+
+    size_type size() const { return size_; }
+
+    void clear() { size_ = 0; }
+
+    iterator begin() { return data_; }
+
+    const_iterator begin() const { return data_; }
+
+    iterator end() { return data_ + size_; }
+
+    const_iterator end() const { return data_ + size_; }
+
+    /**
+     * @brief Reserve new memory size for this buffer.
+     *
+     * It re-allocates a fresh buffer if the new requested capacity is more than
+     * the current one, copies the old buffer contents to this new buffer and
+     * removes the old one.
+     *
+     * @param[in] new_capacity new capacity (in number of elements)
+     * @{
+     */
+    void reserve(size_type new_capacity)
+    {
+      if (new_capacity > capacity_) {
+        auto* new_data =
+          static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
+        if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
+        // Only deallocate if we have allocated a pointer
+        if (nullptr != data_) {
+          allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+        }
+        data_     = new_data;
+        capacity_ = new_capacity;
+      }
+    }
+
+    void reserve(size_type new_capacity, cudaStream_t stream)
+    {
+      set_stream(stream);
+      reserve(new_capacity);
+    }
+    /** @} */
+
+    /**
+     * @brief Resize the underlying buffer (uses `reserve` method internally)
+     *
+     * @param[in] new_size new buffer size
+     * @{
+     */
+    void resize(const size_type new_size)
+    {
+      reserve(new_size);
+      size_ = new_size;
+    }
+
+    void resize(const size_type new_size, cudaStream_t stream)
+    {
+      set_stream(stream);
+      resize(new_size);
+    }
+    /** @} */
+
+    /**
+     * @brief Deletes the underlying buffer
+     *
+     * If this method is not explicitly called, it will be invoked by the destructor
+     * @{
+     */
+    void release()
+    {
+      if (nullptr != data_) {
+        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+      }
+      data_     = nullptr;
+      capacity_ = 0;
+      size_     = 0;
+    }
+
+    void release(cudaStream_t stream)
+    {
+      set_stream(stream);
+      release();
+    }
+    /** @} */
+
+    /**
+     * @brief returns the underlying allocator used
+     *
+     * @return the allocator pointer
+     */
+    std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
+
+    /**
+     * @brief returns the underlying stream used
+     *
+     * @return the cuda stream
+     */
+    cudaStream_t get_stream() const { return stream_; }
+
+   protected:
+    value_type* data_;
+
+   private:
+    size_type size_;
+    size_type capacity_;
+    cudaStream_t stream_;
+    std::shared_ptr<AllocatorT> allocator_;
+
+    /**
+     * @brief Sets a new cuda stream where the future operations will be queued
+     *
+     * This method makes sure that the inter-stream dependencies are met and taken
+     * care of, before setting the input stream as a new stream for this buffer.
+     * Ideally, the same cuda stream passed during construction is expected to be
+     * used throughout this buffer's lifetime, for performance.
+     *
+     * @param[in] stream new cuda stream to be set. If it is the same as the
+     *                   current one, then this method will be a no-op.
+     */
+    void set_stream(cudaStream_t stream)
+    {
+      if (stream_ != stream) {
+        cudaEvent_t event;
+        RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
+        RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
+        RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
+        stream_ = stream;
+        RAFT_CUDA_TRY(cudaEventDestroy(event));
+      }
+    }
+  };  // class buffer_base
+
+ };  // namespace mr
+};  // namespace raft
\ No newline at end of file
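Taken together, `reserve` and `resize` above give every derived buffer amortized growth: capacity only ever increases, shrinking is free, and memory is returned only by `release` (or the destructor). The stream-taking overloads route through `set_stream`, so switching streams is safe but costs an event. A sketch of these semantics through the device specialization from earlier in this series (allocator name as assumed before; sizes are illustrative):

@code{.cpp}
#include <cuda_runtime.h>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/device/buffer.hpp>

#include <memory>

void growth_demo(cudaStream_t stream_a, cudaStream_t stream_b)
{
  auto alloc = std::make_shared<raft::mr::device::default_allocator>();
  raft::mr::device::buffer<int> ws(alloc, stream_a, 0);

  ws.resize(100, stream_a);  // allocates capacity for 100 ints
  ws.resize(10, stream_a);   // size 10, capacity still 100: no reallocation
  ws.resize(50, stream_a);   // still within capacity: no reallocation

  // A different stream goes through set_stream(): an event is recorded on
  // stream_a and stream_b waits on it before the reallocation is enqueued.
  ws.resize(200, stream_b);  // capacity grows to 200, allocated on stream_b

  ws.release(stream_b);      // frees the allocation, ordered on stream_b
}
@endcode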
From 80b9a3f024b3fe3987740d980ed2b66bfb98de24 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Wed, 9 Feb 2022 16:28:57 -0500
Subject: [PATCH 8/8] style

---
 cpp/include/raft/mr/buffer_base.hpp | 348 ++++++++++++++--------------
 1 file changed, 174 insertions(+), 174 deletions(-)

diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp
index 447d40e8ae..151c49af7c 100644
--- a/cpp/include/raft/mr/buffer_base.hpp
+++ b/cpp/include/raft/mr/buffer_base.hpp
@@ -25,7 +25,7 @@
 #include <utility>
 
 namespace raft {
- namespace mr {
+namespace mr {
 
 /**
  * @brief Base for all RAII-based owning of temporary memory allocations. This
@@ -35,177 +35,177 @@ namespace raft {
  * @tparam T          data type
  * @tparam AllocatorT The underlying allocator object
  */
-  template <typename T, typename AllocatorT>
-  class buffer_base {
-   public:
-    using size_type       = std::size_t;
-    using value_type      = T;
-    using iterator        = value_type*;
-    using const_iterator  = const value_type*;
-    using reference       = T&;
-    using const_reference = const T&;
-
-    buffer_base() = delete;
-
-    buffer_base(const buffer_base& other) = delete;
-
-    buffer_base& operator=(const buffer_base& other) = delete;
-
-    /**
-     * @brief Main ctor
-     *
-     * @param[in] allocator asynchronous allocator used for managing buffer life
-     * @param[in] stream    cuda stream on which this buffer's allocation and
-     *                      deallocation operations are queued
-     * @param[in] n         size of the buffer (in number of elements)
-     */
-    buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
-      : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
-    {
-      if (capacity_ > 0) {
-        data_ =
-          static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
-        RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
-      }
-    }
-
-    ~buffer_base() { release(); }
-
-    value_type* data() { return data_; }
-
-    const value_type* data() const { return data_; }
-
-    size_type size() const { return size_; }
-
-    void clear() { size_ = 0; }
-
-    iterator begin() { return data_; }
-
-    const_iterator begin() const { return data_; }
-
-    iterator end() { return data_ + size_; }
-
-    const_iterator end() const { return data_ + size_; }
-
-    /**
-     * @brief Reserve new memory size for this buffer.
-     *
-     * It re-allocates a fresh buffer if the new requested capacity is more than
-     * the current one, copies the old buffer contents to this new buffer and
-     * removes the old one.
-     *
-     * @param[in] new_capacity new capacity (in number of elements)
-     * @{
-     */
-    void reserve(size_type new_capacity)
-    {
-      if (new_capacity > capacity_) {
-        auto* new_data =
-          static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
-        if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
-        // Only deallocate if we have allocated a pointer
-        if (nullptr != data_) {
-          allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-        }
-        data_     = new_data;
-        capacity_ = new_capacity;
-      }
-    }
-
-    void reserve(size_type new_capacity, cudaStream_t stream)
-    {
-      set_stream(stream);
-      reserve(new_capacity);
-    }
-    /** @} */
-
-    /**
-     * @brief Resize the underlying buffer (uses `reserve` method internally)
-     *
-     * @param[in] new_size new buffer size
-     * @{
-     */
-    void resize(const size_type new_size)
-    {
-      reserve(new_size);
-      size_ = new_size;
-    }
-
-    void resize(const size_type new_size, cudaStream_t stream)
-    {
-      set_stream(stream);
-      resize(new_size);
-    }
-    /** @} */
-
-    /**
-     * @brief Deletes the underlying buffer
-     *
-     * If this method is not explicitly called, it will be invoked by the destructor
-     * @{
-     */
-    void release()
-    {
-      if (nullptr != data_) {
-        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-      }
-      data_     = nullptr;
-      capacity_ = 0;
-      size_     = 0;
-    }
-
-    void release(cudaStream_t stream)
-    {
-      set_stream(stream);
-      release();
-    }
-    /** @} */
-
-    /**
-     * @brief returns the underlying allocator used
-     *
-     * @return the allocator pointer
-     */
-    std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
-
-    /**
-     * @brief returns the underlying stream used
-     *
-     * @return the cuda stream
-     */
-    cudaStream_t get_stream() const { return stream_; }
-
-   protected:
-    value_type* data_;
-
-   private:
-    size_type size_;
-    size_type capacity_;
-    cudaStream_t stream_;
-    std::shared_ptr<AllocatorT> allocator_;
-
-    /**
-     * @brief Sets a new cuda stream where the future operations will be queued
-     *
-     * This method makes sure that the inter-stream dependencies are met and taken
-     * care of, before setting the input stream as a new stream for this buffer.
-     * Ideally, the same cuda stream passed during construction is expected to be
-     * used throughout this buffer's lifetime, for performance.
-     *
-     * @param[in] stream new cuda stream to be set. If it is the same as the
-     *                   current one, then this method will be a no-op.
-     */
-    void set_stream(cudaStream_t stream)
-    {
-      if (stream_ != stream) {
-        cudaEvent_t event;
-        RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
-        RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
-        RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
-        stream_ = stream;
-        RAFT_CUDA_TRY(cudaEventDestroy(event));
-      }
-    }
-  };  // class buffer_base
-
- };  // namespace mr
+template <typename T, typename AllocatorT>
+class buffer_base {
+ public:
+  using size_type       = std::size_t;
+  using value_type      = T;
+  using iterator        = value_type*;
+  using const_iterator  = const value_type*;
+  using reference       = T&;
+  using const_reference = const T&;
+
+  buffer_base() = delete;
+
+  buffer_base(const buffer_base& other) = delete;
+
+  buffer_base& operator=(const buffer_base& other) = delete;
+
+  /**
+   * @brief Main ctor
+   *
+   * @param[in] allocator asynchronous allocator used for managing buffer life
+   * @param[in] stream    cuda stream on which this buffer's allocation and
+   *                      deallocation operations are queued
+   * @param[in] n         size of the buffer (in number of elements)
+   */
+  buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
+    : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
+  {
+    if (capacity_ > 0) {
+      data_ =
+        static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
+      RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
+    }
+  }
+
+  ~buffer_base() { release(); }
+
+  value_type* data() { return data_; }
+
+  const value_type* data() const { return data_; }
+
+  size_type size() const { return size_; }
+
+  void clear() { size_ = 0; }
+
+  iterator begin() { return data_; }
+
+  const_iterator begin() const { return data_; }
+
+  iterator end() { return data_ + size_; }
+
+  const_iterator end() const { return data_ + size_; }
+
+  /**
+   * @brief Reserve new memory size for this buffer.
+   *
+   * It re-allocates a fresh buffer if the new requested capacity is more than
+   * the current one, copies the old buffer contents to this new buffer and
+   * removes the old one.
+   *
+   * @param[in] new_capacity new capacity (in number of elements)
+   * @{
+   */
+  void reserve(size_type new_capacity)
+  {
+    if (new_capacity > capacity_) {
+      auto* new_data =
+        static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
+      if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
+      // Only deallocate if we have allocated a pointer
+      if (nullptr != data_) {
+        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+      }
+      data_     = new_data;
+      capacity_ = new_capacity;
+    }
+  }
+
+  void reserve(size_type new_capacity, cudaStream_t stream)
+  {
+    set_stream(stream);
+    reserve(new_capacity);
+  }
+  /** @} */
+
+  /**
+   * @brief Resize the underlying buffer (uses `reserve` method internally)
+   *
+   * @param[in] new_size new buffer size
+   * @{
+   */
+  void resize(const size_type new_size)
+  {
+    reserve(new_size);
+    size_ = new_size;
+  }
+
+  void resize(const size_type new_size, cudaStream_t stream)
+  {
+    set_stream(stream);
+    resize(new_size);
+  }
+  /** @} */
+
+  /**
+   * @brief Deletes the underlying buffer
+   *
+   * If this method is not explicitly called, it will be invoked by the destructor
+   * @{
+   */
+  void release()
+  {
+    if (nullptr != data_) {
+      allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+    }
+    data_     = nullptr;
+    capacity_ = 0;
+    size_     = 0;
+  }
+
+  void release(cudaStream_t stream)
+  {
+    set_stream(stream);
+    release();
+  }
+  /** @} */
+
+  /**
+   * @brief returns the underlying allocator used
+   *
+   * @return the allocator pointer
+   */
+  std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
+
+  /**
+   * @brief returns the underlying stream used
+   *
+   * @return the cuda stream
+   */
+  cudaStream_t get_stream() const { return stream_; }
+
+ protected:
+  value_type* data_;
+
+ private:
+  size_type size_;
+  size_type capacity_;
+  cudaStream_t stream_;
+  std::shared_ptr<AllocatorT> allocator_;
+
+  /**
+   * @brief Sets a new cuda stream where the future operations will be queued
+   *
+   * This method makes sure that the inter-stream dependencies are met and taken
+   * care of, before setting the input stream as a new stream for this buffer.
+   * Ideally, the same cuda stream passed during construction is expected to be
+   * used throughout this buffer's lifetime, for performance.
+   *
+   * @param[in] stream new cuda stream to be set. If it is the same as the
+   *                   current one, then this method will be a no-op.
+   */
+  void set_stream(cudaStream_t stream)
+  {
+    if (stream_ != stream) {
+      cudaEvent_t event;
+      RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
+      RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
+      RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
+      stream_ = stream;
+      RAFT_CUDA_TRY(cudaEventDestroy(event));
+    }
+  }
+};  // class buffer_base
+
+};  // namespace mr
 };  // namespace raft
\ No newline at end of file
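The event handshake inside `set_stream` is the general CUDA pattern for ordering work across streams without blocking the host. As a standalone sketch of just that pattern (the helper name is illustrative, not part of the patch):

@code{.cpp}
#include <cuda_runtime.h>

// Make future work on `after` wait for everything already queued on `before`,
// mirroring what buffer_base::set_stream does when the stream changes.
inline cudaError_t stream_wait_on(cudaStream_t after, cudaStream_t before)
{
  cudaEvent_t event;
  cudaError_t status = cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
  if (status != cudaSuccess) { return status; }
  status = cudaEventRecord(event, before);  // mark the current tail of `before`
  if (status == cudaSuccess) {
    status = cudaStreamWaitEvent(after, event, 0);  // `after` waits for the mark
  }
  cudaEventDestroy(event);  // safe: the wait is already enqueued
  return status;
}
@endcode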