From b514b3429dde76be524ee9d982d367b30c56cf67 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 13:17:42 -0500 Subject: [PATCH 1/8] fixing spectral APIs --- cpp/include/raft/linalg/detail/lanczos.hpp | 142 ++++++++---------- cpp/include/raft/linalg/lanczos.hpp | 49 +++--- .../detail/modularity_maximization.hpp | 19 ++- .../raft/spectral/detail/partition.hpp | 24 +-- .../raft/spectral/detail/spectral_util.cuh | 7 +- cpp/include/raft/spectral/eigen_solvers.hpp | 65 ++++---- ...atrix_wrappers.cuh => matrix_wrappers.hpp} | 2 + .../raft/spectral/modularity_maximization.hpp | 4 +- 8 files changed, 155 insertions(+), 157 deletions(-) rename cpp/include/raft/spectral/{detail/matrix_wrappers.cuh => matrix_wrappers.hpp} (99%) diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index a2b7751a05..6d51936e95 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -33,11 +33,7 @@ #include namespace raft { - -using namespace matrix; -using namespace linalg::detail; - -namespace spectral { +namespace linalg { namespace detail { // curandGeneratorNormalX @@ -87,7 +83,7 @@ inline curandStatus_t curandGenerateNormalX( */ template int performLanczosIteration(handle_t const& handle, - sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, index_type_t* iter, index_type_t maxIter, value_type_t shift, @@ -696,11 +692,6 @@ static int lanczosRestart(handle_t const& handle, return 0; } -} // namespace detail -} // namespace spectral - -namespace detail { - /** * @brief Compute smallest eigenvectors of symmetric matrix * Computes eigenvalues and eigenvectors that are least @@ -751,26 +742,25 @@ namespace detail { * @return error flag. */ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* shift, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* shift, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -993,20 +983,19 @@ int computeSmallestEigenvectors(handle_t const& handle, } template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + 
spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { - using namespace raft::spectral::detail; - // Matrix dimension index_type_t n = A.nrows_; @@ -1024,8 +1013,8 @@ int computeSmallestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos method index_type_t effIter; @@ -1097,25 +1086,24 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. */ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -1342,17 +1330,18 @@ int computeLargestEigenvectors(handle_t const& handle, } template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { // Matrix dimension index_type_t n = A.nrows_; @@ -1371,8 +1360,8 @@ int computeLargestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos 
method index_type_t effIter; @@ -1398,4 +1387,5 @@ int computeLargestEigenvectors(handle_t const& handle, } } // namespace detail +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/linalg/lanczos.hpp b/cpp/include/raft/linalg/lanczos.hpp index e7d965f810..21b65158fc 100644 --- a/cpp/include/raft/linalg/lanczos.hpp +++ b/cpp/include/raft/linalg/lanczos.hpp @@ -17,8 +17,10 @@ #pragma once #include "detail/lanczos.hpp" +#include namespace raft { +namespace linalg { // ========================================================= // Eigensolver @@ -62,17 +64,18 @@ namespace raft { * @return error flag. */ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { return detail::computeSmallestEigenvectors(handle, A, @@ -125,17 +128,18 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. */ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { return detail::computeLargestEigenvectors(handle, A, @@ -150,4 +154,5 @@ int computeLargestEigenvectors(handle_t const& handle, seed); } +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp index a55dfbe67f..6bb3dca920 100644 --- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp @@ -26,9 +26,11 @@ #include +#include #include #include #include +#include #ifdef COLLECT_TIME_STATISTICS #include @@ -52,9 +54,6 @@ namespace raft { namespace spectral { namespace detail { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral modularity_maximization // ========================================================= @@ -83,7 +82,7 @@ using namespace linalg; template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -105,7 +104,7 @@ std::tuple modularity_maximization( // Compute eigenvectors of Modularity Matrix // Initialize Modularity 
Matrix - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; auto eigen_config = eigen_solver.get_config(); auto nEigVecs = eigen_config.n_eigVecs; @@ -142,7 +141,7 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) @@ -157,14 +156,14 @@ void analyzeModularity(handle_t const& handle, auto stream = handle.get_stream(); // Device memory - vector_t part_i(handle, n); - vector_t Bx(handle, n); + raft::spectral::matrix::vector_t part_i(handle, n); + raft::spectral::matrix::vector_t Bx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY(linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Modularity - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; // Initialize output modularity = 0; diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp index b7c811d5a5..775b37d118 100644 --- a/cpp/include/raft/spectral/detail/partition.hpp +++ b/cpp/include/raft/spectral/detail/partition.hpp @@ -25,6 +25,7 @@ #include +#include #include #include #include @@ -33,9 +34,6 @@ namespace raft { namespace spectral { namespace detail { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral partitioner // ========================================================= @@ -63,13 +61,14 @@ using namespace linalg; * @return statistics: number of eigensolver iterations, . 
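 *
 * Editorial sketch (not part of this patch) of consuming the returned
 * statistics; the tuple ordering shown is an assumption inferred from the
 * "@return" note above, not a documented guarantee:
 * @code{.cpp}
 *   auto stats     = partition(handle, csr_m, eig_solver, cluster_solver,
 *                              clusters, eigVals, eigVecs);
 *   auto eig_iters = std::get<0>(stats);  // eigensolver iterations taken
 * @endcode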
*/ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); @@ -132,7 +131,7 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, @@ -153,7 +152,8 @@ void analyzePartition(handle_t const& handle, vector_t Lx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Laplacian /// sparse_matrix_t A{handle, graph}; diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 6b57566a73..c7a0f0c5ef 100644 --- a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -204,9 +205,9 @@ bool construct_indicator(handle_t const& handle, weight_t& clustersize, weight_t& partStats, vertex_t const* __restrict__ clusters, - vector_t& part_i, - vector_t& Bx, - laplacian_matrix_t const& B) + raft::spectral::matrix::vector_t& part_i, + raft::spectral::matrix::vector_t& Bx, + raft::spectral::matrix::laplacian_matrix_t const& B) { auto stream = handle.get_stream(); auto cublas_h = handle.get_cublas_handle(); diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index 192dc15a6b..b6e25814e6 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -16,12 +16,11 @@ #pragma once #include +#include namespace raft { namespace spectral { -using namespace matrix; - // aggregate of control params for Eigen Solver: // template @@ -47,47 +46,49 @@ struct lanczos_solver_t { { } - index_type_t solve_smallest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* __restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_smallest_eigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeSmallestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeSmallestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } - index_type_t solve_largest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* 
__restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_largest_eigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeLargestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeLargestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh b/cpp/include/raft/spectral/matrix_wrappers.hpp similarity index 99% rename from cpp/include/raft/spectral/detail/matrix_wrappers.cuh rename to cpp/include/raft/spectral/matrix_wrappers.hpp index d86dc21135..95940ceb2a 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -34,6 +34,7 @@ #define IDX(i, j, lda) ((i) + (j) * (lda)) namespace raft { +namespace spectral { namespace matrix { using size_type = int; // for now; TODO: move it in appropriate header @@ -444,4 +445,5 @@ struct modularity_matrix_t : laplacian_matrix_t { }; } // namespace matrix +} // namespace spectral } // namespace raft diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 466851c74f..460e2cf26f 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -51,7 +51,7 @@ namespace spectral { template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -74,7 +74,7 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) From 3575dd673c573114160814dfc4db96fac87d62ef Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 9 Feb 2022 14:04:08 -0500 Subject: [PATCH 2/8] Fixing API --- cpp/include/raft/cluster/detail/kmeans.cuh | 12 +- cpp/include/raft/linalg/detail/lanczos.hpp | 2 +- .../raft/spectral/detail/matrix_wrappers.cuh | 451 ++++++++++++++++++ cpp/include/raft/spectral/eigen_solvers.hpp | 4 +- cpp/include/raft/spectral/matrix_wrappers.hpp | 406 +--------------- cpp/include/raft/spectral/partition.hpp | 17 +- 6 files changed, 475 insertions(+), 417 deletions(-) create mode 100644 cpp/include/raft/spectral/detail/matrix_wrappers.cuh diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 5f1a0e137d..039ac8854a 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -32,8 +32,8 @@ #include #include #include -#include #include +#include namespace raft { namespace cluster { @@ -948,8 +948,6 @@ int kmeans(handle_t const& handle, index_type_t& iters, unsigned long long seed = 123456) { - using namespace matrix; - // Check that parameters are valid RAFT_EXPECTS(n > 0, "invalid parameter (n<1)"); RAFT_EXPECTS(d > 0, "invalid parameter (d<1)"); @@ -958,10 +956,10 @@ int kmeans(handle_t const& handle, RAFT_EXPECTS(maxiter >= 0, "invalid parameter (maxiter<0)"); // Allocate memory - vector_t clusterSizes(handle, k); - vector_t centroids(handle, d * k); - vector_t work(handle, n * max(k, d)); - vector_t work_int(handle, 2 * d * n); + raft::spectral::matrix::vector_t clusterSizes(handle, k); + raft::spectral::matrix::vector_t centroids(handle, d * k); + raft::spectral::matrix::vector_t work(handle, n * max(k, d)); + raft::spectral::matrix::vector_t work_int(handle, 2 * d * n); // Perform k-means return kmeans(handle, diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index 6d51936e95..9fa0d79875 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -29,8 +29,8 @@ #include #include #include -#include #include +#include namespace raft { namespace linalg { diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh new file mode 100644 index 0000000000..b4a2ed175f --- /dev/null +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include + +// ========================================================= +// Useful macros +// ========================================================= + +// Get index of matrix entry +#define IDX(i, j, lda) ((i) + (j) * (lda)) + +namespace raft { +namespace spectral { +namespace matrix { +namespace detail { + +using size_type = int; // for now; TODO: move it in appropriate header + +// Apply diagonal matrix to vector: +// +template +static __global__ void diagmv(IndexType_ n, + ValueType_ alpha, + const ValueType_* __restrict__ D, + const ValueType_* __restrict__ x, + ValueType_* __restrict__ y) +{ + IndexType_ i = threadIdx.x + blockIdx.x * blockDim.x; + while (i < n) { + y[i] += alpha * D[i] * x[i]; + i += blockDim.x * gridDim.x; + } +} + +// specifies type of algorithm used +// for SpMv: +// +enum struct sparse_mv_alg_t : int { + SPARSE_MV_UNDEFINED = -1, + SPARSE_MV_ALG_DEFAULT, // generic, for any sparse matrix + SPARSE_MV_ALG1, // typical for CSR + SPARSE_MV_ALG2 // may provide better performamce for irregular sparse matrices +}; + +// Vector "view"-like aggregate for linear algebra purposes +// +template +struct vector_view_t { + value_type* buffer_; + size_type size_; + + vector_view_t(value_type* buffer, size_type sz) : buffer_(buffer), size_(sz) {} + + vector_view_t(vector_view_t&& other) : buffer_(other.raw()), size_(other.size()) {} + + vector_view_t& operator=(vector_view_t&& other) + { + buffer_ = other.raw(); + size_ = other.size(); + } +}; + +template +class vector_t { + public: + vector_t(handle_t const& raft_handle, size_type sz) + : buffer_(sz, raft_handle.get_stream()), thrust_policy(raft_handle.get_thrust_policy()) + { + } + + size_type size(void) const { return buffer_.size(); } + + value_type* raw(void) { return buffer_.data(); } + + value_type const* raw(void) const { return buffer_.data(); } + + value_type nrm1() const + { + return thrust::reduce(thrust_policy, + buffer_.data(), + buffer_.data() + buffer_.size(), + value_type{0}, + [] __device__(auto left, auto right) { + auto abs_left = left > 0 ? left : -left; + auto abs_right = right > 0 ? 
right : -right; + return abs_left + abs_right; + }); + } + + void fill(value_type value) + { + thrust::fill_n(thrust_policy, buffer_.data(), buffer_.size(), value); + } + + private: + using thrust_exec_policy_t = + thrust::detail::execute_with_allocator, + thrust::cuda_cub::execute_on_stream_base>; + rmm::device_uvector buffer_; + const thrust_exec_policy_t thrust_policy; +}; + +template +struct sparse_matrix_t { + sparse_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const ncols, + index_type const nnz) + : handle_(raft_handle), + row_offsets_(row_offsets), + col_indices_(col_indices), + values_(values), + nrows_(nrows), + ncols_(ncols), + nnz_(nnz) + { + } + + sparse_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : handle_(raft_handle), + row_offsets_(row_offsets), + col_indices_(col_indices), + values_(values), + nrows_(nrows), + ncols_(nrows), + nnz_(nnz) + { + } + + template + sparse_matrix_t(handle_t const& raft_handle, CSRView const& csr_view) + : handle_(raft_handle), + row_offsets_(csr_view.offsets), + col_indices_(csr_view.indices), + values_(csr_view.edge_data), + nrows_(csr_view.number_of_vertices), + ncols_(csr_view.number_of_vertices), + nnz_(csr_view.number_of_edges) + { + } + + virtual ~sparse_matrix_t(void) = + default; // virtual because used as base for following matrix types + + // y = alpha*A*x + beta*y + //(Note: removed const-ness of x, because CUDA 11 SpMV + // descriptor creation works with non-const, and const-casting + // down is dangerous) + // + virtual void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const + { + using namespace sparse; + + RAFT_EXPECTS(x != nullptr, "Null x buffer."); + RAFT_EXPECTS(y != nullptr, "Null y buffer."); + + auto cusparse_h = handle_.get_cusparse_handle(); + auto stream = handle_.get_stream(); + + cusparseOperation_t trans = transpose ? CUSPARSE_OPERATION_TRANSPOSE : // transpose + CUSPARSE_OPERATION_NON_TRANSPOSE; // non-transpose + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + auto size_x = transpose ? nrows_ : ncols_; + auto size_y = transpose ? ncols_ : nrows_; + + cusparseSpMVAlg_t spmv_alg = translate_algorithm(alg); + + // create descriptors: + //(below casts are necessary, because + // cusparseCreateCsr(...) 
takes non-const + // void*; the casts should be harmless) + // + cusparseSpMatDescr_t matA; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, + nrows_, + ncols_, + nnz_, + const_cast(row_offsets_), + const_cast(col_indices_), + const_cast(values_))); + + cusparseDnVecDescr_t vecX; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); + + cusparseDnVecDescr_t vecY; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); + + // get (scratch) external device buffer size: + // + size_t bufferSize; + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( + cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); + + // allocate external buffer: + // + vector_t external_buffer(handle_, bufferSize); + + // finally perform SpMV: + // + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( + cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); + + // free descriptors: + //(TODO: maybe wrap them in a RAII struct?) + // + RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecY)); + RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); + RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); + cusparseMatDescr_t descr = 0; + RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); + if (symmetric) { + RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_SYMMETRIC)); + } else { + RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); + } + RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, + trans, + nrows_, + ncols_, + nnz_, + &alpha, + descr, + values_, + row_offsets_, + col_indices_, + x, + &beta, + y, + stream)); + RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); +#endif + } + + handle_t const& get_handle(void) const { return handle_; } + +#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP + cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const + { + switch (alg) { + case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_CSRMV_ALG1; + case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_CSRMV_ALG2; + default: return CUSPARSE_MV_ALG_DEFAULT; + } + } +#endif + + // private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence, + // aggregate + + handle_t const& handle_; + index_type const* row_offsets_; + index_type const* col_indices_; + value_type const* values_; + index_type const nrows_; + index_type const ncols_; + index_type const nnz_; +}; + +template +struct laplacian_matrix_t : sparse_matrix_t { + laplacian_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : sparse_matrix_t( + raft_handle, row_offsets, col_indices, values, nrows, nnz), + diagonal_(raft_handle, nrows) + { + vector_t ones{raft_handle, nrows}; + ones.fill(1.0); + sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); + } + + laplacian_matrix_t(handle_t const& raft_handle, + sparse_matrix_t const& csr_m) + : sparse_matrix_t(raft_handle, + csr_m.row_offsets_, + csr_m.col_indices_, + csr_m.values_, + csr_m.nrows_, + csr_m.nnz_), + diagonal_(raft_handle, csr_m.nrows_) + { + vector_t ones{raft_handle, csr_m.nrows_}; + ones.fill(1.0); + sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); 
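+    // (editorial note) mv() computes y = alpha*A*x + beta*y, so with x = ones,
+    // alpha = 1 and beta = 0 the call above stores the row sums of A (the
+    // weighted vertex degrees) in diagonal_; the Laplacian mv() below then
+    // applies D via diagmv() and subtracts A*x with a second SpMV.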
+ } + + // y = alpha*A*x + beta*y + // + void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const override + { + constexpr int BLOCK_SIZE = 1024; + auto n = sparse_matrix_t::nrows_; + + auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); + auto stream = sparse_matrix_t::get_handle().get_stream(); + + // scales y by beta: + // + if (beta == 0) { + CUDA_TRY(cudaMemsetAsync(y, 0, n * sizeof(value_type), stream)); + } else if (beta != 1) { + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasscal(cublas_h, n, &beta, y, 1, stream)); + } + + // Apply diagonal matrix + // + dim3 gridDim{std::min((n + BLOCK_SIZE - 1) / BLOCK_SIZE, 65535), 1, 1}; + + dim3 blockDim{BLOCK_SIZE, 1, 1}; + diagmv<<>>(n, alpha, diagonal_.raw(), x, y); + RAFT_CHECK_CUDA(stream); + + // Apply adjacency matrix + // + sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); + } + + vector_t diagonal_; +}; + +template +struct modularity_matrix_t : laplacian_matrix_t { + modularity_matrix_t(handle_t const& raft_handle, + index_type const* row_offsets, + index_type const* col_indices, + value_type const* values, + index_type const nrows, + index_type const nnz) + : laplacian_matrix_t( + raft_handle, row_offsets, col_indices, values, nrows, nnz) + { + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + } + + modularity_matrix_t(handle_t const& raft_handle, + sparse_matrix_t const& csr_m) + : laplacian_matrix_t(raft_handle, csr_m) + { + edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); + } + + // y = alpha*A*x + beta*y + // + void mv(value_type alpha, + value_type* __restrict__ x, + value_type beta, + value_type* __restrict__ y, + sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, + bool transpose = false, + bool symmetric = false) const override + { + auto n = sparse_matrix_t::nrows_; + + auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); + auto stream = sparse_matrix_t::get_handle().get_stream(); + + // y = A*x + // + sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); + value_type dot_res; + + // gamma = d'*x + // + // Cublas::dot(this->n, D.raw(), 1, x, 1, &dot_res); + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublasdot(cublas_h, + n, + laplacian_matrix_t::diagonal_.raw(), + 1, + x, + 1, + &dot_res, + stream)); + + // y = y -(gamma/edge_sum)*d + // + value_type gamma_ = -dot_res / edge_sum_; + // TODO: Call from public API when ready + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublasaxpy(cublas_h, + n, + &gamma_, + laplacian_matrix_t::diagonal_.raw(), + 1, + y, + 1, + stream)); + } + + value_type edge_sum_; +}; + +} // namespace detail +} // namespace matrix +} // namespace spectral +} // namespace raft diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index b6e25814e6..0033dbeea9 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -48,7 +48,7 @@ struct lanczos_solver_t { index_type_t solve_smallest_eigenvectors( handle_t const& handle, - spectral::matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { @@ -71,7 +71,7 @@ struct lanczos_solver_t { index_type_t solve_largest_eigenvectors( handle_t const& handle, - 
spectral::matrix::sparse_matrix_t const& A, + matrix::sparse_matrix_t const& A, value_type_t* __restrict__ eigVals, value_type_t* __restrict__ eigVecs) const { diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 95940ceb2a..9454d0c7ff 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -15,17 +15,10 @@ */ #pragma once -#include #include -#include -#include +#include #include -#include -#include - -#include - // ========================================================= // Useful macros // ========================================================= @@ -39,410 +32,25 @@ namespace matrix { using size_type = int; // for now; TODO: move it in appropriate header -// Apply diagonal matrix to vector: -// -template -static __global__ void diagmv(IndexType_ n, - ValueType_ alpha, - const ValueType_* __restrict__ D, - const ValueType_* __restrict__ x, - ValueType_* __restrict__ y) -{ - IndexType_ i = threadIdx.x + blockIdx.x * blockDim.x; - while (i < n) { - y[i] += alpha * D[i] * x[i]; - i += blockDim.x * gridDim.x; - } -} - // specifies type of algorithm used // for SpMv: // -enum struct sparse_mv_alg_t : int { - SPARSE_MV_UNDEFINED = -1, - SPARSE_MV_ALG_DEFAULT, // generic, for any sparse matrix - SPARSE_MV_ALG1, // typical for CSR - SPARSE_MV_ALG2 // may provide better performamce for irregular sparse matrices -}; +using sparse_mv_alt_t = detail::sparse_mv_alg_t; // Vector "view"-like aggregate for linear algebra purposes // template -struct vector_view_t { - value_type* buffer_; - size_type size_; - - vector_view_t(value_type* buffer, size_type sz) : buffer_(buffer), size_(sz) {} - - vector_view_t(vector_view_t&& other) : buffer_(other.raw()), size_(other.size()) {} - - vector_view_t& operator=(vector_view_t&& other) - { - buffer_ = other.raw(); - size_ = other.size(); - } -}; +using vector_view_t = detail::vector_view_t; template -class vector_t { - public: - vector_t(handle_t const& raft_handle, size_type sz) - : buffer_(sz, raft_handle.get_stream()), thrust_policy(raft_handle.get_thrust_policy()) - { - } - - size_type size(void) const { return buffer_.size(); } - - value_type* raw(void) { return buffer_.data(); } - - value_type const* raw(void) const { return buffer_.data(); } - - value_type nrm1() const - { - return thrust::reduce(thrust_policy, - buffer_.data(), - buffer_.data() + buffer_.size(), - value_type{0}, - [] __device__(auto left, auto right) { - auto abs_left = left > 0 ? left : -left; - auto abs_right = right > 0 ? 
right : -right; - return abs_left + abs_right; - }); - } - - void fill(value_type value) - { - thrust::fill_n(thrust_policy, buffer_.data(), buffer_.size(), value); - } - - private: - using thrust_exec_policy_t = - thrust::detail::execute_with_allocator, - thrust::cuda_cub::execute_on_stream_base>; - rmm::device_uvector buffer_; - const thrust_exec_policy_t thrust_policy; -}; +using vector_t = detail::vector_t; template -struct sparse_matrix_t { - sparse_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const ncols, - index_type const nnz) - : handle_(raft_handle), - row_offsets_(row_offsets), - col_indices_(col_indices), - values_(values), - nrows_(nrows), - ncols_(ncols), - nnz_(nnz) - { - } - - sparse_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : handle_(raft_handle), - row_offsets_(row_offsets), - col_indices_(col_indices), - values_(values), - nrows_(nrows), - ncols_(nrows), - nnz_(nnz) - { - } - - template - sparse_matrix_t(handle_t const& raft_handle, CSRView const& csr_view) - : handle_(raft_handle), - row_offsets_(csr_view.offsets), - col_indices_(csr_view.indices), - values_(csr_view.edge_data), - nrows_(csr_view.number_of_vertices), - ncols_(csr_view.number_of_vertices), - nnz_(csr_view.number_of_edges) - { - } - - virtual ~sparse_matrix_t(void) = - default; // virtual because used as base for following matrix types - - // y = alpha*A*x + beta*y - //(Note: removed const-ness of x, because CUDA 11 SpMV - // descriptor creation works with non-const, and const-casting - // down is dangerous) - // - virtual void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const - { - using namespace sparse; - - RAFT_EXPECTS(x != nullptr, "Null x buffer."); - RAFT_EXPECTS(y != nullptr, "Null y buffer."); - - auto cusparse_h = handle_.get_cusparse_handle(); - auto stream = handle_.get_stream(); - - cusparseOperation_t trans = transpose ? CUSPARSE_OPERATION_TRANSPOSE : // transpose - CUSPARSE_OPERATION_NON_TRANSPOSE; // non-transpose - -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - auto size_x = transpose ? nrows_ : ncols_; - auto size_y = transpose ? ncols_ : nrows_; - - cusparseSpMVAlg_t spmv_alg = translate_algorithm(alg); - - // create descriptors: - //(below casts are necessary, because - // cusparseCreateCsr(...) 
takes non-const - // void*; the casts should be harmless) - // - cusparseSpMatDescr_t matA; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatecsr(&matA, - nrows_, - ncols_, - nnz_, - const_cast(row_offsets_), - const_cast(col_indices_), - const_cast(values_))); - - cusparseDnVecDescr_t vecX; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecX, size_x, x)); - - cusparseDnVecDescr_t vecY; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&vecY, size_y, y)); - - // get (scratch) external device buffer size: - // - size_t bufferSize; - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv_buffersize( - cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, &bufferSize, stream)); - - // allocate external buffer: - // - vector_t external_buffer(handle_, bufferSize); - - // finally perform SpMV: - // - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv( - cusparse_h, trans, &alpha, matA, vecX, &beta, vecY, spmv_alg, external_buffer.raw(), stream)); - - // free descriptors: - //(TODO: maybe wrap them in a RAII struct?) - // - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecY)); - RAFT_CUSPARSE_TRY(cusparseDestroyDnVec(vecX)); - RAFT_CUSPARSE_TRY(cusparseDestroySpMat(matA)); -#else - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsesetpointermode(cusparse_h, CUSPARSE_POINTER_MODE_HOST, stream)); - cusparseMatDescr_t descr = 0; - RAFT_CUSPARSE_TRY(cusparseCreateMatDescr(&descr)); - if (symmetric) { - RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_SYMMETRIC)); - } else { - RAFT_CUSPARSE_TRY(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); - } - RAFT_CUSPARSE_TRY(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecsrmv(cusparse_h, - trans, - nrows_, - ncols_, - nnz_, - &alpha, - descr, - values_, - row_offsets_, - col_indices_, - x, - &beta, - y, - stream)); - RAFT_CUSPARSE_TRY(cusparseDestroyMatDescr(descr)); -#endif - } - - handle_t const& get_handle(void) const { return handle_; } - -#if not defined CUDA_ENFORCE_LOWER and CUDA_VER_10_1_UP - cusparseSpMVAlg_t translate_algorithm(sparse_mv_alg_t alg) const - { - switch (alg) { - case sparse_mv_alg_t::SPARSE_MV_ALG1: return CUSPARSE_CSRMV_ALG1; - case sparse_mv_alg_t::SPARSE_MV_ALG2: return CUSPARSE_CSRMV_ALG2; - default: return CUSPARSE_MV_ALG_DEFAULT; - } - } -#endif - - // private: // maybe not, keep this ASAPBNS ("as simple as possible, but not simpler"); hence, - // aggregate - - handle_t const& handle_; - index_type const* row_offsets_; - index_type const* col_indices_; - value_type const* values_; - index_type const nrows_; - index_type const ncols_; - index_type const nnz_; -}; - +using sparse_matrix_t = sparse_matrix_t; template -struct laplacian_matrix_t : sparse_matrix_t { - laplacian_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : sparse_matrix_t( - raft_handle, row_offsets, col_indices, values, nrows, nnz), - diagonal_(raft_handle, nrows) - { - vector_t ones{raft_handle, nrows}; - ones.fill(1.0); - sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); - } - - laplacian_matrix_t(handle_t const& raft_handle, - sparse_matrix_t const& csr_m) - : sparse_matrix_t(raft_handle, - csr_m.row_offsets_, - csr_m.col_indices_, - csr_m.values_, - csr_m.nrows_, - csr_m.nnz_), - diagonal_(raft_handle, csr_m.nrows_) - { - vector_t ones{raft_handle, csr_m.nrows_}; - ones.fill(1.0); - 
sparse_matrix_t::mv(1, ones.raw(), 0, diagonal_.raw()); - } - - // y = alpha*A*x + beta*y - // - void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const override - { - constexpr int BLOCK_SIZE = 1024; - auto n = sparse_matrix_t::nrows_; - - auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); - auto stream = sparse_matrix_t::get_handle().get_stream(); - - // scales y by beta: - // - if (beta == 0) { - CUDA_TRY(cudaMemsetAsync(y, 0, n * sizeof(value_type), stream)); - } else if (beta != 1) { - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasscal(cublas_h, n, &beta, y, 1, stream)); - } - - // Apply diagonal matrix - // - dim3 gridDim{std::min((n + BLOCK_SIZE - 1) / BLOCK_SIZE, 65535), 1, 1}; - - dim3 blockDim{BLOCK_SIZE, 1, 1}; - diagmv<<>>(n, alpha, diagonal_.raw(), x, y); - RAFT_CHECK_CUDA(stream); - - // Apply adjacency matrix - // - sparse_matrix_t::mv(-alpha, x, 1, y, alg, transpose, symmetric); - } - - vector_t diagonal_; -}; - +using laplacian_matrix_t : detail::laplacian_matrix_t; template -struct modularity_matrix_t : laplacian_matrix_t { - modularity_matrix_t(handle_t const& raft_handle, - index_type const* row_offsets, - index_type const* col_indices, - value_type const* values, - index_type const nrows, - index_type const nnz) - : laplacian_matrix_t( - raft_handle, row_offsets, col_indices, values, nrows, nnz) - { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); - } - - modularity_matrix_t(handle_t const& raft_handle, - sparse_matrix_t const& csr_m) - : laplacian_matrix_t(raft_handle, csr_m) - { - edge_sum_ = laplacian_matrix_t::diagonal_.nrm1(); - } - - // y = alpha*A*x + beta*y - // - void mv(value_type alpha, - value_type* __restrict__ x, - value_type beta, - value_type* __restrict__ y, - sparse_mv_alg_t alg = sparse_mv_alg_t::SPARSE_MV_ALG1, - bool transpose = false, - bool symmetric = false) const override - { - auto n = sparse_matrix_t::nrows_; - - auto cublas_h = sparse_matrix_t::get_handle().get_cublas_handle(); - auto stream = sparse_matrix_t::get_handle().get_stream(); - - // y = A*x - // - sparse_matrix_t::mv(alpha, x, 0, y, alg, transpose, symmetric); - value_type dot_res; - - // gamma = d'*x - // - // Cublas::dot(this->n, D.raw(), 1, x, 1, &dot_res); - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasdot(cublas_h, - n, - laplacian_matrix_t::diagonal_.raw(), - 1, - x, - 1, - &dot_res, - stream)); - - // y = y -(gamma/edge_sum)*d - // - value_type gamma_ = -dot_res / edge_sum_; - // TODO: Call from public API when ready - RAFT_CUBLAS_TRY( - raft::linalg::detail::cublasaxpy(cublas_h, - n, - &gamma_, - laplacian_matrix_t::diagonal_.raw(), - 1, - y, - 1, - stream)); - } - - value_type edge_sum_; -}; +using modularity_matrix_t = modularity_matrix_t; } // namespace matrix } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 597ef530a2..4b772c08ee 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -49,13 +49,14 @@ namespace spectral { * @return statistics: number of eigensolver iterations, . 
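 *
 * A minimal usage sketch (editorial addition, not part of this patch). The
 * eigen-solver config/type names follow eigen_solvers.hpp; the cluster
 * solver and all device buffers are assumptions for illustration only:
 * @code{.cpp}
 *   raft::handle_t handle;
 *   // device CSR arrays (offsets, indices, values) assumed already populated
 *   raft::spectral::matrix::sparse_matrix_t<int, double> csr_m{
 *     handle, offsets, indices, values, n_vertices, nnz};
 *   raft::spectral::eigen_solver_config_t<int, double> cfg{
 *     n_clusters, max_iter, restart_iter, tol};
 *   raft::spectral::lanczos_solver_t<int, double> eig_solver{cfg};
 *   // cluster_solver: any type satisfying the ClusterSolver interface,
 *   // e.g. the k-means based solver used elsewhere in this repo
 *   auto stats = raft::spectral::partition(
 *     handle, csr_m, eig_solver, cluster_solver, clusters, eigVals, eigVecs);
 * @endcode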
*/ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { return detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); @@ -81,7 +82,7 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, From 04e668bd45941b952fa29a77cf4d00fd90b28e8b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:27:12 -0500 Subject: [PATCH 3/8] Updates --- cpp/include/raft/spectral/matrix_wrappers.hpp | 13 +++++-------- .../raft/spectral/modularity_maximization.hpp | 8 +++++--- cpp/include/raft/spectral/partition.hpp | 5 +++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 9454d0c7ff..479634c9b7 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -15,17 +15,12 @@ */ #pragma once -#include #include -#include // ========================================================= // Useful macros // ========================================================= -// Get index of matrix entry -#define IDX(i, j, lda) ((i) + (j) * (lda)) - namespace raft { namespace spectral { namespace matrix { @@ -46,11 +41,13 @@ template using vector_t = detail::vector_t; template -using sparse_matrix_t = sparse_matrix_t; +using sparse_matrix_t = detail::sparse_matrix_t; + template -using laplacian_matrix_t : detail::laplacian_matrix_t; +using laplacian_matrix_t = detail::laplacian_matrix_t; + template -using modularity_matrix_t = modularity_matrix_t; +using modularity_matrix_t = detail::modularity_matrix_t; } // namespace matrix } // namespace spectral diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 460e2cf26f..e67be767a2 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -58,8 +58,9 @@ std::tuple modularity_maximization( weight_t* eigVals, weight_t* eigVecs) { - return detail::modularity_maximization( - handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); + return raft::spectral::detail:: + modularity_maximization( + handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } //=================================================== // Analysis of graph partition @@ -79,7 +80,8 @@ void analyzeModularity(handle_t const& handle, vertex_t const* __restrict__ clusters, weight_t& modularity) { - detail::analyzeModularity(handle, csr_m, nClusters, clusters, modularity); + raft::spectral::detail::analyzeModularity( + handle, csr_m, nClusters, clusters, modularity); } } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 4b772c08ee..f62773a958 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -58,7 
+58,7 @@ std::tuple partition( weight_t* eigVals, weight_t* eigVecs) { - return detail::partition( + return raft::spectral::detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } @@ -88,7 +88,8 @@ void analyzePartition(handle_t const& handle, weight_t& edgeCut, weight_t& cost) { - detail::analyzePartition(handle, csr_m, nClusters, clusters, edgeCut, cost); + raft::spectral::detail::analyzePartition( + handle, csr_m, nClusters, clusters, edgeCut, cost); } } // namespace spectral From 82ca1adb9e76e202e75ea33f7e8fdb842380f599 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:34:07 -0500 Subject: [PATCH 4/8] Updates --- cpp/test/spectral_matrix.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/spectral_matrix.cu index 652aa61451..5d0768a729 100644 --- a/cpp/test/spectral_matrix.cu +++ b/cpp/test/spectral_matrix.cu @@ -19,9 +19,11 @@ #include #include -#include +#include namespace raft { +namespace spectral { +namespace matrix { namespace { template struct csr_view_t { @@ -34,7 +36,6 @@ struct csr_view_t { } // namespace TEST(Raft, SpectralMatrices) { - using namespace matrix; using index_type = int; using value_type = double; @@ -75,4 +76,6 @@ TEST(Raft, SpectralMatrices) EXPECT_ANY_THROW(cnstr_mm2()); // because of nullptr ptr args } +} // namespace matrix +} // namespace spectral } // namespace raft From 21fc5b507fea9b2460c478d31e4b8fd50e2b4237 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 14:59:27 -0500 Subject: [PATCH 5/8] iDarn typo --- cpp/include/raft/spectral/matrix_wrappers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp index 479634c9b7..237f1275fd 100644 --- a/cpp/include/raft/spectral/matrix_wrappers.hpp +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -30,7 +30,7 @@ using size_type = int; // for now; TODO: move it in appropriate header // specifies type of algorithm used // for SpMv: // -using sparse_mv_alt_t = detail::sparse_mv_alg_t; +using sparse_mv_alg_t = detail::sparse_mv_alg_t; // Vector "view"-like aggregate for linear algebra purposes // From d1cc186301fa3c64b50dcc7030d2eb1b87ff7dae Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 15:54:45 -0500 Subject: [PATCH 6/8] Adding buffers back for renumbering --- cpp/include/raft/mr/device/buffer.hpp | 70 ++++++++++++++++++++ cpp/include/raft/mr/host/buffer.hpp | 85 +++++++++++++++++++++++++ cpp/test/CMakeLists.txt | 2 + cpp/test/mr/device/buffer.cpp | 92 +++++++++++++++++++++++++++ cpp/test/mr/host/buffer.cpp | 70 ++++++++++++++++++++ 5 files changed, 319 insertions(+) create mode 100644 cpp/include/raft/mr/device/buffer.hpp create mode 100644 cpp/include/raft/mr/host/buffer.hpp create mode 100644 cpp/test/mr/device/buffer.cpp create mode 100644 cpp/test/mr/host/buffer.cpp diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp new file mode 100644 index 0000000000..aee3cba046 --- /dev/null +++ b/cpp/include/raft/mr/device/buffer.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include + +namespace raft { +namespace mr { +namespace device { + +/** + * @brief RAII object owning a contiguous typed device buffer. The passed in + * allocator supports asynchronous allocation and deallocation so this + * can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(..., cudaStream_t stream) { + * ... + * raft::mr::device::buffer temp(stream, 0); + * ... + * temp.resize(n); + * kernelA<<>>(...,temp.data(),...); + * kernelB<<>>(...,temp.data(),...); + * temp.release(); + * ... + * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } +}; // class buffer + +}; // namespace device +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp new file mode 100644 index 0000000000..de9468add8 --- /dev/null +++ b/cpp/include/raft/mr/host/buffer.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include +#include + +namespace raft { +namespace mr { +namespace host { + +/** + * @brief RAII object owning a contigous typed host buffer (aka pinned memory). + * The passed in allocator supports asynchronus allocation and + * deallocation so this can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { + * ... + * raft::mr::host::buffer temp(stream, 0); + * ... + * temp.resize(n); + * raft::copy(temp.data(), in_d, temp.size()); + * ... + * raft::copy(out_d, temp.data(), temp.size()); + * temp.release(stream); + * ... 
+ * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, const device::buffer& other) + : buffer_base(alloc, other.get_stream(), other.size()) + { + if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } + } + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } + + reference operator[](size_type pos) { return data_[pos]; } + + const_reference operator[](size_type pos) const { return data_[pos]; } + + private: + using buffer_base::data_; +}; + +}; // namespace host +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index fda60e1cb0..9f5ca95e93 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -65,6 +65,8 @@ add_executable(test_raft test/matrix/math.cu test/matrix/matrix.cu test/matrix/linewise_op.cu + test/mr/host/buffer.cpp + test/mr/device/buffer.cpp test/mst.cu test/random/rng.cu test/random/rng_int.cu diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp new file mode 100644 index 0000000000..324e9b9e4b --- /dev/null +++ b/cpp/test/mr/device/buffer.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index fda60e1cb0..9f5ca95e93 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -65,6 +65,8 @@ add_executable(test_raft
   test/matrix/math.cu
   test/matrix/matrix.cu
   test/matrix/linewise_op.cu
+  test/mr/host/buffer.cpp
+  test/mr/device/buffer.cpp
   test/mst.cu
   test/random/rng.cu
   test/random/rng_int.cu
diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp
new file mode 100644
index 0000000000..324e9b9e4b
--- /dev/null
+++ b/cpp/test/mr/device/buffer.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <raft/cudart_utils.h>
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/limiting_resource_adaptor.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+namespace raft {
+namespace mr {
+namespace device {
+
+TEST(Raft, DeviceBufferAlloc)
+{
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // no allocation at construction
+  rmm::device_uvector<char> buff(0, stream);
+  ASSERT_EQ(0, buff.size());
+  // explicit allocation after construction
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // resizing to a smaller buffer size
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // explicit deallocation
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  // repeat the resize/release cycle to verify the buffer can be reused
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+TEST(Raft, DeviceBufferZeroResize)
+{
+  // Create a limiting_resource_adaptor to track allocations
+  auto curr_mr =
+    dynamic_cast<rmm::mr::cuda_memory_resource*>(rmm::mr::get_current_device_resource());
+  auto limit_mr =
+    std::make_shared<rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_memory_resource>>(curr_mr,
+                                                                                        1000);
+
+  rmm::mr::set_current_device_resource(limit_mr.get());
+
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // allocation of 10 bytes at construction
+  rmm::device_uvector<char> buff(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // resizing down to an empty buffer
+  buff.resize(0, stream);
+  ASSERT_EQ(0, buff.size());
+  // growing the buffer again
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // explicit deallocation
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+
+  // Verify that no memory is left allocated (this was previously not the case)
+  ASSERT_EQ(0, limit_mr->get_allocated_bytes());
+
+  rmm::mr::set_current_device_resource(curr_mr);
+
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+}  // namespace device
+}  // namespace mr
+}  // namespace raft
\ No newline at end of file
diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp
new file mode 100644
index 0000000000..c174b269da
--- /dev/null
+++ b/cpp/test/mr/host/buffer.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <raft/cudart_utils.h>
+#include <raft/mr/host/buffer.hpp>
+
+namespace raft {
+namespace mr {
+namespace host {
+
+TEST(Raft, HostBuffer)
+{
+  auto alloc = std::make_shared<default_allocator>();
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  // no allocation at construction
+  buffer<char> buff(alloc, stream);
+  ASSERT_EQ(0, buff.size());
+  // explicit allocation after construction
+  buff.resize(20, stream);
+  ASSERT_EQ(20, buff.size());
+  // resizing to a smaller buffer size
+  buff.resize(10, stream);
+  ASSERT_EQ(10, buff.size());
+  // explicit deallocation
+  buff.release(stream);
+  ASSERT_EQ(0, buff.size());
+  // use these methods without the explicit stream parameter
+  buff.resize(20);
+  ASSERT_EQ(20, buff.size());
+  buff.resize(10);
+  ASSERT_EQ(10, buff.size());
+  buff.release();
+  ASSERT_EQ(0, buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+TEST(Raft, DeviceToHostBuffer)
+{
+  auto d_alloc = std::make_shared<device::default_allocator>();
+  auto h_alloc = std::make_shared<default_allocator>();
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+  device::buffer<char> d_buff(d_alloc, stream, 32);
+  RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream));
+  buffer<char> h_buff(h_alloc, d_buff);
+  ASSERT_EQ(d_buff.size(), h_buff.size());
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+}  // namespace host
+}  // namespace mr
+}  // namespace raft
\ No newline at end of file
From ae27571ea85616e47cfe51c374a2a8b5c9cf6173 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Wed, 9 Feb 2022 16:22:05 -0500
Subject: [PATCH 7/8] buffer base

---
 cpp/include/raft/mr/buffer_base.hpp | 211 ++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100644 cpp/include/raft/mr/buffer_base.hpp

diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp
new file mode 100644
index 0000000000..447d40e8ae
--- /dev/null
+++ b/cpp/include/raft/mr/buffer_base.hpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda_runtime.h>
+
+#include <raft/cudart_utils.h>
+
+#include <cstddef>
+#include <memory>
+#include <utility>
+
+namespace raft {
+ namespace mr {
+
+/**
+ * @brief Base for all RAII-based owning of temporary memory allocations. This
+ *        class should ideally not be used by users directly, but instead via
+ *        the child classes `device::buffer` and `host::buffer`.
+ *
+ * @tparam T          data type
+ * @tparam AllocatorT The underlying allocator object
+ */
+  template <typename T, typename AllocatorT>
+  class buffer_base {
+   public:
+    using size_type       = std::size_t;
+    using value_type      = T;
+    using iterator        = value_type*;
+    using const_iterator  = const value_type*;
+    using reference       = T&;
+    using const_reference = const T&;
+
+    buffer_base() = delete;
+
+    buffer_base(const buffer_base& other) = delete;
+
+    buffer_base& operator=(const buffer_base& other) = delete;
+
+    /**
+     * @brief Main ctor
+     *
+     * @param[in] allocator asynchronous allocator used for managing buffer life
+     * @param[in] stream    cuda stream on which this buffer's allocation and
+     *                      deallocation operations are queued
+     * @param[in] n         size of the buffer (in number of elements)
+     */
+    buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
+      : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
+    {
+      if (capacity_ > 0) {
+        data_ =
+          static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
+        RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
+      }
+    }
+
+    ~buffer_base() { release(); }
+
+    value_type* data() { return data_; }
+
+    const value_type* data() const { return data_; }
+
+    size_type size() const { return size_; }
+
+    void clear() { size_ = 0; }
+
+    iterator begin() { return data_; }
+
+    const_iterator begin() const { return data_; }
+
+    iterator end() { return data_ + size_; }
+
+    const_iterator end() const { return data_ + size_; }
+
+    /**
+     * @brief Reserve new memory size for this buffer.
+     *
+     * It re-allocates a fresh buffer if the new requested capacity is more than
+     * the current one, copies the old buffer contents to this new buffer and
+     * removes the old one.
+     *
+     * @param[in] new_capacity new capacity (in number of elements)
+     * @{
+     */
+    void reserve(size_type new_capacity)
+    {
+      if (new_capacity > capacity_) {
+        auto* new_data =
+          static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
+        if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
+        // Only deallocate if we have allocated a pointer
+        if (nullptr != data_) {
+          allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+        }
+        data_     = new_data;
+        capacity_ = new_capacity;
+      }
+    }
+
+    void reserve(size_type new_capacity, cudaStream_t stream)
+    {
+      set_stream(stream);
+      reserve(new_capacity);
+    }
+    /** @} */
+
+    /**
+     * @brief Resize the underlying buffer (uses `reserve` method internally)
+     *
+     * @param[in] new_size new buffer size
+     * @{
+     */
+    void resize(const size_type new_size)
+    {
+      reserve(new_size);
+      size_ = new_size;
+    }
+
+    void resize(const size_type new_size, cudaStream_t stream)
+    {
+      set_stream(stream);
+      resize(new_size);
+    }
+    /** @} */
+
+    /**
+     * @brief Deletes the underlying buffer
+     *
+     * If this method is not explicitly called, it will be invoked by the destructor
+     * @{
+     */
+    void release()
+    {
+      if (nullptr != data_) {
+        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+      }
+      data_     = nullptr;
+      capacity_ = 0;
+      size_     = 0;
+    }
+
+    void release(cudaStream_t stream)
+    {
+      set_stream(stream);
+      release();
+    }
+    /** @} */
+
+    /**
+     * @brief returns the underlying allocator used
+     *
+     * @return the allocator pointer
+     */
+    std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
+
+    /**
+     * @brief returns the underlying stream used
+     *
+     * @return the cuda stream
+     */
+    cudaStream_t get_stream() const { return stream_; }
+
+   protected:
+    value_type* data_;
+
+   private:
+    size_type size_;
+    size_type capacity_;
+    cudaStream_t stream_;
+    std::shared_ptr<AllocatorT> allocator_;
+
+    /**
+     * @brief Sets a new cuda stream where the future operations will be queued
+     *
+     * This method makes sure that the inter-stream dependencies are met and taken
+     * care of, before setting the input stream as a new stream for this buffer.
+     * Ideally, the same cuda stream passed during construction is expected to be
+     * used throughout this buffer's lifetime, for performance.
+     *
+     * @param[in] stream new cuda stream to be set. If it is the same as the
+     *                   current one, then this method will be a no-op.
+     */
+    void set_stream(cudaStream_t stream)
+    {
+      if (stream_ != stream) {
+        cudaEvent_t event;
+        RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
+        RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
+        RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
+        stream_ = stream;
+        RAFT_CUDA_TRY(cudaEventDestroy(event));
+      }
+    }
+  };  // class buffer_base
+
+ };  // namespace mr
+};  // namespace raft
\ No newline at end of file
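Taken together, `reserve` and `resize` above give every derived buffer amortized growth: capacity only ever increases, shrinking is free, and memory is returned only by `release` (or the destructor). The stream-taking overloads route through `set_stream`, so switching streams is safe but costs an event. A sketch of these semantics through the device specialization from earlier in this series (allocator name as assumed before; sizes are illustrative):

@code{.cpp}
#include <cuda_runtime.h>
#include <raft/mr/device/allocator.hpp>
#include <raft/mr/device/buffer.hpp>

#include <memory>

void growth_demo(cudaStream_t stream_a, cudaStream_t stream_b)
{
  auto alloc = std::make_shared<raft::mr::device::default_allocator>();
  raft::mr::device::buffer<int> ws(alloc, stream_a, 0);

  ws.resize(100, stream_a);  // allocates capacity for 100 ints
  ws.resize(10, stream_a);   // size 10, capacity still 100: no reallocation
  ws.resize(50, stream_a);   // still within capacity: no reallocation

  // A different stream goes through set_stream(): an event is recorded on
  // stream_a and stream_b waits on it before the reallocation is enqueued.
  ws.resize(200, stream_b);  // capacity grows to 200, allocated on stream_b

  ws.release(stream_b);      // frees the allocation, ordered on stream_b
}
@endcode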
From 80b9a3f024b3fe3987740d980ed2b66bfb98de24 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Wed, 9 Feb 2022 16:28:57 -0500
Subject: [PATCH 8/8] style

---
 cpp/include/raft/mr/buffer_base.hpp | 348 ++++++++++++++--------------
 1 file changed, 174 insertions(+), 174 deletions(-)

diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp
index 447d40e8ae..151c49af7c 100644
--- a/cpp/include/raft/mr/buffer_base.hpp
+++ b/cpp/include/raft/mr/buffer_base.hpp
@@ -25,7 +25,7 @@
 #include <utility>
 
 namespace raft {
- namespace mr {
+namespace mr {
 
 /**
  * @brief Base for all RAII-based owning of temporary memory allocations. This
@@ -35,177 +35,177 @@ namespace raft {
  * @tparam T          data type
  * @tparam AllocatorT The underlying allocator object
  */
-  template <typename T, typename AllocatorT>
-  class buffer_base {
-   public:
-    using size_type       = std::size_t;
-    using value_type      = T;
-    using iterator        = value_type*;
-    using const_iterator  = const value_type*;
-    using reference       = T&;
-    using const_reference = const T&;
-
-    buffer_base() = delete;
-
-    buffer_base(const buffer_base& other) = delete;
-
-    buffer_base& operator=(const buffer_base& other) = delete;
-
-    /**
-     * @brief Main ctor
-     *
-     * @param[in] allocator asynchronous allocator used for managing buffer life
-     * @param[in] stream    cuda stream on which this buffer's allocation and
-     *                      deallocation operations are queued
-     * @param[in] n         size of the buffer (in number of elements)
-     */
-    buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
-      : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
-    {
-      if (capacity_ > 0) {
-        data_ =
-          static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
-        RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
-      }
-    }
-
-    ~buffer_base() { release(); }
-
-    value_type* data() { return data_; }
-
-    const value_type* data() const { return data_; }
-
-    size_type size() const { return size_; }
-
-    void clear() { size_ = 0; }
-
-    iterator begin() { return data_; }
-
-    const_iterator begin() const { return data_; }
-
-    iterator end() { return data_ + size_; }
-
-    const_iterator end() const { return data_ + size_; }
-
-    /**
-     * @brief Reserve new memory size for this buffer.
-     *
-     * It re-allocates a fresh buffer if the new requested capacity is more than
-     * the current one, copies the old buffer contents to this new buffer and
-     * removes the old one.
-     *
-     * @param[in] new_capacity new capacity (in number of elements)
-     * @{
-     */
-    void reserve(size_type new_capacity)
-    {
-      if (new_capacity > capacity_) {
-        auto* new_data =
-          static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
-        if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
-        // Only deallocate if we have allocated a pointer
-        if (nullptr != data_) {
-          allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-        }
-        data_     = new_data;
-        capacity_ = new_capacity;
-      }
-    }
-
-    void reserve(size_type new_capacity, cudaStream_t stream)
-    {
-      set_stream(stream);
-      reserve(new_capacity);
-    }
-    /** @} */
-
-    /**
-     * @brief Resize the underlying buffer (uses `reserve` method internally)
-     *
-     * @param[in] new_size new buffer size
-     * @{
-     */
-    void resize(const size_type new_size)
-    {
-      reserve(new_size);
-      size_ = new_size;
-    }
-
-    void resize(const size_type new_size, cudaStream_t stream)
-    {
-      set_stream(stream);
-      resize(new_size);
-    }
-    /** @} */
-
-    /**
-     * @brief Deletes the underlying buffer
-     *
-     * If this method is not explicitly called, it will be invoked by the destructor
-     * @{
-     */
-    void release()
-    {
-      if (nullptr != data_) {
-        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
-      }
-      data_     = nullptr;
-      capacity_ = 0;
-      size_     = 0;
-    }
-
-    void release(cudaStream_t stream)
-    {
-      set_stream(stream);
-      release();
-    }
-    /** @} */
-
-    /**
-     * @brief returns the underlying allocator used
-     *
-     * @return the allocator pointer
-     */
-    std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
-
-    /**
-     * @brief returns the underlying stream used
-     *
-     * @return the cuda stream
-     */
-    cudaStream_t get_stream() const { return stream_; }
-
-   protected:
-    value_type* data_;
-
-   private:
-    size_type size_;
-    size_type capacity_;
-    cudaStream_t stream_;
-    std::shared_ptr<AllocatorT> allocator_;
-
-    /**
-     * @brief Sets a new cuda stream where the future operations will be queued
-     *
-     * This method makes sure that the inter-stream dependencies are met and taken
-     * care of, before setting the input stream as a new stream for this buffer.
-     * Ideally, the same cuda stream passed during construction is expected to be
-     * used throughout this buffer's lifetime, for performance.
-     *
-     * @param[in] stream new cuda stream to be set. If it is the same as the
-     *                   current one, then this method will be a no-op.
-     */
-    void set_stream(cudaStream_t stream)
-    {
-      if (stream_ != stream) {
-        cudaEvent_t event;
-        RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
-        RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
-        RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
-        stream_ = stream;
-        RAFT_CUDA_TRY(cudaEventDestroy(event));
-      }
-    }
-  };  // class buffer_base
-
- };  // namespace mr
+template <typename T, typename AllocatorT>
+class buffer_base {
+ public:
+  using size_type       = std::size_t;
+  using value_type      = T;
+  using iterator        = value_type*;
+  using const_iterator  = const value_type*;
+  using reference       = T&;
+  using const_reference = const T&;
+
+  buffer_base() = delete;
+
+  buffer_base(const buffer_base& other) = delete;
+
+  buffer_base& operator=(const buffer_base& other) = delete;
+
+  /**
+   * @brief Main ctor
+   *
+   * @param[in] allocator asynchronous allocator used for managing buffer life
+   * @param[in] stream    cuda stream on which this buffer's allocation and
+   *                      deallocation operations are queued
+   * @param[in] n         size of the buffer (in number of elements)
+   */
+  buffer_base(std::shared_ptr<AllocatorT> allocator, cudaStream_t stream, size_type n = 0)
+    : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator))
+  {
+    if (capacity_ > 0) {
+      data_ =
+        static_cast<value_type*>(allocator_->allocate(capacity_ * sizeof(value_type), stream_));
+      RAFT_CUDA_TRY(cudaStreamSynchronize(stream_));
+    }
+  }
+
+  ~buffer_base() { release(); }
+
+  value_type* data() { return data_; }
+
+  const value_type* data() const { return data_; }
+
+  size_type size() const { return size_; }
+
+  void clear() { size_ = 0; }
+
+  iterator begin() { return data_; }
+
+  const_iterator begin() const { return data_; }
+
+  iterator end() { return data_ + size_; }
+
+  const_iterator end() const { return data_ + size_; }
+
+  /**
+   * @brief Reserve new memory size for this buffer.
+   *
+   * It re-allocates a fresh buffer if the new requested capacity is more than
+   * the current one, copies the old buffer contents to this new buffer and
+   * removes the old one.
+   *
+   * @param[in] new_capacity new capacity (in number of elements)
+   * @{
+   */
+  void reserve(size_type new_capacity)
+  {
+    if (new_capacity > capacity_) {
+      auto* new_data =
+        static_cast<value_type*>(allocator_->allocate(new_capacity * sizeof(value_type), stream_));
+      if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); }
+      // Only deallocate if we have allocated a pointer
+      if (nullptr != data_) {
+        allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+      }
+      data_     = new_data;
+      capacity_ = new_capacity;
+    }
+  }
+
+  void reserve(size_type new_capacity, cudaStream_t stream)
+  {
+    set_stream(stream);
+    reserve(new_capacity);
+  }
+  /** @} */
+
+  /**
+   * @brief Resize the underlying buffer (uses `reserve` method internally)
+   *
+   * @param[in] new_size new buffer size
+   * @{
+   */
+  void resize(const size_type new_size)
+  {
+    reserve(new_size);
+    size_ = new_size;
+  }
+
+  void resize(const size_type new_size, cudaStream_t stream)
+  {
+    set_stream(stream);
+    resize(new_size);
+  }
+  /** @} */
+
+  /**
+   * @brief Deletes the underlying buffer
+   *
+   * If this method is not explicitly called, it will be invoked by the destructor
+   * @{
+   */
+  void release()
+  {
+    if (nullptr != data_) {
+      allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_);
+    }
+    data_     = nullptr;
+    capacity_ = 0;
+    size_     = 0;
+  }
+
+  void release(cudaStream_t stream)
+  {
+    set_stream(stream);
+    release();
+  }
+  /** @} */
+
+  /**
+   * @brief returns the underlying allocator used
+   *
+   * @return the allocator pointer
+   */
+  std::shared_ptr<AllocatorT> get_allocator() const { return allocator_; }
+
+  /**
+   * @brief returns the underlying stream used
+   *
+   * @return the cuda stream
+   */
+  cudaStream_t get_stream() const { return stream_; }
+
+ protected:
+  value_type* data_;
+
+ private:
+  size_type size_;
+  size_type capacity_;
+  cudaStream_t stream_;
+  std::shared_ptr<AllocatorT> allocator_;
+
+  /**
+   * @brief Sets a new cuda stream where the future operations will be queued
+   *
+   * This method makes sure that the inter-stream dependencies are met and taken
+   * care of, before setting the input stream as a new stream for this buffer.
+   * Ideally, the same cuda stream passed during construction is expected to be
+   * used throughout this buffer's lifetime, for performance.
+   *
+   * @param[in] stream new cuda stream to be set. If it is the same as the
+   *                   current one, then this method will be a no-op.
+   */
+  void set_stream(cudaStream_t stream)
+  {
+    if (stream_ != stream) {
+      cudaEvent_t event;
+      RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
+      RAFT_CUDA_TRY(cudaEventRecord(event, stream_));
+      RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0));
+      stream_ = stream;
+      RAFT_CUDA_TRY(cudaEventDestroy(event));
+    }
+  }
+};  // class buffer_base
+
+};  // namespace mr
 };  // namespace raft
\ No newline at end of file
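The event handshake inside `set_stream` is the general CUDA pattern for ordering work across streams without blocking the host. As a standalone sketch of just that pattern (the helper name is illustrative, not part of the patch):

@code{.cpp}
#include <cuda_runtime.h>

// Make future work on `after` wait for everything already queued on `before`,
// mirroring what buffer_base::set_stream does when the stream changes.
inline cudaError_t stream_wait_on(cudaStream_t after, cudaStream_t before)
{
  cudaEvent_t event;
  cudaError_t status = cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
  if (status != cudaSuccess) { return status; }
  status = cudaEventRecord(event, before);  // mark the current tail of `before`
  if (status == cudaSuccess) {
    status = cudaStreamWaitEvent(after, event, 0);  // `after` waits for the mark
  }
  cudaEventDestroy(event);  // safe: the wait is already enqueued
  return status;
}
@endcode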