From 9b0208b414de3102f5e45dea6a085fea96ec49c0 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 9 Feb 2022 17:40:29 -0500 Subject: [PATCH] Fixing spectral APIs (#496) Further cleanup of the spectral APIs. Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/raft/pull/496 --- cpp/include/raft/cluster/detail/kmeans.cuh | 12 +- cpp/include/raft/linalg/detail/lanczos.hpp | 144 ++++++------ cpp/include/raft/linalg/lanczos.hpp | 49 ++-- cpp/include/raft/mr/buffer_base.hpp | 211 ++++++++++++++++++ cpp/include/raft/mr/device/buffer.hpp | 70 ++++++ cpp/include/raft/mr/host/buffer.hpp | 85 +++++++ .../raft/spectral/detail/matrix_wrappers.cuh | 4 + .../detail/modularity_maximization.hpp | 19 +- .../raft/spectral/detail/partition.hpp | 24 +- .../raft/spectral/detail/spectral_util.cuh | 7 +- cpp/include/raft/spectral/eigen_solvers.hpp | 65 +++--- cpp/include/raft/spectral/matrix_wrappers.hpp | 54 +++++ .../raft/spectral/modularity_maximization.hpp | 12 +- cpp/include/raft/spectral/partition.hpp | 22 +- cpp/test/CMakeLists.txt | 2 + cpp/test/mr/device/buffer.cpp | 92 ++++++++ cpp/test/mr/host/buffer.cpp | 70 ++++++ cpp/test/spectral_matrix.cu | 7 +- 18 files changed, 769 insertions(+), 180 deletions(-) create mode 100644 cpp/include/raft/mr/buffer_base.hpp create mode 100644 cpp/include/raft/mr/device/buffer.hpp create mode 100644 cpp/include/raft/mr/host/buffer.hpp create mode 100644 cpp/include/raft/spectral/matrix_wrappers.hpp create mode 100644 cpp/test/mr/device/buffer.cpp create mode 100644 cpp/test/mr/host/buffer.cpp diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 5f1a0e137d..039ac8854a 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -32,8 +32,8 @@ #include #include #include -#include #include +#include namespace raft { namespace cluster { @@ -948,8 +948,6 @@ int kmeans(handle_t const& handle, index_type_t& iters, unsigned long long seed = 123456) { - using namespace matrix; - // Check that parameters are valid RAFT_EXPECTS(n > 0, "invalid parameter (n<1)"); RAFT_EXPECTS(d > 0, "invalid parameter (d<1)"); @@ -958,10 +956,10 @@ int kmeans(handle_t const& handle, RAFT_EXPECTS(maxiter >= 0, "invalid parameter (maxiter<0)"); // Allocate memory - vector_t clusterSizes(handle, k); - vector_t centroids(handle, d * k); - vector_t work(handle, n * max(k, d)); - vector_t work_int(handle, 2 * d * n); + raft::spectral::matrix::vector_t clusterSizes(handle, k); + raft::spectral::matrix::vector_t centroids(handle, d * k); + raft::spectral::matrix::vector_t work(handle, n * max(k, d)); + raft::spectral::matrix::vector_t work_int(handle, 2 * d * n); // Perform k-means return kmeans(handle, diff --git a/cpp/include/raft/linalg/detail/lanczos.hpp b/cpp/include/raft/linalg/detail/lanczos.hpp index a2b7751a05..9fa0d79875 100644 --- a/cpp/include/raft/linalg/detail/lanczos.hpp +++ b/cpp/include/raft/linalg/detail/lanczos.hpp @@ -29,15 +29,11 @@ #include #include #include -#include #include +#include namespace raft { - -using namespace matrix; -using namespace linalg::detail; - -namespace spectral { +namespace linalg { namespace detail { // curandGeneratorNormalX @@ -87,7 +83,7 @@ inline curandStatus_t curandGenerateNormalX( */ template int performLanczosIteration(handle_t const& handle, - sparse_matrix_t const* A, + spectral::matrix::sparse_matrix_t const* A, 
index_type_t* iter, index_type_t maxIter, value_type_t shift, @@ -696,11 +692,6 @@ static int lanczosRestart(handle_t const& handle, return 0; } -} // namespace detail -} // namespace spectral - -namespace detail { - /** * @brief Compute smallest eigenvectors of symmetric matrix * Computes eigenvalues and eigenvectors that are least @@ -751,26 +742,25 @@ namespace detail { * @return error flag. */ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* shift, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* shift, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -993,20 +983,19 @@ int computeSmallestEigenvectors(handle_t const& handle, } template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { - using namespace raft::spectral::detail; - // Matrix dimension index_type_t n = A.nrows_; @@ -1024,8 +1013,8 @@ int computeSmallestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos method index_type_t effIter; @@ -1097,25 +1086,24 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. 
*/ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const* A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t* effIter, - index_type_t* totalIter, - value_type_t* __restrict__ alpha_host, - value_type_t* __restrict__ beta_host, - value_type_t* __restrict__ lanczosVecs_dev, - value_type_t* __restrict__ work_dev, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const* A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t* effIter, + index_type_t* totalIter, + value_type_t* __restrict__ alpha_host, + value_type_t* __restrict__ beta_host, + value_type_t* __restrict__ lanczosVecs_dev, + value_type_t* __restrict__ work_dev, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed) { - using namespace raft::spectral::detail; - // Useful constants constexpr value_type_t one = 1; constexpr value_type_t zero = 0; @@ -1342,17 +1330,18 @@ int computeLargestEigenvectors(handle_t const& handle, } template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { // Matrix dimension index_type_t n = A.nrows_; @@ -1371,8 +1360,8 @@ int computeLargestEigenvectors(handle_t const& handle, value_type_t* alpha_host = alpha_host_v.data(); value_type_t* beta_host = beta_host_v.data(); - vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); - vector_t work_dev(handle, (n + restartIter) * restartIter); + spectral::matrix::vector_t lanczosVecs_dev(handle, n * (restartIter + 1)); + spectral::matrix::vector_t work_dev(handle, (n + restartIter) * restartIter); // Perform Lanczos method index_type_t effIter; @@ -1398,4 +1387,5 @@ int computeLargestEigenvectors(handle_t const& handle, } } // namespace detail +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/linalg/lanczos.hpp b/cpp/include/raft/linalg/lanczos.hpp index e7d965f810..21b65158fc 100644 --- a/cpp/include/raft/linalg/lanczos.hpp +++ b/cpp/include/raft/linalg/lanczos.hpp @@ -17,8 +17,10 @@ #pragma once #include "detail/lanczos.hpp" +#include namespace raft { +namespace linalg { // ========================================================= // Eigensolver @@ -62,17 +64,18 @@ namespace raft { * @return error flag. 
*/ template -int computeSmallestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 1234567) +int computeSmallestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 1234567) { return detail::computeSmallestEigenvectors(handle, A, @@ -125,17 +128,18 @@ int computeSmallestEigenvectors(handle_t const& handle, * @return error flag. */ template -int computeLargestEigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - index_type_t nEigVecs, - index_type_t maxIter, - index_type_t restartIter, - value_type_t tol, - bool reorthogonalize, - index_type_t& iter, - value_type_t* __restrict__ eigVals_dev, - value_type_t* __restrict__ eigVecs_dev, - unsigned long long seed = 123456) +int computeLargestEigenvectors( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& A, + index_type_t nEigVecs, + index_type_t maxIter, + index_type_t restartIter, + value_type_t tol, + bool reorthogonalize, + index_type_t& iter, + value_type_t* __restrict__ eigVals_dev, + value_type_t* __restrict__ eigVecs_dev, + unsigned long long seed = 123456) { return detail::computeLargestEigenvectors(handle, A, @@ -150,4 +154,5 @@ int computeLargestEigenvectors(handle_t const& handle, seed); } +} // namespace linalg } // namespace raft diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp new file mode 100644 index 0000000000..151c49af7c --- /dev/null +++ b/cpp/include/raft/mr/buffer_base.hpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include +#include +#include + +namespace raft { +namespace mr { + +/** + * @brief Base for all RAII-based owning of temporary memory allocations. This + * class should ideally not be used by users directly, but instead via + * the child classes `device_buffer` and `host_buffer`. 
+ * + * @tparam T data type + * @tparam AllocatorT The underly allocator object + */ +template +class buffer_base { + public: + using size_type = std::size_t; + using value_type = T; + using iterator = value_type*; + using const_iterator = const value_type*; + using reference = T&; + using const_reference = const T&; + + buffer_base() = delete; + + buffer_base(const buffer_base& other) = delete; + + buffer_base& operator=(const buffer_base& other) = delete; + + /** + * @brief Main ctor + * + * @param[in] allocator asynchronous allocator used for managing buffer life + * @param[in] stream cuda stream where this allocation operations are async + * @param[in] n size of the buffer (in number of elements) + */ + buffer_base(std::shared_ptr allocator, cudaStream_t stream, size_type n = 0) + : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator)) + { + if (capacity_ > 0) { + data_ = + static_cast(allocator_->allocate(capacity_ * sizeof(value_type), stream_)); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_)); + } + } + + ~buffer_base() { release(); } + + value_type* data() { return data_; } + + const value_type* data() const { return data_; } + + size_type size() const { return size_; } + + void clear() { size_ = 0; } + + iterator begin() { return data_; } + + const_iterator begin() const { return data_; } + + iterator end() { return data_ + size_; } + + const_iterator end() const { return data_ + size_; } + + /** + * @brief Reserve new memory size for this buffer. + * + * It re-allocates a fresh buffer if the new requested capacity is more than + * the current one, copies the old buffer contents to this new buffer and + * removes the old one. + * + * @param[in] new_capacity new capacity (in number of elements) + * @{ + */ + void reserve(size_type new_capacity) + { + if (new_capacity > capacity_) { + auto* new_data = + static_cast(allocator_->allocate(new_capacity * sizeof(value_type), stream_)); + if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); } + // Only deallocate if we have allocated a pointer + if (nullptr != data_) { + allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); + } + data_ = new_data; + capacity_ = new_capacity; + } + } + + void reserve(size_type new_capacity, cudaStream_t stream) + { + set_stream(stream); + reserve(new_capacity); + } + /** @} */ + + /** + * @brief Resize the underlying buffer (uses `reserve` method internally) + * + * @param[in] new_size new buffer size + * @{ + */ + void resize(const size_type new_size) + { + reserve(new_size); + size_ = new_size; + } + + void resize(const size_type new_size, cudaStream_t stream) + { + set_stream(stream); + resize(new_size); + } + /** @} */ + + /** + * @brief Deletes the underlying buffer + * + * If this method is not explicitly called, it will be during the destructor + * @{ + */ + void release() + { + if (nullptr != data_) { + allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); + } + data_ = nullptr; + capacity_ = 0; + size_ = 0; + } + + void release(cudaStream_t stream) + { + set_stream(stream); + release(); + } + /** @} */ + + /** + * @brief returns the underlying allocator used + * + * @return the allocator pointer + */ + std::shared_ptr get_allocator() const { return allocator_; } + + /** + * @brief returns the underlying stream used + * + * @return the cuda stream + */ + cudaStream_t get_stream() const { return stream_; } + + protected: + value_type* data_; + + private: + size_type size_; + size_type capacity_; + cudaStream_t stream_; 
+ std::shared_ptr allocator_; + + /** + * @brief Sets a new cuda stream where the future operations will be queued + * + * This method makes sure that the inter-stream dependencies are met and taken + * care of, before setting the input stream as a new stream for this buffer. + * Ideally, the same cuda stream passed during constructor is expected to be + * used throughout this buffer's lifetime, for performance. + * + * @param[in] stream new cuda stream to be set. If it is the same as the + * current one, then this method will be a no-op. + */ + void set_stream(cudaStream_t stream) + { + if (stream_ != stream) { + cudaEvent_t event; + RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); + RAFT_CUDA_TRY(cudaEventRecord(event, stream_)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); + stream_ = stream; + RAFT_CUDA_TRY(cudaEventDestroy(event)); + } + } +}; // class buffer_base + +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp new file mode 100644 index 0000000000..aee3cba046 --- /dev/null +++ b/cpp/include/raft/mr/device/buffer.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include + +namespace raft { +namespace mr { +namespace device { + +/** + * @brief RAII object owning a contiguous typed device buffer. The passed in + * allocator supports asynchronous allocation and deallocation so this + * can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(..., cudaStream_t stream) { + * ... + * raft::mr::device::buffer temp(stream, 0); + * ... + * temp.resize(n); + * kernelA<<>>(...,temp.data(),...); + * kernelB<<>>(...,temp.data(),...); + * temp.release(); + * ... + * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } +}; // class buffer + +}; // namespace device +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp new file mode 100644 index 0000000000..de9468add8 --- /dev/null +++ b/cpp/include/raft/mr/host/buffer.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "allocator.hpp" +#include +#include +#include + +namespace raft { +namespace mr { +namespace host { + +/** + * @brief RAII object owning a contigous typed host buffer (aka pinned memory). + * The passed in allocator supports asynchronus allocation and + * deallocation so this can also be used for temporary memory + * + * @code{.cpp} + * template + * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { + * ... + * raft::mr::host::buffer temp(stream, 0); + * ... + * temp.resize(n); + * raft::copy(temp.data(), in_d, temp.size()); + * ... + * raft::copy(out_d, temp.data(), temp.size()); + * temp.release(stream); + * ... + * } + * @endcode + */ +template +class buffer : public buffer_base { + public: + using size_type = typename buffer_base::size_type; + using value_type = typename buffer_base::value_type; + using iterator = typename buffer_base::iterator; + using const_iterator = typename buffer_base::const_iterator; + using reference = typename buffer_base::reference; + using const_reference = typename buffer_base::const_reference; + + buffer() = delete; + + buffer(const buffer& other) = delete; + + buffer& operator=(const buffer& other) = delete; + + buffer(std::shared_ptr alloc, const device::buffer& other) + : buffer_base(alloc, other.get_stream(), other.size()) + { + if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } + } + + buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) + : buffer_base(alloc, stream, n) + { + } + + reference operator[](size_type pos) { return data_[pos]; } + + const_reference operator[](size_type pos) const { return data_[pos]; } + + private: + using buffer_base::data_; +}; + +}; // namespace host +}; // namespace mr +}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh index d86dc21135..b4a2ed175f 100644 --- a/cpp/include/raft/spectral/detail/matrix_wrappers.cuh +++ b/cpp/include/raft/spectral/detail/matrix_wrappers.cuh @@ -34,7 +34,9 @@ #define IDX(i, j, lda) ((i) + (j) * (lda)) namespace raft { +namespace spectral { namespace matrix { +namespace detail { using size_type = int; // for now; TODO: move it in appropriate header @@ -443,5 +445,7 @@ struct modularity_matrix_t : laplacian_matrix_t { value_type edge_sum_; }; +} // namespace detail } // namespace matrix +} // namespace spectral } // namespace raft diff --git a/cpp/include/raft/spectral/detail/modularity_maximization.hpp b/cpp/include/raft/spectral/detail/modularity_maximization.hpp index a55dfbe67f..6bb3dca920 100644 --- a/cpp/include/raft/spectral/detail/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/detail/modularity_maximization.hpp @@ -26,9 +26,11 @@ #include +#include #include #include #include +#include #ifdef COLLECT_TIME_STATISTICS #include @@ -52,9 +54,6 @@ namespace raft { namespace spectral { namespace detail { 
-using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral modularity_maximization // ========================================================= @@ -83,7 +82,7 @@ using namespace linalg; template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, @@ -105,7 +104,7 @@ std::tuple modularity_maximization( // Compute eigenvectors of Modularity Matrix // Initialize Modularity Matrix - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; auto eigen_config = eigen_solver.get_config(); auto nEigVecs = eigen_config.n_eigVecs; @@ -142,7 +141,7 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t const& handle, - sparse_matrix_t const& csr_m, + raft::spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) @@ -157,14 +156,14 @@ void analyzeModularity(handle_t const& handle, auto stream = handle.get_stream(); // Device memory - vector_t part_i(handle, n); - vector_t Bx(handle, n); + raft::spectral::matrix::vector_t part_i(handle, n); + raft::spectral::matrix::vector_t Bx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY(linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Modularity - modularity_matrix_t B{handle, csr_m}; + raft::spectral::matrix::modularity_matrix_t B{handle, csr_m}; // Initialize output modularity = 0; diff --git a/cpp/include/raft/spectral/detail/partition.hpp b/cpp/include/raft/spectral/detail/partition.hpp index b7c811d5a5..775b37d118 100644 --- a/cpp/include/raft/spectral/detail/partition.hpp +++ b/cpp/include/raft/spectral/detail/partition.hpp @@ -25,6 +25,7 @@ #include +#include #include #include #include @@ -33,9 +34,6 @@ namespace raft { namespace spectral { namespace detail { -using namespace matrix; -using namespace linalg; - // ========================================================= // Spectral partitioner // ========================================================= @@ -63,13 +61,14 @@ using namespace linalg; * @return statistics: number of eigensolver iterations, . 
*/ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + spectral::matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { RAFT_EXPECTS(clusters != nullptr, "Null clusters buffer."); RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); @@ -132,7 +131,7 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + spectral::matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, @@ -153,7 +152,8 @@ void analyzePartition(handle_t const& handle, vector_t Lx(handle, n); // Initialize cuBLAS - RAFT_CUBLAS_TRY(cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); + RAFT_CUBLAS_TRY( + raft::linalg::detail::cublassetpointermode(cublas_h, CUBLAS_POINTER_MODE_HOST, stream)); // Initialize Laplacian /// sparse_matrix_t A{handle, graph}; diff --git a/cpp/include/raft/spectral/detail/spectral_util.cuh b/cpp/include/raft/spectral/detail/spectral_util.cuh index 6b57566a73..c7a0f0c5ef 100644 --- a/cpp/include/raft/spectral/detail/spectral_util.cuh +++ b/cpp/include/raft/spectral/detail/spectral_util.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -204,9 +205,9 @@ bool construct_indicator(handle_t const& handle, weight_t& clustersize, weight_t& partStats, vertex_t const* __restrict__ clusters, - vector_t& part_i, - vector_t& Bx, - laplacian_matrix_t const& B) + raft::spectral::matrix::vector_t& part_i, + raft::spectral::matrix::vector_t& Bx, + raft::spectral::matrix::laplacian_matrix_t const& B) { auto stream = handle.get_stream(); auto cublas_h = handle.get_cublas_handle(); diff --git a/cpp/include/raft/spectral/eigen_solvers.hpp b/cpp/include/raft/spectral/eigen_solvers.hpp index 192dc15a6b..0033dbeea9 100644 --- a/cpp/include/raft/spectral/eigen_solvers.hpp +++ b/cpp/include/raft/spectral/eigen_solvers.hpp @@ -16,12 +16,11 @@ #pragma once #include +#include namespace raft { namespace spectral { -using namespace matrix; - // aggregate of control params for Eigen Solver: // template @@ -47,47 +46,49 @@ struct lanczos_solver_t { { } - index_type_t solve_smallest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* __restrict__ eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_smallest_eigenvectors( + handle_t const& handle, + matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeSmallestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeSmallestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } - index_type_t solve_largest_eigenvectors(handle_t const& handle, - sparse_matrix_t const& A, - value_type_t* __restrict__ 
eigVals, - value_type_t* __restrict__ eigVecs) const + index_type_t solve_largest_eigenvectors( + handle_t const& handle, + matrix::sparse_matrix_t const& A, + value_type_t* __restrict__ eigVals, + value_type_t* __restrict__ eigVecs) const { RAFT_EXPECTS(eigVals != nullptr, "Null eigVals buffer."); RAFT_EXPECTS(eigVecs != nullptr, "Null eigVecs buffer."); index_type_t iters{}; - computeLargestEigenvectors(handle, - A, - config_.n_eigVecs, - config_.maxIter, - config_.restartIter, - config_.tol, - config_.reorthogonalize, - iters, - eigVals, - eigVecs, - config_.seed); + linalg::computeLargestEigenvectors(handle, + A, + config_.n_eigVecs, + config_.maxIter, + config_.restartIter, + config_.tol, + config_.reorthogonalize, + iters, + eigVals, + eigVecs, + config_.seed); return iters; } diff --git a/cpp/include/raft/spectral/matrix_wrappers.hpp b/cpp/include/raft/spectral/matrix_wrappers.hpp new file mode 100644 index 0000000000..237f1275fd --- /dev/null +++ b/cpp/include/raft/spectral/matrix_wrappers.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +// ========================================================= +// Useful macros +// ========================================================= + +namespace raft { +namespace spectral { +namespace matrix { + +using size_type = int; // for now; TODO: move it in appropriate header + +// specifies type of algorithm used +// for SpMv: +// +using sparse_mv_alg_t = detail::sparse_mv_alg_t; + +// Vector "view"-like aggregate for linear algebra purposes +// +template +using vector_view_t = detail::vector_view_t; + +template +using vector_t = detail::vector_t; + +template +using sparse_matrix_t = detail::sparse_matrix_t; + +template +using laplacian_matrix_t = detail::laplacian_matrix_t; + +template +using modularity_matrix_t = detail::modularity_matrix_t; + +} // namespace matrix +} // namespace spectral +} // namespace raft diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 466851c74f..e67be767a2 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -51,15 +51,16 @@ namespace spectral { template std::tuple modularity_maximization( handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, EigenSolver const& eigen_solver, ClusterSolver const& cluster_solver, vertex_t* __restrict__ clusters, weight_t* eigVals, weight_t* eigVecs) { - return detail::modularity_maximization( - handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); + return raft::spectral::detail:: + modularity_maximization( + handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } //=================================================== // Analysis of graph partition @@ -74,12 +75,13 @@ std::tuple modularity_maximization( */ template void analyzeModularity(handle_t 
const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, vertex_t const* __restrict__ clusters, weight_t& modularity) { - detail::analyzeModularity(handle, csr_m, nClusters, clusters, modularity); + raft::spectral::detail::analyzeModularity( + handle, csr_m, nClusters, clusters, modularity); } } // namespace spectral diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index 597ef530a2..f62773a958 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -49,15 +49,16 @@ namespace spectral { * @return statistics: number of eigensolver iterations, . */ template -std::tuple partition(handle_t const& handle, - sparse_matrix_t const& csr_m, - EigenSolver const& eigen_solver, - ClusterSolver const& cluster_solver, - vertex_t* __restrict__ clusters, - weight_t* eigVals, - weight_t* eigVecs) +std::tuple partition( + handle_t const& handle, + matrix::sparse_matrix_t const& csr_m, + EigenSolver const& eigen_solver, + ClusterSolver const& cluster_solver, + vertex_t* __restrict__ clusters, + weight_t* eigVals, + weight_t* eigVecs) { - return detail::partition( + return raft::spectral::detail::partition( handle, csr_m, eigen_solver, cluster_solver, clusters, eigVals, eigVecs); } @@ -81,13 +82,14 @@ std::tuple partition(handle_t const& handle, */ template void analyzePartition(handle_t const& handle, - sparse_matrix_t const& csr_m, + matrix::sparse_matrix_t const& csr_m, vertex_t nClusters, const vertex_t* __restrict__ clusters, weight_t& edgeCut, weight_t& cost) { - detail::analyzePartition(handle, csr_m, nClusters, clusters, edgeCut, cost); + raft::spectral::detail::analyzePartition( + handle, csr_m, nClusters, clusters, edgeCut, cost); } } // namespace spectral diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index fda60e1cb0..9f5ca95e93 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -65,6 +65,8 @@ add_executable(test_raft test/matrix/math.cu test/matrix/matrix.cu test/matrix/linewise_op.cu + test/mr/host/buffer.cpp + test/mr/device/buffer.cpp test/mst.cu test/random/rng.cu test/random/rng_int.cu diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp new file mode 100644 index 0000000000..324e9b9e4b --- /dev/null +++ b/cpp/test/mr/device/buffer.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace mr { +namespace device { + +TEST(Raft, DeviceBufferAlloc) +{ + cudaStream_t stream; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + // no allocation at construction + rmm::device_uvector buff(0, stream); + ASSERT_EQ(0, buff.size()); + // explicit allocation after construction + buff.resize(20, stream); + ASSERT_EQ(20, buff.size()); + // resizing to a smaller buffer size + buff.resize(10, stream); + ASSERT_EQ(10, buff.size()); + // explicit deallocation + buff.release(); + ASSERT_EQ(0, buff.size()); + // use these methods without the explicit stream parameter + buff.resize(20, stream); + ASSERT_EQ(20, buff.size()); + buff.resize(10, stream); + ASSERT_EQ(10, buff.size()); + buff.release(); + ASSERT_EQ(0, buff.size()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +} + +TEST(Raft, DeviceBufferZeroResize) +{ + // Create a limiting_resource_adaptor to track allocations + auto curr_mr = + dynamic_cast(rmm::mr::get_current_device_resource()); + auto limit_mr = + std::make_shared>(curr_mr, + 1000); + + rmm::mr::set_current_device_resource(limit_mr.get()); + + cudaStream_t stream; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + // no allocation at construction + rmm::device_uvector buff(10, stream); + ASSERT_EQ(10, buff.size()); + // explicit allocation after construction + buff.resize(0, stream); + ASSERT_EQ(0, buff.size()); + // resizing to a smaller buffer size + buff.resize(20, stream); + ASSERT_EQ(20, buff.size()); + // explicit deallocation + buff.release(); + ASSERT_EQ(0, buff.size()); + + // Now check that there is no memory left. (Used to not be true) + ASSERT_EQ(0, limit_mr->get_allocated_bytes()); + + rmm::mr::set_current_device_resource(curr_mr); + + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +} + +} // namespace device +} // namespace mr +} // namespace raft \ No newline at end of file diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp new file mode 100644 index 0000000000..c174b269da --- /dev/null +++ b/cpp/test/mr/host/buffer.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace raft { +namespace mr { +namespace host { + +TEST(Raft, HostBuffer) +{ + auto alloc = std::make_shared(); + cudaStream_t stream; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + // no allocation at construction + buffer buff(alloc, stream); + ASSERT_EQ(0, buff.size()); + // explicit allocation after construction + buff.resize(20, stream); + ASSERT_EQ(20, buff.size()); + // resizing to a smaller buffer size + buff.resize(10, stream); + ASSERT_EQ(10, buff.size()); + // explicit deallocation + buff.release(stream); + ASSERT_EQ(0, buff.size()); + // use these methods without the explicit stream parameter + buff.resize(20); + ASSERT_EQ(20, buff.size()); + buff.resize(10); + ASSERT_EQ(10, buff.size()); + buff.release(); + ASSERT_EQ(0, buff.size()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +} + +TEST(Raft, DeviceToHostBuffer) +{ + auto d_alloc = std::make_shared(); + auto h_alloc = std::make_shared(); + cudaStream_t stream; + RAFT_CUDA_TRY(cudaStreamCreate(&stream)); + device::buffer d_buff(d_alloc, stream, 32); + RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream)); + buffer h_buff(h_alloc, d_buff); + ASSERT_EQ(d_buff.size(), h_buff.size()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +} + +} // namespace host +} // namespace mr +} // namespace raft \ No newline at end of file diff --git a/cpp/test/spectral_matrix.cu b/cpp/test/spectral_matrix.cu index 652aa61451..5d0768a729 100644 --- a/cpp/test/spectral_matrix.cu +++ b/cpp/test/spectral_matrix.cu @@ -19,9 +19,11 @@ #include #include -#include +#include namespace raft { +namespace spectral { +namespace matrix { namespace { template struct csr_view_t { @@ -34,7 +36,6 @@ struct csr_view_t { } // namespace TEST(Raft, SpectralMatrices) { - using namespace matrix; using index_type = int; using value_type = double; @@ -75,4 +76,6 @@ TEST(Raft, SpectralMatrices) EXPECT_ANY_THROW(cnstr_mm2()); // because of nullptr ptr args } +} // namespace matrix +} // namespace spectral } // namespace raft
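
Usage sketch for the relocated spectral APIs: after this change, callers spell the matrix wrappers as raft::spectral::matrix::* and pull the solvers from raft/spectral. The snippet below is illustrative only; it assumes the pre-existing solver config and cluster-solver names (eigen_solver_config_t, cluster_solver_config_t, kmeans_solver_t) and the (handle, offsets, indices, values, nrows, nnz) sparse_matrix_t constructor are unchanged by this patch.

#include <raft/handle.hpp>
#include <raft/spectral/cluster_solvers.hpp>
#include <raft/spectral/eigen_solvers.hpp>
#include <raft/spectral/matrix_wrappers.hpp>
#include <raft/spectral/partition.hpp>

void spectral_partition_sketch(raft::handle_t const& handle,
                               int const* row_offsets,   // device CSR offsets, size nrows + 1
                               int const* col_indices,   // device CSR indices, size nnz
                               double const* values,     // device CSR values, size nnz
                               int nrows,
                               int nnz,
                               int* clusters,     // device out, size nrows
                               double* eig_vals,  // device out, size n_eigVecs
                               double* eig_vecs)  // device out, size nrows * n_eigVecs
{
  using namespace raft::spectral;

  // Adjacency matrix wrapper, now fully namespace-qualified.
  matrix::sparse_matrix_t<int, double> csr{handle, row_offsets, col_indices, values, nrows, nnz};

  // Solver aggregates (names assumed unchanged): {n_eigVecs, maxIter, restartIter, tol} and
  // {n_clusters, maxIter, tol}; remaining members keep their defaults.
  eigen_solver_config_t<int, double> eig_cfg{2, 100, 50, 1e-4};
  lanczos_solver_t<int, double> eigen_solver{eig_cfg};

  cluster_solver_config_t<int, double> clust_cfg{2, 100, 1e-4};
  kmeans_solver_t<int, double> cluster_solver{clust_cfg};

  // Returns (eigensolver iterations, k-means residual, k-means iterations).
  auto stats = partition(handle, csr, eigen_solver, cluster_solver, clusters, eig_vals, eig_vecs);
  (void)stats;
}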
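
The Lanczos entry points likewise move from the raft root namespace into raft::linalg and now take the namespace-qualified sparse_matrix_t; the public signatures are otherwise as shown in the diff above. A minimal direct call, reusing a csr wrapper built as in the previous sketch:

#include <raft/handle.hpp>
#include <raft/linalg/lanczos.hpp>
#include <raft/spectral/matrix_wrappers.hpp>

void smallest_eigvecs_sketch(raft::handle_t const& handle,
                             raft::spectral::matrix::sparse_matrix_t<int, double> const& csr,
                             double* eig_vals,  // device, size n_eig_vecs
                             double* eig_vecs)  // device, size csr.nrows_ * n_eig_vecs
{
  int n_eig_vecs = 2;
  int iters      = 0;
  raft::linalg::computeSmallestEigenvectors(handle,
                                            csr,
                                            n_eig_vecs,
                                            100,    // maxIter
                                            50,     // restartIter (> n_eig_vecs, <= n)
                                            1e-4,   // tol
                                            false,  // reorthogonalize
                                            iters,
                                            eig_vals,
                                            eig_vecs);  // seed left at its default
}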
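
The re-added raft::mr buffers are RAII wrappers around an allocator plus a CUDA stream, mirroring the new cpp/test/mr tests. A small sketch, assuming the default_allocator types provided by the neighbouring device/allocator.hpp and host/allocator.hpp headers:

#include <raft/mr/device/buffer.hpp>
#include <raft/mr/host/buffer.hpp>

#include <memory>

void mr_buffer_sketch(cudaStream_t stream)
{
  auto d_alloc = std::make_shared<raft::mr::device::default_allocator>();
  auto h_alloc = std::make_shared<raft::mr::host::default_allocator>();

  // Device scratch buffer: no allocation until resize().
  raft::mr::device::buffer<float> d_buff(d_alloc, stream, 0);
  d_buff.resize(32, stream);

  // ... launch kernels on `stream` that read/write d_buff.data() ...

  // Pinned host mirror, copy-constructed from the device buffer.
  raft::mr::host::buffer<float> h_buff(h_alloc, d_buff);

  d_buff.release(stream);
  h_buff.release(stream);
}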