diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 936065afba..1940fcea51 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -299,40 +299,6 @@ void print_device_vector(const char* variable_name, } /** @} */ -static std::mutex mutex_; -static std::unordered_map allocations; - -template -void allocate(Type*& ptr, size_t len, rmm::cuda_stream_view stream, bool setZero = false) -{ - size_t size = len * sizeof(Type); - ptr = (Type*)rmm::mr::get_current_device_resource()->allocate(size, stream); - if (setZero) CUDA_CHECK(cudaMemsetAsync((void*)ptr, 0, size, stream)); - - std::lock_guard _(mutex_); - allocations[ptr] = size; -} - -template -void deallocate(Type*& ptr, rmm::cuda_stream_view stream) -{ - std::lock_guard _(mutex_); - size_t size = allocations[ptr]; - allocations.erase(ptr); - rmm::mr::get_current_device_resource()->deallocate((void*)ptr, size, stream); -} - -inline void deallocate_all(rmm::cuda_stream_view stream) -{ - std::lock_guard _(mutex_); - for (auto& alloc : allocations) { - void* ptr = alloc.first; - size_t size = alloc.second; - rmm::mr::get_current_device_resource()->deallocate(ptr, size, stream); - } - allocations.clear(); -} - /** helper method to get max usable shared mem per block parameter */ inline int getSharedMemPerBlock() { diff --git a/cpp/include/raft/mr/allocator.hpp b/cpp/include/raft/mr/allocator.hpp deleted file mode 100644 index 08a4987c91..0000000000 --- a/cpp/include/raft/mr/allocator.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace raft { -namespace mr { - -/** - * @brief Interface for an asynchronous device/host allocator. - * - * An implementation of this interface can make the following assumptions: - * - It does not need to be but it can allow async allocate and deallocate. - * - * @note This interface does NOT support RAII. Thus, if you need RAII-enabled - * interface, better to use `device_buffer` or `host_buffer`. - */ -class base_allocator { - public: - /** - * @brief Asynchronously allocates a memory region. - * - * An implementation of this need to return a allocation of n bytes properly - * align bytes on the configured device. The allocation can optionally be - * asynchronous in the sense that it is only save to use after all work - * submitted to the passed in stream prior to the call to allocate has - * completed. If the allocation is used before, e.g. in another stream the - * behaviour may be undefined. - * @todo: Add alignment requirments. - * - * @param[in] n number of bytes to allocate - * @param[in] stream stream to issue the possible asynchronous allocation in - */ - virtual void* allocate(std::size_t n, cudaStream_t stream) = 0; - - /** - * @brief Asynchronously deallocates device memory - * - * An implementation of this need to ensure that the allocation that the - * passed in pointer points to remains usable until all work sheduled in - * stream prior to the call to deallocate has completed. - * - * @param[inout] p pointer to the buffer to deallocte - * @param[in] n size of the buffer to deallocte in bytes - * @param[in] stream stream in which the allocation might be still in use - */ - virtual void deallocate(void* p, std::size_t n, cudaStream_t stream) = 0; - - virtual ~base_allocator() = default; -}; // class base_allocator - -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp deleted file mode 100644 index 96aa622525..0000000000 --- a/cpp/include/raft/mr/buffer_base.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include - -namespace raft { -namespace mr { - -/** - * @brief Base for all RAII-based owning of temporary memory allocations. This - * class should ideally not be used by users directly, but instead via - * the child classes `device_buffer` and `host_buffer`. - * - * @tparam T data type - * @tparam AllocatorT The underly allocator object - */ -template -class buffer_base { - public: - using size_type = std::size_t; - using value_type = T; - using iterator = value_type*; - using const_iterator = const value_type*; - using reference = T&; - using const_reference = const T&; - - buffer_base() = delete; - - buffer_base(const buffer_base& other) = delete; - - buffer_base& operator=(const buffer_base& other) = delete; - - /** - * @brief Main ctor - * - * @param[in] allocator asynchronous allocator used for managing buffer life - * @param[in] stream cuda stream where this allocation operations are async - * @param[in] n size of the buffer (in number of elements) - */ - buffer_base(std::shared_ptr allocator, cudaStream_t stream, size_type n = 0) - : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator)) - { - if (capacity_ > 0) { - data_ = - static_cast(allocator_->allocate(capacity_ * sizeof(value_type), stream_)); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_)); - } - } - - ~buffer_base() { release(); } - - value_type* data() { return data_; } - - const value_type* data() const { return data_; } - - size_type size() const { return size_; } - - void clear() { size_ = 0; } - - iterator begin() { return data_; } - - const_iterator begin() const { return data_; } - - iterator end() { return data_ + size_; } - - const_iterator end() const { return data_ + size_; } - - /** - * @brief Reserve new memory size for this buffer. - * - * It re-allocates a fresh buffer if the new requested capacity is more than - * the current one, copies the old buffer contents to this new buffer and - * removes the old one. - * - * @param[in] new_capacity new capacity (in number of elements) - * @{ - */ - void reserve(size_type new_capacity) - { - if (new_capacity > capacity_) { - auto* new_data = - static_cast(allocator_->allocate(new_capacity * sizeof(value_type), stream_)); - if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); } - // Only deallocate if we have allocated a pointer - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = new_data; - capacity_ = new_capacity; - } - } - - void reserve(size_type new_capacity, cudaStream_t stream) - { - set_stream(stream); - reserve(new_capacity); - } - /** @} */ - - /** - * @brief Resize the underlying buffer (uses `reserve` method internally) - * - * @param[in] new_size new buffer size - * @{ - */ - void resize(const size_type new_size) - { - reserve(new_size); - size_ = new_size; - } - - void resize(const size_type new_size, cudaStream_t stream) - { - set_stream(stream); - resize(new_size); - } - /** @} */ - - /** - * @brief Deletes the underlying buffer - * - * If this method is not explicitly called, it will be during the destructor - * @{ - */ - void release() - { - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = nullptr; - capacity_ = 0; - size_ = 0; - } - - void release(cudaStream_t stream) - { - set_stream(stream); - release(); - } - /** @} */ - - /** - * @brief returns the underlying allocator used - * - * @return the allocator pointer - */ - std::shared_ptr get_allocator() const { return allocator_; } - - /** - * @brief returns the underlying stream used - * - * @return the cuda stream - */ - cudaStream_t get_stream() const { return stream_; } - - protected: - value_type* data_; - - private: - size_type size_; - size_type capacity_; - cudaStream_t stream_; - std::shared_ptr allocator_; - - /** - * @brief Sets a new cuda stream where the future operations will be queued - * - * This method makes sure that the inter-stream dependencies are met and taken - * care of, before setting the input stream as a new stream for this buffer. - * Ideally, the same cuda stream passed during constructor is expected to be - * used throughout this buffer's lifetime, for performance. - * - * @param[in] stream new cuda stream to be set. If it is the same as the - * current one, then this method will be a no-op. - */ - void set_stream(cudaStream_t stream) - { - if (stream_ != stream) { - cudaEvent_t event; - RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); - RAFT_CUDA_TRY(cudaEventRecord(event, stream_)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); - stream_ = stream; - RAFT_CUDA_TRY(cudaEventDestroy(event)); - } - } -}; // class buffer_base - -}; // namespace mr -}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/device/allocator.hpp b/cpp/include/raft/mr/device/allocator.hpp deleted file mode 100644 index 8d306a199f..0000000000 --- a/cpp/include/raft/mr/device/allocator.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief An explicit interface for an asynchronous device allocator. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations may be always on the device that was specified on construction. - */ -class allocator : public base_allocator { -}; - -/** Default device allocator based on the one provided by RMM */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override - { - void* ptr = rmm::mr::get_current_device_resource()->allocate(n, stream); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override - { - rmm::mr::get_current_device_resource()->deallocate(p, n, stream); - } -}; // class default_allocator - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp deleted file mode 100644 index 954ce83d1f..0000000000 --- a/cpp/include/raft/mr/device/buffer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief RAII object owning a contiguous typed device buffer. The passed in - * allocator supports asynchronous allocation and deallocation so this - * can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(..., cudaStream_t stream) { - * ... - * raft::mr::device::buffer temp(stream, 0); - * ... - * temp.resize(n); - * kernelA<<>>(...,temp.data(),...); - * kernelB<<>>(...,temp.data(),...); - * temp.release(); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } -}; // class buffer - -}; // namespace device -}; // namespace mr -}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/mr/host/allocator.hpp b/cpp/include/raft/mr/host/allocator.hpp deleted file mode 100644 index 71b5465451..0000000000 --- a/cpp/include/raft/mr/host/allocator.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief An explicit interface for an asynchronous host allocations. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations don't need to be zero copy accessible form a device. - */ -class allocator : public base_allocator { -}; - -/** Default cudaMallocHost/cudaFreeHost based host allocator */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override - { - void* ptr = nullptr; - RAFT_CUDA_TRY(cudaMallocHost(&ptr, n)); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override - { - // Must call _NO_THROW here since this is called frequently from object - // destructors which are "nothrow" by default - RAFT_CUDA_TRY_NO_THROW(cudaFreeHost(p)); - } -}; // class default_allocator - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp deleted file mode 100644 index 25aed3e725..0000000000 --- a/cpp/include/raft/mr/host/buffer.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief RAII object owning a contigous typed host buffer (aka pinned memory). - * The passed in allocator supports asynchronus allocation and - * deallocation so this can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { - * ... - * raft::mr::host::buffer temp(stream, 0); - * ... - * temp.resize(n); - * raft::copy(temp.data(), in_d, temp.size()); - * ... - * raft::copy(out_d, temp.data(), temp.size()); - * temp.release(stream); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, const device::buffer& other) - : buffer_base(alloc, other.get_stream(), other.size()) - { - if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } - } - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } - - reference operator[](size_type pos) { return data_[pos]; } - - const_reference operator[](size_type pos) const { return data_[pos]; } - - private: - using buffer_base::data_; -}; - -}; // namespace host -}; // namespace mr -}; // namespace raft \ No newline at end of file diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh index 8bab85e485..42c1319889 100644 --- a/cpp/include/raft/random/detail/make_regression.cuh +++ b/cpp/include/raft/random/detail/make_regression.cuh @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 1b245ca45f..17ddb1d5d9 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -903,7 +903,7 @@ class RngImpl { rmm::device_uvector workspace(0, stream); sortPairs(workspace, expWts.data(), sortedWts.data(), inIdxPtr, outIdxPtr, (int)len, stream); if (outIdx != nullptr) { - CUDA_CHECK(cudaMemcpyAsync( + RAFT_CUDA_TRY(cudaMemcpyAsync( outIdx, outIdxPtr, sizeof(IdxT) * sampledLen, cudaMemcpyDeviceToDevice, stream)); } scatter(out, in, outIdxPtr, sampledLen, stream); diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh index a2fe090c96..ed2b414c70 100644 --- a/cpp/include/raft/sparse/distance/detail/utils.cuh +++ b/cpp/include/raft/sparse/distance/detail/utils.cuh @@ -16,8 +16,6 @@ #pragma once -#include - #include namespace raft { diff --git a/cpp/include/raft/stats/detail/homogeneity_score.cuh b/cpp/include/raft/stats/detail/homogeneity_score.cuh index 4c78553258..e781b58875 100644 --- a/cpp/include/raft/stats/detail/homogeneity_score.cuh +++ b/cpp/include/raft/stats/detail/homogeneity_score.cuh @@ -22,7 +22,6 @@ #pragma once -#include #include #include diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index f8ae28f550..8d7b239624 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -75,8 +75,6 @@ add_executable(test_raft test/matrix/columnSort.cu test/matrix/linewise_op.cu test/mdarray.cu - test/mr/host/buffer.cpp - test/mr/device/buffer.cpp test/mst.cu test/random/make_blobs.cu test/random/make_regression.cu diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index 66b472c7e1..23d29c3d4e 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -102,11 +102,11 @@ class RsvdTest : public ::testing::TestWithParam> { int len_redundant = m * n_redundant; r.normal(A.data(), len_informative, mu, sigma, stream); - CUDA_CHECK(cudaMemcpyAsync(A.data() + len_informative, - A.data(), - len_redundant * sizeof(T), - cudaMemcpyDeviceToDevice, - stream)); + RAFT_CUDA_TRY(cudaMemcpyAsync(A.data() + len_informative, + A.data(), + len_redundant * sizeof(T), + cudaMemcpyDeviceToDevice, + stream)); } std::vector A_backup_cpu(m * n); // Backup A matrix as svdJacobi will destroy the content of A diff --git a/cpp/test/span.cu b/cpp/test/span.cu index e121cea108..abecfc677c 100644 --- a/cpp/test/span.cu +++ b/cpp/test/span.cu @@ -30,16 +30,16 @@ struct TestStatus { public: TestStatus() { - CUDA_CHECK(cudaMalloc(&status_, sizeof(int))); + RAFT_CUDA_TRY(cudaMalloc(&status_, sizeof(int))); int h_status = 1; - CUDA_CHECK(cudaMemcpy(status_, &h_status, sizeof(int), cudaMemcpyHostToDevice)); + RAFT_CUDA_TRY(cudaMemcpy(status_, &h_status, sizeof(int), cudaMemcpyHostToDevice)); } - ~TestStatus() noexcept(false) { CUDA_CHECK(cudaFree(status_)); } + ~TestStatus() noexcept(false) { RAFT_CUDA_TRY(cudaFree(status_)); } int Get() { int h_status; - CUDA_CHECK(cudaMemcpy(&h_status, status_, sizeof(int), cudaMemcpyDeviceToHost)); + RAFT_CUDA_TRY(cudaMemcpy(&h_status, status_, sizeof(int), cudaMemcpyDeviceToHost)); return h_status; } diff --git a/cpp/test/spatial/fused_l2_knn.cu b/cpp/test/spatial/fused_l2_knn.cu index 65c4284dd2..2ec4e86d1f 100644 --- a/cpp/test/spatial/fused_l2_knn.cu +++ b/cpp/test/spatial/fused_l2_knn.cu @@ -99,65 +99,70 @@ testing::AssertionResult devArrMatchKnnPair(const T* expected_idx, template class FusedL2KNNTest : public ::testing::TestWithParam { + public: + FusedL2KNNTest() + : stream_(handle_.get_stream()), + params_(::testing::TestWithParam::GetParam()), + database(params_.num_db_vecs * params_.dim, stream_), + search_queries(params_.num_queries * params_.dim, stream_), + raft_indices_(params_.num_queries * params_.k, stream_), + raft_distances_(params_.num_queries * params_.k, stream_), + faiss_indices_(params_.num_queries * params_.k, stream_), + faiss_distances_(params_.num_queries * params_.k, stream_) + { + RAFT_CUDA_TRY(cudaMemsetAsync(database.data(), 0, database.size() * sizeof(T), stream_)); + RAFT_CUDA_TRY( + cudaMemsetAsync(search_queries.data(), 0, search_queries.size() * sizeof(T), stream_)); + RAFT_CUDA_TRY( + cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(int64_t), stream_)); + RAFT_CUDA_TRY( + cudaMemsetAsync(raft_distances_.data(), 0, raft_distances_.size() * sizeof(T), stream_)); + RAFT_CUDA_TRY( + cudaMemsetAsync(faiss_indices_.data(), 0, faiss_indices_.size() * sizeof(int64_t), stream_)); + RAFT_CUDA_TRY( + cudaMemsetAsync(faiss_distances_.data(), 0, faiss_distances_.size() * sizeof(T), stream_)); + } + protected: void testBruteForce() { - cudaStream_t stream = handle_.get_stream(); - launchFaissBfknn(); detail::fusedL2Knn(dim, - raft_indices_, - raft_distances_, - database, - search_queries, + raft_indices_.data(), + raft_distances_.data(), + database.data(), + search_queries.data(), num_db_vecs, num_queries, k_, true, true, - stream, + stream_, metric); // verify. - devArrMatchKnnPair(faiss_indices_, - raft_indices_, - faiss_distances_, - raft_distances_, + devArrMatchKnnPair(faiss_indices_.data(), + raft_indices_.data(), + faiss_distances_.data(), + raft_distances_.data(), num_queries, k_, float(0.001), - stream); + stream_); } void SetUp() override { - params_ = ::testing::TestWithParam::GetParam(); num_queries = params_.num_queries; num_db_vecs = params_.num_db_vecs; dim = params_.dim; k_ = params_.k; metric = params_.metric_; - cudaStream_t stream = handle_.get_stream(); - - raft::allocate(database, num_db_vecs * dim, stream, true); - raft::allocate(search_queries, num_queries * dim, stream, true); - unsigned long long int seed = 1234ULL; raft::random::Rng r(seed); - r.uniform(database, num_db_vecs * dim, T(-1.0), T(1.0), stream); - r.uniform(search_queries, num_queries * dim, T(-1.0), T(1.0), stream); - - raft::allocate(raft_indices_, num_queries * k_, stream, true); - raft::allocate(raft_distances_, num_queries * k_, stream, true); - raft::allocate(faiss_indices_, num_queries * k_, stream, true); - raft::allocate(faiss_distances_, num_queries * k_, stream, true); - } - - void TearDown() override - { - cudaStream_t stream = handle_.get_stream(); - raft::deallocate_all(stream); + r.uniform(database.data(), num_db_vecs * dim, T(-1.0), T(1.0), stream_); + r.uniform(search_queries.data(), num_queries * dim, T(-1.0), T(1.0), stream_); } void launchFaissBfknn() @@ -169,37 +174,38 @@ class FusedL2KNNTest : public ::testing::TestWithParam { gpu_res.noTempMemory(); int device; RAFT_CUDA_TRY(cudaGetDevice(&device)); - gpu_res.setDefaultStream(device, handle_.get_stream()); + gpu_res.setDefaultStream(device, stream_); faiss::gpu::GpuDistanceParams args; args.metric = m; args.metricArg = 0; args.k = k_; args.dims = dim; - args.vectors = database; + args.vectors = database.data(); args.vectorsRowMajor = true; args.numVectors = num_db_vecs; - args.queries = search_queries; + args.queries = search_queries.data(); args.queriesRowMajor = true; args.numQueries = num_queries; - args.outDistances = faiss_distances_; - args.outIndices = faiss_indices_; + args.outDistances = faiss_distances_.data(); + args.outIndices = faiss_indices_.data(); bfKnn(&gpu_res, args); } private: raft::handle_t handle_; + cudaStream_t stream_ = 0; FusedL2KNNInputs params_; int num_queries; int num_db_vecs; int dim; - T* database; - T* search_queries; - int64_t* raft_indices_; - T* raft_distances_; - int64_t* faiss_indices_; - T* faiss_distances_; + rmm::device_uvector database; + rmm::device_uvector search_queries; + rmm::device_uvector raft_indices_; + rmm::device_uvector raft_distances_; + rmm::device_uvector faiss_indices_; + rmm::device_uvector faiss_distances_; int k_; raft::distance::DistanceType metric; }; diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu index 802e3fee23..d61f8591a5 100644 --- a/cpp/test/stats/information_criterion.cu +++ b/cpp/test/stats/information_criterion.cu @@ -19,7 +19,8 @@ #include #include -#include +#include +#include #include @@ -59,21 +60,23 @@ struct BatchedICInputs { template class BatchedICTest : public ::testing::TestWithParam> { + public: + BatchedICTest() + : params(::testing::TestWithParam>::GetParam()), + stream(handle.get_stream()), + res_d(sizeof(T) * params.batch_size, stream) + { + } + protected: void SetUp() override { using std::vector; - params = ::testing::TestWithParam>::GetParam(); - - // Create stream and allocator - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - allocator = std::make_shared(); // Create arrays std::vector loglike_h = std::vector(params.batch_size); res_h.resize(params.batch_size); - T* loglike_d = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream); - res_d = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream); + rmm::device_uvector loglike_d(sizeof(T) * params.batch_size, stream); // Generate random data std::random_device rd; @@ -83,11 +86,11 @@ class BatchedICTest : public ::testing::TestWithParam> { loglike_h[i] = std::log(udis(gen)); // Copy the data to the device - raft::update_device(loglike_d, loglike_h.data(), params.batch_size, stream); + raft::update_device(loglike_d.data(), loglike_h.data(), params.batch_size, stream); // Compute the tested results - information_criterion_batched(res_d, - loglike_d, + information_criterion_batched(res_d.data(), + loglike_d.data(), params.ic_type, params.n_params, params.batch_size, @@ -103,22 +106,14 @@ class BatchedICTest : public ::testing::TestWithParam> { params.n_samples); RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - - allocator->deallocate(loglike_d, sizeof(T) * params.batch_size, stream); - } - - void TearDown() override - { - allocator->deallocate(res_d, sizeof(T) * params.batch_size, stream); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } protected: - std::shared_ptr allocator; + raft::handle_t handle; + cudaStream_t stream = 0; BatchedICInputs params; - T* res_d; + rmm::device_uvector res_d; std::vector res_h; - cudaStream_t stream = 0; }; // Test parameters (op, n_batches, m, n, p, q, tolerance) @@ -133,13 +128,19 @@ using BatchedICTestD = BatchedICTest; using BatchedICTestF = BatchedICTest; TEST_P(BatchedICTestD, Result) { - ASSERT_TRUE(devArrMatchHost( - res_h.data(), res_d, params.batch_size, raft::CompareApprox(params.tolerance), stream)); + ASSERT_TRUE(devArrMatchHost(res_h.data(), + res_d.data(), + params.batch_size, + raft::CompareApprox(params.tolerance), + stream)); } TEST_P(BatchedICTestF, Result) { - ASSERT_TRUE(devArrMatchHost( - res_h.data(), res_d, params.batch_size, raft::CompareApprox(params.tolerance), stream)); + ASSERT_TRUE(devArrMatchHost(res_h.data(), + res_d.data(), + params.batch_size, + raft::CompareApprox(params.tolerance), + stream)); } INSTANTIATE_TEST_CASE_P(BatchedICTests, BatchedICTestD, ::testing::ValuesIn(inputsd)); diff --git a/python/raft/common/handle.pxd b/python/raft/common/handle.pxd index 8415b7e3d7..48ec625370 100644 --- a/python/raft/common/handle.pxd +++ b/python/raft/common/handle.pxd @@ -26,19 +26,12 @@ from rmm._lib.cuda_stream_pool cimport cuda_stream_pool from libcpp.memory cimport shared_ptr from libcpp.memory cimport unique_ptr -cdef extern from "raft/mr/device/allocator.hpp" \ - namespace "raft::mr::device" nogil: - cdef cppclass allocator: - pass - cdef extern from "raft/handle.hpp" namespace "raft" nogil: cdef cppclass handle_t: handle_t() except + handle_t(cuda_stream_view stream_view) except + handle_t(cuda_stream_view stream_view, shared_ptr[cuda_stream_pool] stream_pool) except + - void set_device_allocator(shared_ptr[allocator] a) except + - shared_ptr[allocator] get_device_allocator() except + cuda_stream_view get_stream() except + void sync_stream() except + diff --git a/python/raft/common/handle.pyx b/python/raft/common/handle.pyx index 661c5b5f23..de3952cc87 100644 --- a/python/raft/common/handle.pyx +++ b/python/raft/common/handle.pyx @@ -20,7 +20,6 @@ # cython: language_level = 3 # import raft -from libcpp.memory cimport shared_ptr from rmm._lib.cuda_stream_view cimport cuda_stream_per_thread from rmm._lib.cuda_stream_view cimport cuda_stream_view