From 8a4c4a81d9aad4ad952990f4dabb2569a0a857a5 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Fri, 14 Apr 2023 14:01:33 +0200 Subject: [PATCH] Split memory_pool, fused_l2_knn, coalesced_reduction, selection_faiss --- cpp/CMakeLists.txt | 40 ++++++--- .../core/resource/device_memory_resource.hpp | 3 +- .../linalg/detail/coalesced_reduction-ext.cuh | 74 ++++++++++++++++ .../linalg/detail/coalesced_reduction-inl.cuh | 21 +---- .../detail/coalesced_reduction-types.cuh | 34 ++++++++ .../linalg/detail/coalesced_reduction.cuh | 25 ++++++ .../raft/neighbors/detail/knn_brute_force.cuh | 1 + .../neighbors/detail/selection_faiss-ext.cuh | 70 ++++++++++++++++ .../raft/neighbors/detail/selection_faiss.cuh | 25 ++++++ .../spatial/knn/detail/fused_l2_knn-ext.cuh | 84 +++++++++++++++++++ .../raft/spatial/knn/detail/fused_l2_knn.cuh | 24 ++++++ cpp/include/raft/util/cudart_utils.hpp | 52 +----------- cpp/include/raft/util/memory_pool-ext.hpp | 59 +++++++++++++ cpp/include/raft/util/memory_pool-inl.hpp | 43 ++++++++++ cpp/include/raft/util/memory_pool.hpp | 23 +++++ cpp/src/linalg/detail/coalesced_reduction.cu | 69 +++++++++++++++ cpp/src/neighbors/detail/selection_faiss.cu | 35 ++++++++ cpp/src/util/memory_pool.cpp | 17 ++++ cpp/test/util/device_atomics.cu | 1 - 19 files changed, 620 insertions(+), 80 deletions(-) create mode 100644 cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh create mode 100644 cpp/include/raft/linalg/detail/coalesced_reduction-types.cuh create mode 100644 cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh create mode 100644 cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh create mode 100644 cpp/include/raft/util/memory_pool-ext.hpp create mode 100644 cpp/include/raft/util/memory_pool-inl.hpp create mode 100644 cpp/src/linalg/detail/coalesced_reduction.cu create mode 100644 cpp/src/neighbors/detail/selection_faiss.cu create mode 100644 cpp/src/util/memory_pool.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 94b24be853..f39638946c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -13,6 +13,17 @@ set(RAPIDS_VERSION "23.04") set(RAFT_VERSION "23.04.00") +include(FetchContent) +FetchContent_Declare( + rapids-cmake + GIT_REPOSITORY https://github.com/ahendriksen/rapids-cmake.git + GIT_TAG different-rmm +) +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.02/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake +) +include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) + cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR) include(../fetch_rapids.cmake) include(rapids-cmake) @@ -276,12 +287,14 @@ if(RAFT_COMPILE_LIBRARY) src/cluster/cluster_cost_float.cu src/cluster/cluster_cost_double.cu src/core/logger.cpp + src/linalg/detail/coalesced_reduction.cu src/neighbors/refine_d_int64_t_float.cu src/neighbors/refine_d_int64_t_int8_t.cu src/neighbors/refine_d_int64_t_uint8_t.cu src/neighbors/refine_h_int64_t_float.cu src/neighbors/refine_h_int64_t_int8_t.cu src/neighbors/refine_h_int64_t_uint8_t.cu + src/neighbors/detail/selection_faiss.cu src/neighbors/specializations/refine_d_int64_t_float.cu src/neighbors/specializations/refine_d_int64_t_int8_t.cu src/neighbors/specializations/refine_d_int64_t_uint8_t.cu @@ -382,17 +395,17 @@ if(RAFT_COMPILE_LIBRARY) src/random/rmat_rectangular_generator_int64_double.cu src/random/rmat_rectangular_generator_int_float.cu src/random/rmat_rectangular_generator_int64_float.cu - src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu - src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu - src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu - src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu + # src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu + # src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu + # src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu + # src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu src/neighbors/specializations/ball_cover_all_knn_query.cu src/neighbors/specializations/ball_cover_build_index.cu src/neighbors/specializations/ball_cover_knn_query.cu - src/neighbors/specializations/fused_l2_knn_long_float_true.cu - src/neighbors/specializations/fused_l2_knn_long_float_false.cu - src/neighbors/specializations/fused_l2_knn_int_float_true.cu - src/neighbors/specializations/fused_l2_knn_int_float_false.cu + # src/neighbors/specializations/fused_l2_knn_long_float_true.cu + # src/neighbors/specializations/fused_l2_knn_long_float_false.cu + # src/neighbors/specializations/fused_l2_knn_int_float_true.cu + # src/neighbors/specializations/fused_l2_knn_int_float_false.cu src/neighbors/ivf_flat_search.cu src/neighbors/ivf_flat_build.cu src/neighbors/specializations/ivfflat_build_float_int64_t.cu @@ -445,6 +458,7 @@ if(RAFT_COMPILE_LIBRARY) src/random/rmat_rectangular_generator_int_float.cu src/random/rmat_rectangular_generator_int64_float.cu src/spatial/knn/detail/ball_cover/registers.cu + src/util/memory_pool.cpp ) set_target_properties( raft_lib @@ -618,7 +632,9 @@ rapids_export( COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} GLOBAL_TARGETS raft compiled distributed - NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string + NAMESPACE raft:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string ) # ################################################################################################## @@ -628,8 +644,10 @@ rapids_export( EXPORT_SET raft-exports COMPONENTS ${raft_components} COMPONENTS_EXPORT_SET ${raft_export_sets} - GLOBAL_TARGETS raft - compiled distributed DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string + GLOBAL_TARGETS raft compiled distributed + DOCUMENTATION doc_string + NAMESPACE raft:: + FINAL_CODE_BLOCK code_string ) # ################################################################################################## diff --git a/cpp/include/raft/core/resource/device_memory_resource.hpp b/cpp/include/raft/core/resource/device_memory_resource.hpp index 35ae3d715f..ebc41e0f8e 100644 --- a/cpp/include/raft/core/resource/device_memory_resource.hpp +++ b/cpp/include/raft/core/resource/device_memory_resource.hpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace raft::resource { class device_memory_resource : public resource { @@ -72,4 +73,4 @@ inline void set_workspace_resource(resources const& res, rmm::mr::device_memory_ { res.add_resource_factory(std::make_shared(mr)); }; -} // namespace raft::resource \ No newline at end of file +} // namespace raft::resource diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh new file mode 100644 index 0000000000..2a1bafae43 --- /dev/null +++ b/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "coalesced_reduction-types.cuh" +#include + +// Include inline definition as well. We cannot possibly cover all +// instantiations in this file. +#include "coalesced_reduction-inl.cuh" + +#define instantiate_raft_linalg_detail_coalescedReduction( \ + InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda) \ + extern template void raft::linalg::detail::coalescedReduction(OutType* dots, \ + const InType* data, \ + IdxType D, \ + IdxType N, \ + OutType init, \ + cudaStream_t stream, \ + bool inplace, \ + MainLambda main_op, \ + ReduceLambda reduce_op, \ + FinalLambda final_op) + +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::identity_op, raft::min_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::abs_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::abs_op, raft::max_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::abs_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::identity_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::identity_op, raft::min_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, long, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op); + +#undef instantiate_raft_linalg_detail_coalescedReduction diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh index 4cc19d4a17..7ba4537b0f 100644 --- a/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh +++ b/cpp/include/raft/linalg/detail/coalesced_reduction-inl.cuh @@ -16,23 +16,17 @@ #pragma once +#include "coalesced_reduction-types.cuh" // policy structs #include -#include +#include #include #include -#include +#include // device_uvector namespace raft { namespace linalg { namespace detail { -template -struct ReductionThinPolicy { - static constexpr int LogicalWarpSize = warpSize; - static constexpr int RowsPerBlock = rpb; - static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock; -}; - template -struct ReductionThickPolicy { - static constexpr int ThreadsPerBlock = tpb; - static constexpr int BlocksPerRow = bpr; - static constexpr int BlockStride = tpb * bpr; -}; - template +struct ReductionThinPolicy { + static constexpr int LogicalWarpSize = warpSize; + static constexpr int RowsPerBlock = rpb; + static constexpr int ThreadsPerBlock = LogicalWarpSize * RowsPerBlock; +}; + +template +struct ReductionThickPolicy { + static constexpr int ThreadsPerBlock = tpb; + static constexpr int BlocksPerRow = bpr; + static constexpr int BlockStride = tpb * bpr; +}; + +} // namespace raft::linalg::detail diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh index e69de29bb2..9a51611de1 100644 --- a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if defined(RAFT_COMPILED) && defined(RAFT_EXPLICIT_INSTANTIATE) +// Too many lambdas and complicated types to instantiate everything.. +#include "coalesced_reduction-ext.cuh" +#include "coalesced_reduction-inl.cuh" +#else +#include "coalesced_reduction-inl.cuh" +#endif diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh index a776ce2586..0148a1a887 100644 --- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh +++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh new file mode 100644 index 0000000000..cd6fdee192 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // size_t +#include // uint32_t +#include // RAFT_EXPLICIT + +#if defined(RAFT_EXPLICIT_INSTANTIATE) + +namespace raft::neighbors::detail { +/** + * @brief Select the k-nearest neighbors from dense + * distance and index matrices. + * + * @param[in] inK partitioned knn distance matrix + * @param[in] inV partitioned knn index matrix + * @param[in] n_rows number of rows in distance and index matrices + * @param[in] n_cols number of columns in distance and index matrices + * @param[out] outK merged knn distance matrix + * @param[out] outV merged knn index matrix + * @param[in] select_min whether to select the min or the max distances + * @param[in] k number of neighbors per partition (also number of merged neighbors) + * @param[in] stream CUDA stream to use + */ +template +void select_k(const key_t* inK, + const payload_t* inV, + size_t n_rows, + size_t n_cols, + key_t* outK, + payload_t* outV, + bool select_min, + int k, + cudaStream_t stream) RAFT_EXPLICIT; +}; // namespace raft::neighbors::detail + +#endif // RAFT_EXPLICIT_INSTANTIATE + +#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t) \ + extern template void raft::neighbors::detail::select_k(const key_t* inK, \ + const payload_t* inV, \ + size_t n_rows, \ + size_t n_cols, \ + key_t* outK, \ + payload_t* outV, \ + bool select_min, \ + int k, \ + cudaStream_t stream) + +// @benfred: Not sure if this is correct. Should I not flip float and uint32_t? +// It seems weird that float is the key and uint32_t is the payload type. +instantiate_raft_neighbors_detail_select_k(uint32_t, float); +instantiate_raft_neighbors_detail_select_k(long, float); + +#undef instantiate_raft_neighbors_detail_select_k diff --git a/cpp/include/raft/neighbors/detail/selection_faiss.cuh b/cpp/include/raft/neighbors/detail/selection_faiss.cuh index e69de29bb2..ccdba994d1 100644 --- a/cpp/include/raft/neighbors/detail/selection_faiss.cuh +++ b/cpp/include/raft/neighbors/detail/selection_faiss.cuh @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#if defined(RAFT_COMPILED) +#include "selection_faiss-ext.cuh" +#endif + +#if !defined(RAFT_EXPLICIT_INSTANTIATE) +#include "selection_faiss-inl.cuh" +#endif diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh new file mode 100644 index 0000000000..5f4d5a6347 --- /dev/null +++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include // size_t +#include // DistanceType +#include // RAFT_EXPLICIT + +#if defined(RAFT_EXPLICIT_INSTANTIATE) + +namespace raft::spatial::knn::detail { +/** + * Compute the k-nearest neighbors using L2 expanded/unexpanded distance. + + * @tparam value_idx + * @tparam value_t + * @param[out] out_inds output indices array on device (size n_query_rows * k) + * @param[out] out_dists output dists array on device (size n_query_rows * k) + * @param[in] index input index array on device (size n_index_rows * D) + * @param[in] query input query array on device (size n_query_rows * D) + * @param[in] n_index_rows number of rows in index array + * @param[in] n_query_rows number of rows in query array + * @param[in] k number of closest neighbors to return + * @param[in] rowMajorIndex are the index arrays in row-major layout? + * @param[in] rowMajorQuery are the query array in row-major layout? + * @param[in] stream stream to order kernel launch + */ +template +void fusedL2Knn(size_t D, + value_idx* out_inds, + value_t* out_dists, + const value_t* index, + const value_t* query, + size_t n_index_rows, + size_t n_query_rows, + int k, + bool rowMajorIndex, + bool rowMajorQuery, + cudaStream_t stream, + raft::distance::DistanceType metric) RAFT_EXPLICIT; + +} // namespace raft::spatial::knn::detail + +#endif // RAFT_EXPLICIT_INSTANTIATE + +#define instantiate_raft_spatial_knn_detail_fusedL2Knn(Mvalue_idx, Mvalue_t, MusePrevTopKs) \ + extern template void \ + raft::spatial::knn::detail::fusedL2Knn( \ + size_t D, \ + Mvalue_idx * out_inds, \ + Mvalue_t * out_dists, \ + const Mvalue_t* index, \ + const Mvalue_t* query, \ + size_t n_index_rows, \ + size_t n_query_rows, \ + int k, \ + bool rowMajorIndex, \ + bool rowMajorQuery, \ + cudaStream_t stream, \ + raft::distance::DistanceType metric) + +instantiate_raft_spatial_knn_detail_fusedL2Knn(long, float, true); +instantiate_raft_spatial_knn_detail_fusedL2Knn(long, float, false); +instantiate_raft_spatial_knn_detail_fusedL2Knn(int, float, true); +instantiate_raft_spatial_knn_detail_fusedL2Knn(int, float, false); + +// These are used by brute_force_knn: +instantiate_raft_spatial_knn_detail_fusedL2Knn(unsigned int, float, true); +instantiate_raft_spatial_knn_detail_fusedL2Knn(unsigned int, float, false); + +#undef instantiate_raft_spatial_knn_detail_fusedL2Knn diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh index e69de29bb2..7b16fc6f72 100644 --- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh +++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn.cuh @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#if defined(RAFT_COMPILED) +#include "fused_l2_knn-ext.cuh" +#endif + +#if !defined(RAFT_EXPLICIT_INSTANTIATE) +#include "fused_l2_knn-inl.cuh" +#endif diff --git a/cpp/include/raft/util/cudart_utils.hpp b/cpp/include/raft/util/cudart_utils.hpp index 1134513587..f3b083ac4a 100644 --- a/cpp/include/raft/util/cudart_utils.hpp +++ b/cpp/include/raft/util/cudart_utils.hpp @@ -18,10 +18,9 @@ #include #include +#include + #include -#include -#include -#include #include #include @@ -451,51 +450,4 @@ constexpr inline auto upper_bound() -> half return static_cast(__half_constexpr{0x7c00u}); } -/** - * @brief Get a pointer to a pooled memory resource within the scope of the lifetime of the returned - * unique pointer. - * - * This function is useful in the code where multiple repeated allocations/deallocations are - * expected. - * Use case example: - * @code{.cpp} - * void my_func(..., size_t n, rmm::mr::device_memory_resource* mr = nullptr) { - * auto pool_guard = raft::get_pool_memory_resource(mr, 2 * n * sizeof(float)); - * if (pool_guard){ - * RAFT_LOG_INFO("Created a pool %zu bytes", pool_guard->pool_size()); - * } else { - * RAFT_LOG_INFO("Using the current default or explicitly passed device memory resource"); - * } - * rmm::device_uvector x(n, stream, mr); - * rmm::device_uvector y(n, stream, mr); - * ... - * } - * @endcode - * Here, the new memory resource would be created within the function scope if the passed `mr` is - * null and the default resource is not a pool. After the call, `mr` contains a valid memory - * resource in any case. - * - * @param[inout] mr if not null do nothing; otherwise get the current device resource and wrap it - * into a `pool_memory_resource` if necessary and return the pointer to the result. - * @param initial_size if a new memory pool is created, this would be its initial size (rounded up - * to 256 bytes). - * - * @return if a new memory pool is created, it returns a unique_ptr to it; - * this managed pointer controls the lifetime of the created memory resource. - */ -inline auto get_pool_memory_resource(rmm::mr::device_memory_resource*& mr, size_t initial_size) -{ - using pool_res_t = rmm::mr::pool_memory_resource; - std::unique_ptr pool_res{}; - if (mr) return pool_res; - mr = rmm::mr::get_current_device_resource(); - if (!dynamic_cast(mr) && - !dynamic_cast*>(mr) && - !dynamic_cast*>(mr)) { - pool_res = std::make_unique(mr, (initial_size + 255) & (~255)); - mr = pool_res.get(); - } - return pool_res; -} - } // namespace raft diff --git a/cpp/include/raft/util/memory_pool-ext.hpp b/cpp/include/raft/util/memory_pool-ext.hpp new file mode 100644 index 0000000000..fb48bc70c4 --- /dev/null +++ b/cpp/include/raft/util/memory_pool-ext.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include // std::unique_ptr +#include // rmm::mr::device_memory_resource + +namespace raft { + +/** + * @brief Get a pointer to a pooled memory resource within the scope of the lifetime of the returned + * unique pointer. + * + * This function is useful in the code where multiple repeated allocations/deallocations are + * expected. + * Use case example: + * @code{.cpp} + * void my_func(..., size_t n, rmm::mr::device_memory_resource* mr = nullptr) { + * auto pool_guard = raft::get_pool_memory_resource(mr, 2 * n * sizeof(float)); + * if (pool_guard){ + * RAFT_LOG_INFO("Created a pool %zu bytes", pool_guard->pool_size()); + * } else { + * RAFT_LOG_INFO("Using the current default or explicitly passed device memory resource"); + * } + * rmm::device_uvector x(n, stream, mr); + * rmm::device_uvector y(n, stream, mr); + * ... + * } + * @endcode + * Here, the new memory resource would be created within the function scope if the passed `mr` is + * null and the default resource is not a pool. After the call, `mr` contains a valid memory + * resource in any case. + * + * @param[inout] mr if not null do nothing; otherwise get the current device resource and wrap it + * into a `pool_memory_resource` if necessary and return the pointer to the result. + * @param initial_size if a new memory pool is created, this would be its initial size (rounded up + * to 256 bytes). + * + * @return if a new memory pool is created, it returns a unique_ptr to it; + * this managed pointer controls the lifetime of the created memory resource. + */ +std::unique_ptr get_pool_memory_resource( + rmm::mr::device_memory_resource*& mr, size_t initial_size); + +} // namespace raft diff --git a/cpp/include/raft/util/memory_pool-inl.hpp b/cpp/include/raft/util/memory_pool-inl.hpp new file mode 100644 index 0000000000..43ac24d3f6 --- /dev/null +++ b/cpp/include/raft/util/memory_pool-inl.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include + +#include +#include +#include + +namespace raft { + +std::unique_ptr get_pool_memory_resource( + rmm::mr::device_memory_resource*& mr, size_t initial_size) +{ + using pool_res_t = rmm::mr::pool_memory_resource; + std::unique_ptr pool_res{}; + if (mr) return pool_res; + mr = rmm::mr::get_current_device_resource(); + if (!dynamic_cast(mr) && + !dynamic_cast*>(mr) && + !dynamic_cast*>(mr)) { + pool_res = std::make_unique(mr, (initial_size + 255) & (~255)); + mr = pool_res.get(); + } + return pool_res; +} + +} // namespace raft diff --git a/cpp/include/raft/util/memory_pool.hpp b/cpp/include/raft/util/memory_pool.hpp index e69de29bb2..3bf0beb773 100644 --- a/cpp/include/raft/util/memory_pool.hpp +++ b/cpp/include/raft/util/memory_pool.hpp @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifdef RAFT_COMPILED +#include "memory_pool-ext.hpp" +#else +#include "memory_pool-inl.hpp" +#endif // RAFT_COMPILED diff --git a/cpp/src/linalg/detail/coalesced_reduction.cu b/cpp/src/linalg/detail/coalesced_reduction.cu new file mode 100644 index 0000000000..00d025df46 --- /dev/null +++ b/cpp/src/linalg/detail/coalesced_reduction.cu @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// #include + +#include + +#define instantiate_raft_linalg_detail_coalescedReduction( \ + InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda) \ + template void raft::linalg::detail::coalescedReduction(OutType* dots, \ + const InType* data, \ + IdxType D, \ + IdxType N, \ + OutType init, \ + cudaStream_t stream, \ + bool inplace, \ + MainLambda main_op, \ + ReduceLambda reduce_op, \ + FinalLambda final_op) + +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::identity_op, raft::min_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::abs_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + double, double, int, raft::abs_op, raft::max_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::abs_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::identity_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::identity_op, raft::min_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, long, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op); +instantiate_raft_linalg_detail_coalescedReduction( + float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op); + +#undef instantiate_raft_linalg_detail_coalescedReduction diff --git a/cpp/src/neighbors/detail/selection_faiss.cu b/cpp/src/neighbors/detail/selection_faiss.cu new file mode 100644 index 0000000000..067ac5fdda --- /dev/null +++ b/cpp/src/neighbors/detail/selection_faiss.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define instantiate_raft_neighbors_detail_select_k(payload_t, key_t) \ + template void raft::neighbors::detail::select_k(const key_t* inK, \ + const payload_t* inV, \ + size_t n_rows, \ + size_t n_cols, \ + key_t* outK, \ + payload_t* outV, \ + bool select_min, \ + int k, \ + cudaStream_t stream) + +// @benfred: Not sure if this is correct. Should I not flip float and uint32_t? +// It seems weird that float is the key and uint32_t is the payload type. +instantiate_raft_neighbors_detail_select_k(uint32_t, float); +instantiate_raft_neighbors_detail_select_k(long, float); + +#undef instantiate_raft_neighbors_detail_select_k diff --git a/cpp/src/util/memory_pool.cpp b/cpp/src/util/memory_pool.cpp new file mode 100644 index 0000000000..837e870043 --- /dev/null +++ b/cpp/src/util/memory_pool.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include diff --git a/cpp/test/util/device_atomics.cu b/cpp/test/util/device_atomics.cu index 5e8a67c8f6..0b22073d1b 100644 --- a/cpp/test/util/device_atomics.cu +++ b/cpp/test/util/device_atomics.cu @@ -25,7 +25,6 @@ #include #include #include -#include namespace raft {