From cb0707c4a9c4d29797a88addb3f1407b07a228c5 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 14 Jan 2025 15:48:35 +0900 Subject: [PATCH 1/6] Update default batch size for new vector addition --- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 358b7643e..c21a23da0 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -254,7 +254,8 @@ void add_graph_nodes( const std::size_t degree = index.graph_degree(); const std::size_t dim = index.dim(); const std::size_t stride = input_updated_dataset_view.stride(0); - const std::size_t max_chunk_size_ = params.max_chunk_size == 0 ? 1 : params.max_chunk_size; + const std::size_t max_chunk_size_ = + params.max_chunk_size == 0 ? new_dataset_size : params.max_chunk_size; raft::copy(updated_graph_view.data_handle(), index.graph().data_handle(), From 26a075e65ec56528082a1cf08be4110839773410 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 14 Jan 2025 16:51:54 +0900 Subject: [PATCH 2/6] Update search batch size in cagra::extend --- cpp/include/cuvs/neighbors/cagra.hpp | 2 ++ cpp/src/neighbors/detail/cagra/add_nodes.cuh | 25 +++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index a4684ce26..0035930b8 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -247,6 +247,8 @@ struct extend_params { * degrade recall because no edges are added between the nodes in the same chunk. Auto select when * 0. */ uint32_t max_chunk_size = 0; + + uint32_t max_working_device_memory_size_in_megabyte = 512; }; /** diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index c21a23da0..deec6660b 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -37,7 +37,8 @@ void add_node_core( const cuvs::neighbors::cagra::index& idx, raft::mdspan, raft::layout_stride, Accessor> additional_dataset_view, - raft::host_matrix_view updated_graph) + raft::host_matrix_view updated_graph, + const cuvs::neighbors::cagra::extend_params& extend_params) { using DistanceT = float; const std::size_t degree = idx.graph_degree(); @@ -68,7 +69,15 @@ void add_node_core( new_size, raft::resource::get_cuda_stream(handle)); - const std::size_t max_chunk_size = 1024; + const std::size_t data_size_per_vector = + sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; + const std::size_t max_search_batch_size = std::max( + 1lu, + extend_params.max_working_device_memory_size_in_megabyte * (1u << 20) / data_size_per_vector); + if (extend_params.max_working_device_memory_size_in_megabyte == 0) { + RAFT_LOG_DEBUG("Overwrites the memory size for the extend function to %lu Byte", + data_size_per_vector); + } cuvs::neighbors::cagra::search_params params; params.itopk_size = std::max(base_degree * 2lu, 256lu); @@ -77,22 +86,22 @@ void add_node_core( auto mr = raft::resource::get_workspace_resource(handle); auto neighbor_indices = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, base_degree)); auto neighbor_distances = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, base_degree)); + handle, mr, raft::make_extents(max_search_batch_size, base_degree)); auto queries = raft::make_device_mdarray( - handle, mr, raft::make_extents(max_chunk_size, dim)); + handle, mr, raft::make_extents(max_search_batch_size, dim)); auto host_neighbor_indices = - raft::make_host_matrix(max_chunk_size, base_degree); + raft::make_host_matrix(max_search_batch_size, base_degree); cuvs::spatial::knn::detail::utils::batch_load_iterator additional_dataset_batch( additional_dataset_view.data_handle(), num_add, additional_dataset_view.stride(0), - max_chunk_size, + max_search_batch_size, raft::resource::get_cuda_stream(handle), raft::resource::get_workspace_resource(handle)); for (const auto& batch : additional_dataset_batch) { @@ -299,7 +308,7 @@ void add_graph_nodes( stride); neighbors::cagra::add_node_core( - handle, internal_index, additional_dataset_view, updated_graph); + handle, internal_index, additional_dataset_view, updated_graph, params); raft::resource::sync_stream(handle); } } From 642e3293bef9a784016fc4f2371bf2961e00601b Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 14 Jan 2025 00:18:27 -0800 Subject: [PATCH 3/6] Update `max_search_batch_size` --- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index deec6660b..80803a972 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -71,9 +71,11 @@ void add_node_core( const std::size_t data_size_per_vector = sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; - const std::size_t max_search_batch_size = std::max( - 1lu, - extend_params.max_working_device_memory_size_in_megabyte * (1u << 20) / data_size_per_vector); + const std::size_t max_search_batch_size = + std::min(std::max(1lu, + extend_params.max_working_device_memory_size_in_megabyte * (1u << 20) / + data_size_per_vector), + num_add); if (extend_params.max_working_device_memory_size_in_megabyte == 0) { RAFT_LOG_DEBUG("Overwrites the memory size for the extend function to %lu Byte", data_size_per_vector); From 768ecbaecc3f11f485181454dcc147edfc27aa6b Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 14 Jan 2025 00:24:30 -0800 Subject: [PATCH 4/6] Add a comment about `max_working_device_memory_size_in_megabyte` --- cpp/include/cuvs/neighbors/cagra.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 0035930b8..241e411d7 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -248,6 +248,10 @@ struct extend_params { * 0. */ uint32_t max_chunk_size = 0; + /** The dataset chunk where the maximum size is defined by `max_chunk_size` is divided by + * sub-chunks to limit the working memory usage. This is the knob to control the working memory + * usage. Large working memory size can result in high throughput. + * */ uint32_t max_working_device_memory_size_in_megabyte = 512; }; From 5e2d306dab004ef4c22da4704c3437544cf537a9 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Mon, 20 Jan 2025 10:05:37 +0900 Subject: [PATCH 5/6] Remove `extend_params::max_working_device_memory_size_in_megabyte` and use `raft::resource::get_workspace_free_bytes` instead --- cpp/include/cuvs/neighbors/cagra.hpp | 6 ------ cpp/src/neighbors/detail/cagra/add_nodes.cuh | 9 ++------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index 241e411d7..a4684ce26 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -247,12 +247,6 @@ struct extend_params { * degrade recall because no edges are added between the nodes in the same chunk. Auto select when * 0. */ uint32_t max_chunk_size = 0; - - /** The dataset chunk where the maximum size is defined by `max_chunk_size` is divided by - * sub-chunks to limit the working memory usage. This is the knob to control the working memory - * usage. Large working memory size can result in high throughput. - * */ - uint32_t max_working_device_memory_size_in_megabyte = 512; }; /** diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 80803a972..182dab76e 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -72,14 +72,9 @@ void add_node_core( const std::size_t data_size_per_vector = sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; const std::size_t max_search_batch_size = - std::min(std::max(1lu, - extend_params.max_working_device_memory_size_in_megabyte * (1u << 20) / - data_size_per_vector), + std::min(std::max(1lu, raft::resource::get_workspace_free_bytes(handle) / data_size_per_vector), num_add); - if (extend_params.max_working_device_memory_size_in_megabyte == 0) { - RAFT_LOG_DEBUG("Overwrites the memory size for the extend function to %lu Byte", - data_size_per_vector); - } + RAFT_EXPECTS(max_search_batch_size > 0, "No enough working memory space is left."); cuvs::neighbors::cagra::search_params params; params.itopk_size = std::max(base_degree * 2lu, 256lu); From eb00a4f5d5de8fc960569e40b86736083bd218f7 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Mon, 20 Jan 2025 21:36:34 +0900 Subject: [PATCH 6/6] Update working memory calculation --- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 182dab76e..453928992 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -69,8 +69,15 @@ void add_node_core( new_size, raft::resource::get_cuda_stream(handle)); - const std::size_t data_size_per_vector = + std::size_t data_size_per_vector = sizeof(IdxT) * base_degree + sizeof(DistanceT) * base_degree + sizeof(T) * dim; + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, additional_dataset_view.data_handle())); + if (attr.devicePointer == nullptr) { + // for batch_load_iterator + data_size_per_vector += sizeof(T) * dim; + } + const std::size_t max_search_batch_size = std::min(std::max(1lu, raft::resource::get_workspace_free_bytes(handle) / data_size_per_vector), num_add); @@ -100,7 +107,7 @@ void add_node_core( additional_dataset_view.stride(0), max_search_batch_size, raft::resource::get_cuda_stream(handle), - raft::resource::get_workspace_resource(handle)); + mr); for (const auto& batch : additional_dataset_batch) { // Step 1: Obtain K (=base_degree) nearest neighbors of the new vectors by CAGRA search // Create queries