From d8b91872cdf89d20ec122c1b3968bf94fa6e63c3 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 27 Jul 2023 23:30:10 +0200 Subject: [PATCH] Rename CAGRA parameter num_parents to search_width (#1676) The name `search_width` is more expressive, therefore this PR renames `num_parents` to `search_width`. The original CAGRA implementation used both names internally, but externally it used `search_width`. Authors: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1676 --- cpp/bench/ann/src/raft/raft_benchmark.cu | 2 +- cpp/bench/prims/neighbors/cagra_bench.cuh | 12 +++---- cpp/include/raft/neighbors/cagra_types.hpp | 2 +- .../detail/cagra/compute_distance.hpp | 8 ++--- .../detail/cagra/search_multi_cta.cuh | 14 ++++---- .../cagra/search_multi_cta_kernel-ext.cuh | 4 +-- .../cagra/search_multi_cta_kernel-inl.cuh | 28 +++++++-------- .../detail/cagra/search_multi_kernel.cuh | 34 +++++++++---------- .../neighbors/detail/cagra/search_plan.cuh | 22 ++++++------ .../detail/cagra/search_single_cta.cuh | 10 +++--- .../cagra/search_single_cta_kernel-ext.cuh | 4 +-- .../cagra/search_single_cta_kernel-inl.cuh | 26 +++++++------- .../cagra/search_multi_cta_00_generate.py | 2 +- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 2 +- ...search_multi_cta_float_uint32_dim128_t8.cu | 2 +- ...earch_multi_cta_float_uint32_dim256_t16.cu | 2 +- ...earch_multi_cta_float_uint32_dim512_t32.cu | 2 +- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 2 +- ...search_multi_cta_float_uint64_dim128_t8.cu | 2 +- ...earch_multi_cta_float_uint64_dim256_t16.cu | 2 +- ...earch_multi_cta_float_uint64_dim512_t32.cu | 2 +- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 2 +- .../search_multi_cta_int8_uint32_dim128_t8.cu | 2 +- ...search_multi_cta_int8_uint32_dim256_t16.cu | 2 +- ...search_multi_cta_int8_uint32_dim512_t32.cu | 2 +- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 2 +- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 2 +- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 2 +- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 2 +- .../cagra/search_single_cta_00_generate.py | 2 +- ...rch_single_cta_float_uint32_dim1024_t32.cu | 2 +- ...earch_single_cta_float_uint32_dim128_t8.cu | 2 +- ...arch_single_cta_float_uint32_dim256_t16.cu | 2 +- ...arch_single_cta_float_uint32_dim512_t32.cu | 2 +- ...rch_single_cta_float_uint64_dim1024_t32.cu | 2 +- ...earch_single_cta_float_uint64_dim128_t8.cu | 2 +- ...arch_single_cta_float_uint64_dim256_t16.cu | 2 +- ...arch_single_cta_float_uint64_dim512_t32.cu | 2 +- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 2 +- ...search_single_cta_int8_uint32_dim128_t8.cu | 2 +- ...earch_single_cta_int8_uint32_dim256_t16.cu | 2 +- ...earch_single_cta_int8_uint32_dim512_t32.cu | 2 +- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 2 +- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 2 +- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 2 +- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 2 +- cpp/test/neighbors/ann_cagra.cuh | 6 ++-- .../ann_cagra/search_kernel_uint64_t.cuh | 4 +-- 48 files changed, 122 insertions(+), 122 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index b43f52eb5c..4d8fdc9358 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -142,7 +142,7 @@ void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::SearchParam& param) { if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } - if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); } + if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } } #endif diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh index 1d223d250d..bb405088bb 100644 --- a/cpp/bench/prims/neighbors/cagra_bench.cuh +++ b/cpp/bench/prims/neighbors/cagra_bench.cuh @@ -38,7 +38,7 @@ struct params { int degree; int itopk_size; int block_size; - int num_parents; + int search_width; int max_iterations; }; @@ -85,7 +85,7 @@ struct CagraBench : public fixture { search_params.itopk_size = params_.itopk_size; search_params.team_size = 0; search_params.thread_block_size = params_.block_size; - search_params.num_parents = params_.num_parents; + search_params.search_width = params_.search_width; auto indices = make_device_matrix(handle, params_.n_queries, params_.k); auto distances = make_device_matrix(handle, params_.n_queries, params_.k); @@ -106,7 +106,7 @@ struct CagraBench : public fixture { int iterations = params_.max_iterations; if (iterations == 0) { // see search_plan_impl::adjust_search_params() - double r = params_.itopk_size / static_cast(params_.num_parents); + double r = params_.itopk_size / static_cast(params_.search_width); iterations = 1 + std::min(r * 1.1, r + 10); } state.counters["dataset (GiB)"] = data_size / (1 << 30); @@ -118,7 +118,7 @@ struct CagraBench : public fixture { state.counters["k"] = params_.k; state.counters["itopk_size"] = params_.itopk_size; state.counters["block_size"] = params_.block_size; - state.counters["num_parents"] = params_.num_parents; + state.counters["search_width"] = params_.search_width; state.counters["iterations"] = iterations; } @@ -140,7 +140,7 @@ inline const std::vector generate_inputs() {64}, // knn graph degree {64}, // itopk_size {0}, // block_size - {1}, // num_parents + {1}, // search_width {0} // max_iterations ); auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples @@ -150,7 +150,7 @@ inline const std::vector generate_inputs() {64}, // knn graph degree {64}, // itopk_size {64, 128, 256, 512, 1024}, // block_size - {1}, // num_parents + {1}, // search_width {0} // max_iterations ); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 130c7d70c8..2583afdaa9 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -80,7 +80,7 @@ struct search_params : ann::search_params { /*/ Number of graph nodes to select as the starting point for the search in each iteration. aka * search width?*/ - size_t num_parents = 1; + size_t search_width = 1; /** Lower limit of search iterations. */ size_t min_iterations = 0; diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 91e0d88e79..2758148942 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -155,13 +155,13 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T* const visited_hashmap_ptr, const std::uint32_t hash_bitlen, const INDEX_T* const parent_indices, - const std::uint32_t num_parents) + const std::uint32_t search_width) { const INDEX_T invalid_index = utils::get_max_value(); // Read child indices of parents from knn graph and check if the distance // computaiton is necessary. - for (uint32_t i = threadIdx.x; i < knn_k * num_parents; i += BLOCK_SIZE) { + for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += BLOCK_SIZE) { const INDEX_T parent_id = parent_indices[i / knn_k]; INDEX_T child_id = invalid_index; if (parent_id != invalid_index) { @@ -203,10 +203,10 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in __syncthreads(); // Compute the distance to child nodes - std::uint32_t max_i = knn_k * num_parents; + std::uint32_t max_i = knn_k * search_width; if (max_i % (32 / TEAM_SIZE)) { max_i += (32 / TEAM_SIZE) - (max_i % (32 / TEAM_SIZE)); } for (std::uint32_t i = threadIdx.x / TEAM_SIZE; i < max_i; i += BLOCK_SIZE / TEAM_SIZE) { - const bool valid_i = (i < (knn_k * num_parents)); + const bool valid_i = (i < (knn_k * search_width)); INDEX_T child_id = invalid_index; if (valid_i) { child_id = result_child_indices_ptr[i]; } diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 7f03433229..3fd4fca0f3 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -55,7 +55,7 @@ struct search : public search_plan_impl { using search_plan_impl::itopk_size; using search_plan_impl::algo; using search_plan_impl::team_size; - using search_plan_impl::num_parents; + using search_plan_impl::search_width; using search_plan_impl::min_iterations; using search_plan_impl::max_iterations; using search_plan_impl::thread_block_size; @@ -108,9 +108,9 @@ struct search : public search_plan_impl { void set_params(raft::resources const& res, const search_params& params) { this->itopk_size = 32; - num_parents = 1; - num_cta_per_query = max(params.num_parents, params.itopk_size / 32); - result_buffer_size = itopk_size + num_parents * graph_degree; + search_width = 1; + num_cta_per_query = max(params.search_width, params.itopk_size / 32); + result_buffer_size = itopk_size + search_width * graph_degree; typedef raft::Pow2<32> AlignBytes; unsigned result_buffer_size_32 = AlignBytes::roundUp(result_buffer_size); // constexpr unsigned max_result_buffer_size = 256; @@ -118,7 +118,7 @@ struct search : public search_plan_impl { smem_size = sizeof(float) * max_dim + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + - sizeof(uint32_t) * num_parents + sizeof(uint32_t); + sizeof(uint32_t) * search_width + sizeof(uint32_t); RAFT_LOG_DEBUG("# smem_size: %u", smem_size); // @@ -143,7 +143,7 @@ struct search : public search_plan_impl { cudaDeviceProp deviceProp = resource::get_device_properties(res); RAFT_LOG_DEBUG("# multiProcessorCount: %d", deviceProp.multiProcessorCount); while ((block_size < max_block_size) && - (graph_degree * num_parents * team_size >= block_size * 2) && + (graph_degree * search_width * team_size >= block_size * 2) && (num_cta_per_query * max_queries <= (1024 / (block_size * 2)) * deviceProp.multiProcessorCount)) { block_size *= 2; @@ -210,7 +210,7 @@ struct search : public search_plan_impl { rand_xor_mask, num_seeds, itopk_size, - num_parents, + search_width, min_iterations, max_iterations, stream); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index b18a1deed5..de83acbb64 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -46,7 +46,7 @@ void select_and_run(raft::device_matrix_view -__device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [num_parents] - const uint32_t num_parents, +__device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [search_width] + const uint32_t search_width, INDEX_T* const itopk_indices, // [num_itopk] const size_t num_itopk, uint32_t* const terminate_flag) @@ -56,7 +56,7 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [num const unsigned warp_id = threadIdx.x / 32; if (warp_id > 0) { return; } const unsigned lane_id = threadIdx.x % 32; - for (uint32_t i = lane_id; i < num_parents; i += 32) { + for (uint32_t i = lane_id; i < search_width; i += 32) { next_parent_indices[i] = utils::get_max_value(); } uint32_t max_itopk = num_itopk; @@ -74,13 +74,13 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [num const uint32_t ballot_mask = __ballot_sync(0xffffffff, new_parent); if (new_parent) { const auto i = __popc(ballot_mask & ((1 << lane_id) - 1)) + num_new_parents; - if (i < num_parents) { + if (i < search_width) { next_parent_indices[i] = index; itopk_indices[j] |= index_msb_1_mask; // set most significant bit as used node } } num_new_parents += __popc(ballot_mask); - if (num_new_parents >= num_parents) { break; } + if (num_new_parents >= search_width) { break; } } if (threadIdx.x == 0 && (num_new_parents == 0)) { *terminate_flag = 1; } } @@ -149,7 +149,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, const uint32_t itopk_size, - const uint32_t num_parents, + const uint32_t search_width, const uint32_t min_iteration, const uint32_t max_iteration, uint32_t* const num_executed_iterations /* stats */ @@ -183,10 +183,10 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( // Layout of result_buffer // +----------------+------------------------------+---------+ // | internal_top_k | neighbors of parent nodes | padding | - // | | | upto 32 | + // | | | upto 32 | // +----------------+------------------------------+---------+ // |<--- result_buffer_size --->| - uint32_t result_buffer_size = itopk_size + (num_parents * graph_degree); + uint32_t result_buffer_size = itopk_size + (search_width * graph_degree); uint32_t result_buffer_size_32 = result_buffer_size; if (result_buffer_size % 32) { result_buffer_size_32 += 32 - (result_buffer_size % 32); } assert(result_buffer_size_32 <= MAX_ELEMENTS); @@ -197,7 +197,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto parent_indices_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); - auto terminate_flag = reinterpret_cast(parent_indices_buffer + num_parents); + auto terminate_flag = reinterpret_cast(parent_indices_buffer + search_width); #if 0 /* debug */ @@ -252,7 +252,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( _CLK_START(); topk_by_bitonic_sort(result_distances_buffer, result_indices_buffer, - itopk_size + (num_parents * graph_degree), + itopk_size + (search_width * graph_degree), itopk_size); _CLK_REC(clk_topk); @@ -264,7 +264,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( // pick up next parents _CLK_START(); pickup_next_parents( - parent_indices_buffer, num_parents, result_indices_buffer, itopk_size, terminate_flag); + parent_indices_buffer, search_width, result_indices_buffer, itopk_size, terminate_flag); _CLK_REC(clk_pickup_parents); __syncthreads(); @@ -287,7 +287,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( local_visited_hashmap_ptr, hash_bitlen, parent_indices_buffer, - num_parents); + search_width); _CLK_REC(clk_compute_distance); __syncthreads(); @@ -472,7 +472,7 @@ void select_and_run( // raft::resources const& res, uint64_t rand_xor_mask, uint32_t num_seeds, size_t itopk_size, - size_t num_parents, + size_t search_width, size_t min_iterations, size_t max_iterations, cudaStream_t stream) @@ -510,7 +510,7 @@ void select_and_run( // raft::resources const& res, hashmap_ptr, hash_bitlen, itopk_size, - num_parents, + search_width, min_iterations, max_iterations, num_executed_iterations); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index adbd3d7a2b..e664764721 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -308,8 +308,8 @@ template __global__ void compute_distance_to_child_nodes_kernel( - const INDEX_T* const parent_node_list, // [num_queries, num_parents] - const std::uint32_t num_parents, + const INDEX_T* const parent_node_list, // [num_queries, search_width] + const std::uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, const std::uint32_t dataset_size, @@ -321,16 +321,16 @@ __global__ void compute_distance_to_child_nodes_kernel( const std::uint32_t hash_bitlen, INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd // (*) ldd >= num_parents * graph_degree + const std::uint32_t ldd // (*) ldd >= search_width * graph_degree ) { const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; - if (global_team_id >= num_parents * graph_degree) { return; } + if (global_team_id >= search_width * graph_degree) { return; } const std::size_t parent_index = - parent_node_list[global_team_id / graph_degree + (num_parents * blockIdx.y)]; + parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; if (parent_index == utils::get_max_value()) { result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); return; @@ -369,8 +369,8 @@ template void compute_distance_to_child_nodes( - const INDEX_T* const parent_node_list, // [num_queries, num_parents] - const uint32_t num_parents, + const INDEX_T* const parent_node_list, // [num_queries, search_width] + const uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, const std::uint32_t dataset_size, @@ -383,16 +383,16 @@ void compute_distance_to_child_nodes( const std::uint32_t hash_bitlen, INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= num_parents * graph_degree + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree cudaStream_t cuda_stream = 0) { const auto block_size = 128; const dim3 grid_size( - (num_parents * graph_degree + (block_size / TEAM_SIZE) - 1) / (block_size / TEAM_SIZE), + (search_width * graph_degree + (block_size / TEAM_SIZE) - 1) / (block_size / TEAM_SIZE), num_queries); compute_distance_to_child_nodes_kernel <<>>(parent_node_list, - num_parents, + search_width, dataset_ptr, data_dim, dataset_size, @@ -499,7 +499,7 @@ void set_value_batch(T* const dev_ptr, // result_buffer (work buffer) for "multi-kernel" // +--------------------+------------------------------+-------------------+ // | internal_top_k (A) | neighbors of internal_top_k | internal_topk (B) | -// | | | | +// | | | | // +--------------------+------------------------------+-------------------+ // |<--- result_buffer_allocation_size --->| // |<--- result_buffer_size --->| // Double buffer (A) @@ -514,7 +514,7 @@ struct search : search_plan_impl { using search_plan_impl::itopk_size; using search_plan_impl::algo; using search_plan_impl::team_size; - using search_plan_impl::num_parents; + using search_plan_impl::search_width; using search_plan_impl::min_iterations; using search_plan_impl::max_iterations; using search_plan_impl::thread_block_size; @@ -573,14 +573,14 @@ struct search : search_plan_impl { // // Allocate memory for intermediate buffer and workspace. // - result_buffer_size = itopk_size + (num_parents * graph_degree); + result_buffer_size = itopk_size + (search_width * graph_degree); result_buffer_allocation_size = result_buffer_size + itopk_size; result_indices.resize(result_buffer_allocation_size * max_queries, resource::get_cuda_stream(res)); result_distances.resize(result_buffer_allocation_size * max_queries, resource::get_cuda_stream(res)); - parent_node_list.resize(max_queries * num_parents, resource::get_cuda_stream(res)); + parent_node_list.resize(max_queries * search_width, resource::get_cuda_stream(res)); topk_hint.resize(max_queries, resource::get_cuda_stream(res)); size_t topk_workspace_size = _cuann_find_topk_bufferSize( @@ -670,8 +670,8 @@ struct search : search_plan_impl { hash_bitlen, _small_hash_bitlen, parent_node_list.data(), - num_parents, - num_parents, + search_width, + search_width, terminate_flag.data(), stream); @@ -684,7 +684,7 @@ struct search : search_plan_impl { // Compute distance to child nodes that are adjacent to the parent node compute_distance_to_child_nodes( parent_node_list.data(), - num_parents, + search_width, dataset.data_handle(), dataset.extent(1), dataset.extent(0), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 29bc0247cc..bc2102b9b0 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -123,7 +123,7 @@ struct search_plan_impl : public search_plan_impl_base { _max_iterations = 1 + std::min(32 * 1.1, 32 + 10.0); // TODO(anaruse) } else { _max_iterations = - 1 + std::min((itopk_size / num_parents) * 1.1, (itopk_size / num_parents) + 10.0); + 1 + std::min((itopk_size / search_width) * 1.1, (itopk_size / search_width) + 10.0); } } if (max_iterations < min_iterations) { _max_iterations = min_iterations; } @@ -147,14 +147,14 @@ struct search_plan_impl : public search_plan_impl_base { { // for multipel CTA search uint32_t mc_num_cta_per_query = 0; - uint32_t mc_num_parents = 0; + uint32_t mc_search_width = 0; uint32_t mc_itopk_size = 0; if (algo == search_algo::MULTI_CTA) { mc_itopk_size = 32; - mc_num_parents = 1; - mc_num_cta_per_query = max(num_parents, itopk_size / 32); + mc_search_width = 1; + mc_num_cta_per_query = max(search_width, itopk_size / 32); RAFT_LOG_DEBUG("# mc_itopk_size: %u", mc_itopk_size); - RAFT_LOG_DEBUG("# mc_num_parents: %u", mc_num_parents); + RAFT_LOG_DEBUG("# mc_search_width: %u", mc_search_width); RAFT_LOG_DEBUG("# mc_num_cta_per_query: %u", mc_num_cta_per_query); } @@ -172,7 +172,7 @@ struct search_plan_impl : public search_plan_impl_base { // be determined based on the internal topk size and the number of nodes // visited per iteration. // - const auto max_visited_nodes = itopk_size + (num_parents * graph_degree * 1); + const auto max_visited_nodes = itopk_size + (search_width * graph_degree * 1); unsigned min_bitlen = 8; // 256 unsigned max_bitlen = 13; // 8K if (min_bitlen < hashmap_min_bitlen) { min_bitlen = hashmap_min_bitlen; } @@ -201,7 +201,7 @@ struct search_plan_impl : public search_plan_impl_base { small_hash_reset_interval = 1; while (1) { const auto max_visited_nodes = - itopk_size + (num_parents * graph_degree * (small_hash_reset_interval + 1)); + itopk_size + (search_width * graph_degree * (small_hash_reset_interval + 1)); if (max_visited_nodes > hashmap::get_size(hash_bitlen) * max_fill_rate) { break; } small_hash_reset_interval += 1; } @@ -213,9 +213,9 @@ struct search_plan_impl : public search_plan_impl_base { // nodes that may be visited before the search is completed and the // maximum fill rate of the hash table. // - uint32_t max_visited_nodes = itopk_size + (num_parents * graph_degree * max_iterations); + uint32_t max_visited_nodes = itopk_size + (search_width * graph_degree * max_iterations); if (algo == search_algo::MULTI_CTA) { - max_visited_nodes = mc_itopk_size + (mc_num_parents * graph_degree * max_iterations); + max_visited_nodes = mc_itopk_size + (mc_search_width * graph_degree * max_iterations); max_visited_nodes *= mc_num_cta_per_query; } unsigned min_bitlen = 11; // 2K @@ -228,7 +228,7 @@ struct search_plan_impl : public search_plan_impl_base { } RAFT_LOG_DEBUG("# internal topK = %lu", itopk_size); - RAFT_LOG_DEBUG("# parent size = %lu", num_parents); + RAFT_LOG_DEBUG("# parent size = %lu", search_width); RAFT_LOG_DEBUG("# min_iterations = %lu", min_iterations); RAFT_LOG_DEBUG("# max_iterations = %lu", max_iterations); RAFT_LOG_DEBUG("# max_queries = %lu", max_queries); @@ -254,7 +254,7 @@ struct search_plan_impl : public search_plan_impl_base { { RAFT_EXPECTS(topk <= itopk_size, "topk must be smaller than itopk_size = %lu", itopk_size); if (algo == search_algo::MULTI_CTA) { - uint32_t mc_num_cta_per_query = max(num_parents, itopk_size / 32); + uint32_t mc_num_cta_per_query = max(search_width, itopk_size / 32); RAFT_EXPECTS(mc_num_cta_per_query * 32 >= topk, "`mc_num_cta_per_query` (%u) * 32 must be equal to or greater than " "`topk` /%u) when 'search_mode' is \"multi-cta\"", diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index 9b6658f03c..96de83369d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -55,7 +55,7 @@ struct search : search_plan_impl { using search_plan_impl::itopk_size; using search_plan_impl::algo; using search_plan_impl::team_size; - using search_plan_impl::num_parents; + using search_plan_impl::search_width; using search_plan_impl::min_iterations; using search_plan_impl::max_iterations; using search_plan_impl::thread_block_size; @@ -101,7 +101,7 @@ struct search : search_plan_impl { inline void set_params(raft::resources const& res) { - num_itopk_candidates = num_parents * graph_degree; + num_itopk_candidates = search_width * graph_degree; result_buffer_size = itopk_size + num_itopk_candidates; typedef raft::Pow2<32> AlignBytes; @@ -122,7 +122,7 @@ struct search : search_plan_impl { const std::uint32_t topk_ws_size = 3; const std::uint32_t base_smem_size = sizeof(float) * max_dim + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + - sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * num_parents + + sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * search_width + sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t); smem_size = base_smem_size; if (num_itopk_candidates > 256) { @@ -165,7 +165,7 @@ struct search : search_plan_impl { cudaDeviceProp deviceProp = resource::get_device_properties(res); RAFT_LOG_DEBUG("# multiProcessorCount: %d", deviceProp.multiProcessorCount); while ((block_size < max_block_size) && - (graph_degree * num_parents * team_size >= block_size * 2) && + (graph_degree * search_width * team_size >= block_size * 2) && (max_queries <= (1024 / (block_size * 2)) * deviceProp.multiProcessorCount)) { block_size *= 2; } @@ -258,7 +258,7 @@ struct search : search_plan_impl { rand_xor_mask, num_seeds, itopk_size, - num_parents, + search_width, min_iterations, max_iterations, stream); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index 0e942ac8d6..f7c43fe11c 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -47,7 +47,7 @@ void select_and_run( // raft::resources const& res, uint64_t rand_xor_mask, uint32_t num_seeds, size_t itopk_size, - size_t num_parents, + size_t search_width, size_t min_iterations, size_t max_iterations, cudaStream_t stream) RAFT_EXPLICIT; @@ -77,7 +77,7 @@ void select_and_run( // raft::resources const& res, uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 9094e20df8..31d9c9fffa 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -52,12 +52,12 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag, INDEX_T* const internal_topk_indices, const std::size_t internal_topk_size, const std::size_t dataset_size, - const std::uint32_t num_parents) + const std::uint32_t search_width) { constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; // if (threadIdx.x >= 32) return; - for (std::uint32_t i = threadIdx.x; i < num_parents; i += 32) { + for (std::uint32_t i = threadIdx.x; i < search_width; i += 32) { next_parent_indices[i] = utils::get_max_value(); } std::uint32_t itopk_max = internal_topk_size; @@ -77,14 +77,14 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag, const std::uint32_t ballot_mask = __ballot_sync(0xffffffff, new_parent); if (new_parent) { const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; - if (i < num_parents) { + if (i < search_width) { next_parent_indices[i] = index; // set most significant bit as used node internal_topk_indices[jj] |= index_msb_1_mask; } } num_new_parents += __popc(ballot_mask); - if (num_new_parents >= num_parents) { break; } + if (num_new_parents >= search_width) { break; } } if (threadIdx.x == 0 && (num_new_parents == 0)) { *terminate_flag = 1; } } @@ -476,7 +476,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t internal_topk, - const std::uint32_t num_parents, + const std::uint32_t search_width, const std::uint32_t min_iteration, const std::uint32_t max_iteration, std::uint32_t* const num_executed_iterations, // [num_queries] @@ -509,10 +509,10 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ // Layout of result_buffer // +----------------------+------------------------------+---------+ // | internal_top_k | neighbors of internal_top_k | padding | - // | | | upto 32 | + // | | | upto 32 | // +----------------------+------------------------------+---------+ // |<--- result_buffer_size --->| - std::uint32_t result_buffer_size = internal_topk + (num_parents * graph_degree); + std::uint32_t result_buffer_size = internal_topk + (search_width * graph_degree); std::uint32_t result_buffer_size_32 = result_buffer_size; if (result_buffer_size % 32) { result_buffer_size_32 += 32 - (result_buffer_size % 32); } const auto small_hash_size = hashmap::get_size(small_hash_bitlen); @@ -523,7 +523,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ auto visited_hash_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); auto parent_list_buffer = reinterpret_cast(visited_hash_buffer + small_hash_size); - auto topk_ws = reinterpret_cast(parent_list_buffer + num_parents); + auto topk_ws = reinterpret_cast(parent_list_buffer + search_width); auto terminate_flag = reinterpret_cast(topk_ws + 3); auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); @@ -620,7 +620,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ internal_topk, result_distances_buffer + internal_topk, result_indices_buffer + internal_topk, - num_parents * graph_degree, + search_width * graph_degree, topk_ws, (iter == 0)); _CLK_REC(clk_topk); @@ -661,7 +661,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ result_indices_buffer, internal_topk, dataset_size, - num_parents); + search_width); _CLK_REC(clk_pickup_parents); } @@ -693,7 +693,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ local_visited_hashmap_ptr, hash_bitlen, parent_list_buffer, - num_parents); + search_width); __syncthreads(); _CLK_REC(clk_compute_distance); @@ -848,7 +848,7 @@ void select_and_run( // raft::resources const& res, uint64_t rand_xor_mask, uint32_t num_seeds, size_t itopk_size, - size_t num_parents, + size_t search_width, size_t min_iterations, size_t max_iterations, cudaStream_t stream) @@ -877,7 +877,7 @@ void select_and_run( // raft::resources const& res, num_seeds, hashmap_ptr, itopk_size, - num_parents, + search_width, min_iterations, max_iterations, num_executed_iterations, diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index fb93906b0f..784d116503 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -63,7 +63,7 @@ uint64_t rand_xor_mask, \\ uint32_t num_seeds, \\ size_t itopk_size, \\ - size_t num_parents, \\ + size_t search_width, \\ size_t min_iterations, \\ size_t max_iterations, \\ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 7166561289..2a4e7ac607 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_widthhhhhhhhh, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index f0f5736e6d..115ce3b48b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 089d098cc7..c5e704a85f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index a0efed1a21..3469facf39 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 1f0e3f4309..327bfc73b4 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index a2e0ef96a4..1abe0cd8af 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index 33af9f93d8..dd61810d06 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 1158daeee5..8e12bab514 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 8247889983..d946ac9c79 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index e494f3f656..e4d7b44d1e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 8d8369bd94..b8dc3b38a8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 28f38683ab..749b35bad6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index 38593cc952..428d460ba8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_widthh, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index cc8845289b..28a20b865e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index 58853d1887..e85a84ae8e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index c86f86ce01..232b62ebcd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -49,7 +49,7 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 6f502f32cd..cf61a45b4a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -65,7 +65,7 @@ uint64_t rand_xor_mask, \\ uint32_t num_seeds, \\ size_t itopk_size, \\ - size_t num_parents, \\ + size_t search_width, \\ size_t min_iterations, \\ size_t max_iterations, \\ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index c5927b2b4f..eb45d4ff08 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 247228bc4c..049715aa20 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 525ff6f554..6028c283db 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index e3d5d114e0..2566e9cbd9 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index ebfe6d2599..4cd96ad9c0 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index bdfad2173e..822a2efb2f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index af71264c7f..80d1f76b9b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 1f845dad38..06c3eaf10b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index 73e94317df..b4c30ac943 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index c867070d11..c8d0df3ac4 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 6f96461bc4..19ecee91af 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index 68871fb609..52c4eb7d6b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 73e9b2be22..4675e17084 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index bd59227bd6..e73e1071ee 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index 9f8e46338f..01e26b5f29 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 39c6a797b8..b0534b555f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -51,7 +51,7 @@ namespace raft::neighbors::cagra::detail::single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 4dc174a578..89cb070afc 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -134,7 +134,7 @@ struct AnnCagraInputs { int max_queries; int team_size; int itopk_size; - int num_parents; + int search_width; raft::distance::DistanceType metric; bool host_dataset; // std::optional @@ -146,7 +146,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraInputs& p) std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim << ", k=" << p.k << ", " << algo.at((int)p.algo) << ", max_queries=" << p.max_queries - << ", itopk_size=" << p.itopk_size << ", num_parents=" << p.num_parents + << ", itopk_size=" << p.itopk_size << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") << '}' << std::endl; return os; @@ -366,7 +366,7 @@ class AnnCagraSortTest : public ::testing::TestWithParam { inline std::vector generate_inputs() { - // TODO(tfeher): test MULTI_CTA kernel with num_Parents>1 to allow multiple CTA per queries + // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries std::vector inputs = raft::util::itertools::product( {100}, {1000}, diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 4733a06246..f61e476652 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -41,7 +41,7 @@ namespace multi_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream); @@ -79,7 +79,7 @@ namespace single_cta_search { uint64_t rand_xor_mask, \ uint32_t num_seeds, \ size_t itopk_size, \ - size_t num_parents, \ + size_t search_width, \ size_t min_iterations, \ size_t max_iterations, \ cudaStream_t stream);