From d2f60553eb4cbb7aac59c2d0035ba155d98bc3a5 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Fri, 5 Jan 2024 17:12:52 +0100 Subject: [PATCH 1/3] Small fixes for CAGRA: max_queries auto selection Signed-off-by: Mickael Ide --- .../raft/neighbors/detail/cagra/cagra_search.cuh | 4 ++-- .../raft/neighbors/detail/cagra/search_plan.cuh | 4 ++-- notebooks/utils.py | 5 ++--- python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx | 12 ++++++------ 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 23a966d41f..a22d892bfb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -118,7 +118,7 @@ void search_main(raft::resources const& res, RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); - if (params.max_queries == 0) { params.max_queries = queries.extent(0); } + if (params.max_queries == 0) { params.max_queries = std::max(queries.extent(0), 65535); } common::nvtx::range fun_scope( "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index f57b776ccf..f2f51617f4 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -147,7 +147,7 @@ struct search_plan_impl : public search_plan_impl_base { // defines hash_bitlen, small_hash_bitlen, small_hash_reset interval, hash_size inline void calc_hashmap_params(raft::resources const& res) { - // for multipel CTA search + // for multiple CTA search uint32_t mc_num_cta_per_query = 0; uint32_t mc_search_width = 0; uint32_t mc_itopk_size = 0; diff --git a/notebooks/utils.py b/notebooks/utils.py index 1c2e44a6ae..311efc98bc 100644 --- a/notebooks/utils.py +++ b/notebooks/utils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -73,7 +73,7 @@ def benchmark_runs(self): self.timings.append(t1 - t0) -def load_dataset(dataset_url, work_folder=None): +def load_dataset(dataset_url="http://ann-benchmarks.com/sift-128-euclidean.hdf5", work_folder=None): """Download dataset from url. It is expected that the dataset contains a hdf5 file in ann-benchmarks format Parameters @@ -82,7 +82,6 @@ def load_dataset(dataset_url, work_folder=None): work_folder name of the local folder to store the dataset """ - dataset_url = "http://ann-benchmarks.com/sift-128-euclidean.hdf5" dataset_filename = dataset_url.split("/")[-1] # We'll need to load store some data in this tutorial diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx index c11d933b27..5e36118b4e 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx +++ b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -91,7 +91,7 @@ from pylibraft.neighbors.common cimport _get_metric_string cdef class IndexParams: - """" + """ Parameters to build index for CAGRA nearest neighbor search Parameters @@ -107,10 +107,10 @@ cdef class IndexParams: build_algo: string denoting the graph building algorithm to use, default = "ivf_pq" Valid values for algo: ["ivf_pq", "nn_descent"], where - - ivf_pq will use the IVF-PQ algorithm for building the knn graph - - nn_descent (experimental) will use the NN-Descent algorithm for - building the knn graph. It is expected to be generally - faster than ivf_pq. + - ivf_pq will use the IVF-PQ algorithm for building the knn graph + - nn_descent (experimental) will use the NN-Descent algorithm for + building the knn graph. It is expected to be generally + faster than ivf_pq. """ cdef c_cagra.index_params params From fcd601fe8ba791d544df0d1269b66fcf8e074d50 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sun, 7 Jan 2024 17:38:09 +0100 Subject: [PATCH 2/3] Fix max_queries --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index a22d892bfb..0d7a5e519a 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -118,7 +118,7 @@ void search_main(raft::resources const& res, RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); - if (params.max_queries == 0) { params.max_queries = std::max(queries.extent(0), 65535); } + if (params.max_queries == 0) { params.max_queries = std::min(queries.extent(0), 65535); } common::nvtx::range fun_scope( "cagra::search(max_queries = %u, k = %u, dim = 
%zu)", params.max_queries, topk, index.dim()); From d91e15263ac8076fab6ea102d58ead61609ecd38 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Tue, 9 Jan 2024 15:18:46 +0100 Subject: [PATCH 3/3] Compute maxgridsize instead of hardcoding it --- .../neighbors/detail/cagra/cagra_search.cuh | 5 ++++- .../pylibraft/neighbors/cagra/cagra.pyx | 22 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 0d7a5e519a..41a43c9bce 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -118,7 +118,10 @@ void search_main(raft::resources const& res, RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); - if (params.max_queries == 0) { params.max_queries = std::min(queries.extent(0), 65535); } + cudaDeviceProp deviceProp = resource::get_device_properties(res); + if (params.max_queries == 0) { + params.max_queries = std::min<size_t>(queries.extent(0), deviceProp.maxGridSize[1]); + } common::nvtx::range fun_scope( "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx index 5e36118b4e..c19faa826d 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx +++ b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx @@ -104,7 +104,7 @@ cdef class IndexParams: graph_degree : int, default = 64 - build_algo: string denoting the graph building algorithm to use, + build_algo: string denoting the graph building algorithm to use, \ default = "ivf_pq" Valid values for algo: ["ivf_pq", "nn_descent"], where - ivf_pq will use the IVF-PQ algorithm for building the knn graph @@ -501,10 +501,10 @@ cdef class SearchParams: Upper limit of search 
iterations. Auto select when 0. algo: string denoting the search algorithm to use, default = "auto" Valid values for algo: ["auto", "single_cta", "multi_cta"], where - - auto will automatically select the best value based on query size - - single_cta is better when query contains larger number of - vectors (e.g >10) - - multi_cta is better when query contains only a few vectors + - auto will automatically select the best value based on query size + - single_cta is better when query contains larger number of + vectors (e.g >10) + - multi_cta is better when query contains only a few vectors team_size: int, default = 0 Number of threads used to calculate a single distance. 4, 8, 16, or 32. @@ -516,13 +516,13 @@ cdef class SearchParams: thread_block_size: int, default = 0 Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. - hashmap_mode: string denoting the type of hash map to use. It's - usually better to allow the algorithm to select this value., - default = "auto" + hashmap_mode: string denoting the type of hash map to use. + It's usually better to allow the algorithm to select this value, + default = "auto". Valid values for hashmap_mode: ["auto", "small", "hash"], where - - auto will automatically select the best value based on algo - - small will use the small shared memory hash table with resetting. - - hash will use a single hash table in global memory. + - auto will automatically select the best value based on algo + - small will use the small shared memory hash table with resetting. + - hash will use a single hash table in global memory. hashmap_min_bitlen: int, default = 0 Upper limit of hashmap fill rate. More than 0.1, less than 0.9. hashmap_max_fill_rate: float, default = 0.5