Skip to content

Commit

Permalink
Fix max_queries for CAGRA (#2081)
Browse files Browse the repository at this point in the history
Fix for #2072: CAGRA search launches a thread per query in single-CTA mode. The maximum number of threads is 65535, so the `max_queries` auto selection should be bounded by this number.

Authors:
  - Micka (https://github.com/lowener)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #2081
  • Loading branch information
lowener authored Jan 9, 2024
1 parent 3b88d17 commit 1484a03
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 24 deletions.
7 changes: 5 additions & 2 deletions cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -118,7 +118,10 @@ void search_main(raft::resources const& res,
RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match");
const uint32_t topk = neighbors.extent(1);

if (params.max_queries == 0) { params.max_queries = queries.extent(0); }
cudaDeviceProp deviceProp = resource::get_device_properties(res);
if (params.max_queries == 0) {
params.max_queries = std::min<size_t>(queries.extent(0), deviceProp.maxGridSize[1]);
}

common::nvtx::range<common::nvtx::domain::raft> fun_scope(
"cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim());
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -147,7 +147,7 @@ struct search_plan_impl : public search_plan_impl_base {
// defines hash_bitlen, small_hash_bitlen, small_hash_reset interval, hash_size
inline void calc_hashmap_params(raft::resources const& res)
{
// for multipel CTA search
// for multiple CTA search
uint32_t mc_num_cta_per_query = 0;
uint32_t mc_search_width = 0;
uint32_t mc_itopk_size = 0;
Expand Down
5 changes: 2 additions & 3 deletions notebooks/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,7 +73,7 @@ def benchmark_runs(self):
self.timings.append(t1 - t0)


def load_dataset(dataset_url, work_folder=None):
def load_dataset(dataset_url="http://ann-benchmarks.com/sift-128-euclidean.hdf5", work_folder=None):
"""Download dataset from url. It is expected that the dataset contains a hdf5 file in ann-benchmarks format
Parameters
Expand All @@ -82,7 +82,6 @@ def load_dataset(dataset_url, work_folder=None):
work_folder name of the local folder to store the dataset
"""
dataset_url = "http://ann-benchmarks.com/sift-128-euclidean.hdf5"
dataset_filename = dataset_url.split("/")[-1]

# We'll need to load and store some data in this tutorial
Expand Down
34 changes: 17 additions & 17 deletions python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -91,7 +91,7 @@ from pylibraft.neighbors.common cimport _get_metric_string


cdef class IndexParams:
""""
"""
Parameters to build index for CAGRA nearest neighbor search
Parameters
Expand All @@ -104,13 +104,13 @@ cdef class IndexParams:
graph_degree : int, default = 64
build_algo: string denoting the graph building algorithm to use,
build_algo: string denoting the graph building algorithm to use, \
default = "ivf_pq"
Valid values for algo: ["ivf_pq", "nn_descent"], where
- ivf_pq will use the IVF-PQ algorithm for building the knn graph
- nn_descent (experimental) will use the NN-Descent algorithm for
building the knn graph. It is expected to be generally
faster than ivf_pq.
- ivf_pq will use the IVF-PQ algorithm for building the knn graph
- nn_descent (experimental) will use the NN-Descent algorithm for
building the knn graph. It is expected to be generally
faster than ivf_pq.
"""
cdef c_cagra.index_params params

Expand Down Expand Up @@ -501,10 +501,10 @@ cdef class SearchParams:
Upper limit of search iterations. Auto select when 0.
algo: string denoting the search algorithm to use, default = "auto"
Valid values for algo: ["auto", "single_cta", "multi_cta"], where
- auto will automatically select the best value based on query size
- single_cta is better when query contains larger number of
vectors (e.g >10)
- multi_cta is better when query contains only a few vectors
- auto will automatically select the best value based on query size
- single_cta is better when query contains larger number of
vectors (e.g >10)
- multi_cta is better when query contains only a few vectors
team_size: int, default = 0
Number of threads used to calculate a single distance. 4, 8, 16,
or 32.
Expand All @@ -516,13 +516,13 @@ cdef class SearchParams:
thread_block_size: int, default = 0
Thread block size. 0, 64, 128, 256, 512, 1024.
Auto selection when 0.
hashmap_mode: string denoting the type of hash map to use. It's
usually better to allow the algorithm to select this value.,
default = "auto"
hashmap_mode: string denoting the type of hash map to use.
It's usually better to allow the algorithm to select this value,
default = "auto".
Valid values for hashmap_mode: ["auto", "small", "hash"], where
- auto will automatically select the best value based on algo
- small will use the small shared memory hash table with resetting.
- hash will use a single hash table in global memory.
- auto will automatically select the best value based on algo
- small will use the small shared memory hash table with resetting.
- hash will use a single hash table in global memory.
hashmap_min_bitlen: int, default = 0
Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
hashmap_max_fill_rate: float, default = 0.5
Expand Down

0 comments on commit 1484a03

Please sign in to comment.