From 838a501657339ae22da53e4882931a6424e1fa2c Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 6 Oct 2023 11:05:33 -0400 Subject: [PATCH] Fix NN Descent overflows (#1875) NN-Descent was using the `int` type for indexing in `mdarray`; however, this caused an overflow when the product of all extents exceeded the maximum value representable by `int`. This PR also: - Adds missing dependencies for the `raft-ann-bench` development environment - Exposes the number of NN Descent iterations for use in CAGRA benchmarks Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/raft/pull/1875 --- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +++ cpp/bench/ann/src/raft/raft_benchmark.cu | 1 + cpp/include/raft/neighbors/cagra.cuh | 1 + cpp/include/raft/neighbors/cagra_types.hpp | 2 ++ .../raft/neighbors/detail/nn_descent.cuh | 36 +++++++++---------- dependencies.yaml | 2 ++ docs/source/ann_benchmarks_param_tuning.md | 1 + 7 files changed, 29 insertions(+), 18 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 4f1df12dfa..2527fdd1fc 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -31,10 +31,14 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libfaiss>=1.7.1 +- matplotlib - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 +- pandas +- pyyaml +- rmm==23.12.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index a9ff6c2922..2f005566a9 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -154,6 +154,7 @@ void parse_build_param(const nlohmann::json& conf, param.build_algo = 
raft::neighbors::cagra::graph_build_algo::NN_DESCENT; } } + if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } } template diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index f9682a973f..1efb4da95e 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -318,6 +318,7 @@ index build(raft::resources const& res, auto nn_descent_params = experimental::nn_descent::index_params(); nn_descent_params.graph_degree = intermediate_degree; nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; + nn_descent_params.max_iterations = params.nn_descent_niter; build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); } diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 5061d6082d..4db08110b9 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -58,6 +58,8 @@ struct index_params : ann::index_params { size_t graph_degree = 64; /** ANN algorithm to build knn graph. 
*/ graph_build_algo build_algo = graph_build_algo::IVF_PQ; + /** Number of Iterations to run if building with NN_DESCENT */ + size_t nn_descent_niter = 20; }; enum class search_algo { diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 1fb568a934..ce77cdc3de 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -362,28 +362,28 @@ class GNND { GnndGraph graph_; std::atomic update_counter_; - Index_t nrow_; - const int ndim_; + size_t nrow_; + size_t ndim_; - raft::device_matrix<__half, Index_t, raft::row_major> d_data_; - raft::device_vector l2_norms_; + raft::device_matrix<__half, size_t, raft::row_major> d_data_; + raft::device_vector l2_norms_; - raft::device_matrix graph_buffer_; - raft::device_matrix dists_buffer_; + raft::device_matrix graph_buffer_; + raft::device_matrix dists_buffer_; // TODO: Investigate using RMM/RAFT types https://github.com/rapidsai/raft/issues/1827 thrust::host_vector> graph_host_buffer_; thrust::host_vector> dists_host_buffer_; - raft::device_vector d_locks_; + raft::device_vector d_locks_; thrust::host_vector> h_rev_graph_new_; thrust::host_vector> h_graph_old_; thrust::host_vector> h_rev_graph_old_; // int2.x is the number of forward edges, int2.y is the number of reverse edges - raft::device_vector d_list_sizes_new_; - raft::device_vector d_list_sizes_old_; + raft::device_vector d_list_sizes_new_; + raft::device_vector d_list_sizes_old_; }; constexpr int TILE_ROW_WIDTH = 64; @@ -1143,21 +1143,21 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build NUM_SAMPLES), nrow_(build_config.max_dataset_size), ndim_(build_config.dataset_dim), - d_data_{raft::make_device_matrix<__half, Index_t, raft::row_major>( + d_data_{raft::make_device_matrix<__half, size_t, raft::row_major>( res, nrow_, build_config.dataset_dim)}, - l2_norms_{raft::make_device_vector(res, nrow_)}, + 
l2_norms_{raft::make_device_vector(res, nrow_)}, graph_buffer_{ - raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, + raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ - raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, + raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, graph_host_buffer_(nrow_ * DEGREE_ON_DEVICE), dists_host_buffer_(nrow_ * DEGREE_ON_DEVICE), - d_locks_{raft::make_device_vector(res, nrow_)}, + d_locks_{raft::make_device_vector(res, nrow_)}, h_rev_graph_new_(nrow_ * NUM_SAMPLES), h_graph_old_(nrow_ * NUM_SAMPLES), h_rev_graph_old_(nrow_ * NUM_SAMPLES), - d_list_sizes_new_{raft::make_device_vector(res, nrow_)}, - d_list_sizes_old_{raft::make_device_vector(res, nrow_)} + d_list_sizes_new_{raft::make_device_vector(res, nrow_)}, + d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); @@ -1342,8 +1342,8 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out for (size_t i = 0; i < (size_t)nrow_; i++) { for (size_t j = 0; j < build_config_.node_degree; j++) { size_t idx = i * graph_.node_degree + j; - Index_t id = graph_.h_graph[idx].id(); - if (id < nrow_) { + int id = graph_.h_graph[idx].id(); + if (id < static_cast(nrow_)) { graph_shrink_buffer[i * build_config_.node_degree + j] = id; } else { graph_shrink_buffer[i * build_config_.node_degree + j] = diff --git a/dependencies.yaml b/dependencies.yaml index fe4a4620e0..e3fd7d0679 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -29,6 +29,7 @@ files: - develop - cudatoolkit - nn_bench + - nn_bench_python test_cpp: output: none includes: @@ -228,6 +229,7 @@ dependencies: - libfaiss>=1.7.1 - benchmark>=1.8.2 - faiss-proc=*=cuda + - *rmm_conda nn_bench_python: common: - output_types: [conda] diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 433df2ae2f..e79ef36932 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ 
b/docs/source/ann_benchmarks_param_tuning.md @@ -49,6 +49,7 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g | `graph_degree` | `build_param` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. | | `intermediate_graph_degree` | `build_param` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. | | `graph_build_algo` | `build_param` | N | ["IVF_PQ", "NN_DESCENT"] | "IVF_PQ" | Algorithm to use for search | +| `nn_descent_niter` | `build_param` | N | Positive Integer >0 | 20 | Number of NN-Descent iterations to run when `graph_build_algo` is "NN_DESCENT". | | `dataset_memory_type` | `build_param` | N | ["device", "host", "mmap"] | "device" | What memory type should the dataset reside? | | `query_memory_type` | `search_params` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | | `itopk` | `search_wdith` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. Higher values improve search accuracy at the cost of speed. |