From 9c707c2045293991daf5daac943987c265208583 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 12:08:28 +0200 Subject: [PATCH 1/6] Enable host dataset in cagra ann bench --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 399fd6a0a8..43743df6af 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -71,7 +71,7 @@ class RaftCagra : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; + property.dataset_memory_type = MemoryType::Host; property.query_memory_type = MemoryType::Device; property.need_dataset_when_search = true; return property; @@ -104,8 +104,17 @@ RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - auto dataset_view = raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + logger::get(RAFT_NAME).set_level(RAFT_LEVEL_DEBUG); + if (get_property().dataset_memory_type == MemoryType::Host) { + auto dataset_view = raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace( + raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + } else { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace( + raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + } return; } From 9fc82dd95b7f5c31fcf9d605b7069d4526ce9fc9 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 12:16:14 +0200 Subject: [PATCH 2/6] Fix itopk search param initialization --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 43743df6af..9354776961 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -157,7 +157,7 @@ void RaftCagra::search( raft::neighbors::experimental::cagra::search_params search_params; search_params.max_queries = batch_size; - search_params.itopk_size = search_params_.max_queries; + search_params.itopk_size = search_params_.itopk_size; raft::neighbors::experimental::cagra::search( handle_, search_params, *index_, queries_view, neighbors_view, distances_view); From 1bf7ef7c464ae4c3517ff6a38f60b39382a4c5e9 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 16:02:46 +0200 Subject: [PATCH 3/6] improve cagra logging --- cpp/include/raft/spatial/knn/detail/ann_utils.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 850b741dfd..1ce041d8da 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -466,7 +466,7 @@ struct batch_load_iterator { if (source_ == nullptr) { return; } if (needs_copy_) { if (size() > 0) { - RAFT_LOG_DEBUG("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)", + RAFT_LOG_TRACE("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)", size_t(offset()), size_t(size()), size_t(row_width())); From fd12057afe749477580b0723bc2f19caa5bb847c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 16:22:04 +0200 Subject: [PATCH 4/6] Handle more conf parameters for ann cagra bench --- cpp/bench/ann/src/raft/raft_benchmark.cu | 7 ++++++- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 9 ++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 22204c2b61..b43f52eb5c 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -132,13 +132,18 @@ void parse_build_param(const nlohmann::json& conf, param.graph_degree = conf.at("index_dim"); param.intermediate_graph_degree = param.graph_degree * 2; } + if (conf.contains("intermediate_graph_degree")) { + param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); + } } template void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::SearchParam& param) { - param.itopk_size = conf.at("itopk"); + if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } } #endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 9354776961..2d2d5a929c 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -47,7 +47,7 @@ class RaftCagra : public ANN { using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { - unsigned itopk_size; + raft::neighbors::experimental::cagra::search_params p; }; using BuildParam = raft::neighbors::experimental::cagra::index_params; @@ -121,6 +121,8 @@ void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) template void RaftCagra::set_search_param(const AnnSearchParam& param) { + auto search_param = dynamic_cast(param); + search_params_ = search_param.p; return; } @@ -155,11 +157,8 @@ void RaftCagra::search( auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); - raft::neighbors::experimental::cagra::search_params search_params; - search_params.max_queries = batch_size; - search_params.itopk_size = search_params_.itopk_size; raft::neighbors::experimental::cagra::search( - handle_, search_params, *index_, queries_view, neighbors_view, distances_view); + handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); if (!std::is_same::value) { raft::linalg::unaryOp(neighbors, From f440854e9576fa5ef1340c3b4c2e1e5f0f596301 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 16:23:21 +0200 Subject: [PATCH 5/6] Improve log messages for cagra build --- .../neighbors/detail/cagra/cagra_build.cuh | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index 5c196471aa..d2bf7bf1ed 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -135,6 +135,9 @@ void build_knn_graph(raft::resources const& res, resource::get_cuda_stream(res), device_memory); + size_t next_report_offset = 0; + size_t d_report_offset = dataset.extent(0) / 100; // Report progress in 1% steps. + for (const auto& batch : vec_batches) { auto queries_view = raft::make_device_matrix_view( batch.data(), batch.size(), batch.row_width()); @@ -212,18 +215,23 @@ void build_knn_graph(raft::resources const& res, size_t num_queries_done = batch.offset() + batch.size(); const auto end_clock = std::chrono::system_clock::now(); - const auto time = - std::chrono::duration_cast(end_clock - start_clock).count() * 1e-6; - const auto throughput = num_queries_done / time; - RAFT_LOG_DEBUG( - "# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = " - "%3.2f %% \r", - num_queries_done, - dataset.extent(0), - num_queries_done / static_cast(dataset.extent(0)) * 100, - throughput, - (num_queries - num_queries_done) / throughput / 60, - static_cast(num_self_included) / num_queries_done * 100.); + if (batch.offset() > next_report_offset) { + next_report_offset += d_report_offset; + const auto time = + std::chrono::duration_cast(end_clock - start_clock).count() * + 1e-6; + const auto throughput = num_queries_done / time; + + RAFT_LOG_INFO( + "# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = " + "%3.2f %% \r", + num_queries_done, + dataset.extent(0), + num_queries_done / static_cast(dataset.extent(0)) * 100, + throughput, + (num_queries - num_queries_done) / throughput / 60, + static_cast(num_self_included) / num_queries_done * 100.); + } first = false; } if (!first) RAFT_LOG_DEBUG("# Finished building kNN graph"); From f9cf1aed4510c81da20a530ef1c8bc60f2b6370f Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 20 Jul 2023 16:27:01 +0200 Subject: [PATCH 6/6] Remove debug logging from CAGRA bench --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 2d2d5a929c..79ae746078 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -104,7 +104,6 @@ RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - logger::get(RAFT_NAME).set_level(RAFT_LEVEL_DEBUG); if (get_property().dataset_memory_type == MemoryType::Host) { auto dataset_view = raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); index_.emplace(