diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index 347ccf889..bd41d1ff7 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -55,6 +55,8 @@ struct index_params : cuvs::neighbors::index_params { size_t intermediate_graph_degree = 128; // Degree of input graph for pruning. size_t max_iterations = 20; // Number of nn-descent iterations. float termination_threshold = 0.0001; // Termination threshold of nn-descent. + bool return_distances = true; // return distances if true + size_t n_clusters = 1; // defaults to not using any batching /** @brief Construct NN descent parameters for a specific kNN graph degree * @@ -100,14 +102,20 @@ struct index : cuvs::neighbors::index { * @param res raft::resources is an object mangaging resources * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph + * @param return_distances whether to return distances */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols) + index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) : cuvs::neighbors::index(), res_{res}, metric_{cuvs::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(n_rows, n_cols)}, - graph_view_{graph_.view()} + graph_view_{graph_.view()}, + return_distances_{return_distances} { + if (return_distances) { + distances_ = raft::make_device_matrix(res_, n_rows, n_cols); + distances_view_ = distances_.value().view(); + } } /** @@ -119,14 +127,20 @@ struct index : cuvs::neighbors::index { * * @param res raft::resources is an object mangaging resources * @param graph_view raft::host_matrix_view for storing knn-graph + * @param distances_view optional raft::device_matrix_view for storing + * distances */ index(raft::resources const& res, - raft::host_matrix_view graph_view) + raft::host_matrix_view graph_view, + std::optional> distances_view = + std::nullopt) : cuvs::neighbors::index(), res_{res}, metric_{cuvs::distance::DistanceType::L2Expanded}, graph_{raft::make_host_matrix(0, 0)}, - graph_view_{graph_view} + graph_view_{graph_view}, + distances_view_{distances_view}, + return_distances_{distances_view.has_value()} { } @@ -155,6 +169,13 @@ struct index : cuvs::neighbors::index { return graph_view_; } + /** neighborhood graph distances [size, graph-degree] */ + [[nodiscard]] inline auto distances() noexcept + -> std::optional> + { + return distances_view_; + } + // Don't allow copying the index for performance reasons (try avoiding copying data) index(const index&) = delete; index(index&&) = default; @@ -166,8 +187,11 @@ struct index : cuvs::neighbors::index { raft::resources const& res_; cuvs::distance::DistanceType metric_; raft::host_matrix graph_; // graph to return for non-int IdxT + std::optional> distances_; raft::host_matrix_view graph_view_; // view of graph for user provided matrix + std::optional> distances_view_; + bool return_distances_; }; /** @} */ @@ -200,12 +224,15 @@ struct index : cuvs::neighbors::index { * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -232,12 +259,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -262,12 +292,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -294,12 +327,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -324,12 +360,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -356,12 +395,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in device memory @@ -386,14 +428,15 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::device_matrix_view input dataset expected to be located * in device memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::device_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; - -/** @} */ + raft::device_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; /** * @brief Build nn-descent Index with dataset in host memory @@ -420,12 +463,17 @@ auto build(raft::resources const& res, * to run the nn-descent algorithm * @param[in] dataset raft::host_matrix_view input dataset expected to be located * in host memory + * @param[in] graph optional raft::host_matrix_view for owning + * the output graph * @return index index containing all-neighbors knn graph in host memory */ auto build(raft::resources const& res, index_params const& params, - raft::host_matrix_view dataset) - -> cuvs::neighbors::nn_descent::index; + raft::host_matrix_view dataset, + std::optional> graph = + std::nullopt) -> cuvs::neighbors::nn_descent::index; + +/** @} */ /** * @brief Test if we have enough GPU memory to run NN descent algorithm. diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 9e4d453e3..6209ff819 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -33,8 +33,7 @@ #include #include -// TODO: Fixme- this needs to be migrated -#include "../../nn_descent.cuh" +#include // TODO: This shouldn't be calling spatial/knn APIs #include "../ann_utils.cuh" @@ -356,8 +355,8 @@ void build_knn_graph( raft::host_matrix_view knn_graph, cuvs::neighbors::nn_descent::index_params build_params) { - auto nn_descent_idx = cuvs::neighbors::nn_descent::index(res, knn_graph); - cuvs::neighbors::nn_descent::build(res, build_params, dataset, nn_descent_idx); + std::optional> graph_view = knn_graph; + auto nn_descent_idx = cuvs::neighbors::nn_descent::build(res, build_params, dataset, graph_view); using internal_IdxT = typename std::make_unsigned::type; using g_accessor = typename decltype(nn_descent_idx.graph())::accessor_type; @@ -471,6 +470,7 @@ index build( } // Use nn-descent to build CAGRA knn graph + nn_descent_params.return_distances = false; build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); } diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 8c5767c50..883d82d76 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -16,42 +16,41 @@ #pragma once -#include - #include "ann_utils.cuh" #include "cagra/device_common.hpp" + +#include + #include +#include #include #include +#include +#include +#include #include #include - +#include +#include #include // raft::util::arch::SM_* #include #include #include #include -#include +#include + #include -#include -#include -#include -#include -#include #include #include #include +#include #include #include namespace cuvs::neighbors::nn_descent::detail { -static const std::string RAFT_NAME = "raft"; -using pinned_memory_resource = thrust::universal_host_pinned_memory_resource; -template -using pinned_memory_allocator = thrust::mr::stateless_resource_allocator; using DistData_t = float; constexpr int DEGREE_ON_DEVICE{32}; @@ -216,6 +215,7 @@ struct BuildConfig { // If internal_node_degree == 0, the value of node_degree will be assigned to it size_t max_iterations{50}; float termination_threshold{0.0001}; + size_t output_graph_degree{32}; }; template @@ -300,6 +300,7 @@ class BloomFilter { template struct GnndGraph { + raft::resources const& res; static constexpr int segment_size = 32; InternalID_t* h_graph; @@ -310,16 +311,17 @@ struct GnndGraph { raft::host_matrix h_dists; - thrust::host_vector> h_graph_new; - thrust::host_vector> h_list_sizes_new; + raft::pinned_matrix h_graph_new; + raft::pinned_vector h_list_sizes_new; - thrust::host_vector> h_graph_old; - thrust::host_vector> h_list_sizes_old; + raft::pinned_matrix h_graph_old; + raft::pinned_vector h_list_sizes_old; BloomFilter bloom_filter; GnndGraph(const GnndGraph&) = delete; GnndGraph& operator=(const GnndGraph&) = delete; - GnndGraph(const size_t nrow, + GnndGraph(raft::resources const& res, + const size_t nrow, const size_t node_degree, const size_t internal_node_degree, const size_t num_samples); @@ -344,9 +346,14 @@ class GNND { GNND(const GNND&) = delete; GNND& operator=(const GNND&) = delete; - void build(Data_t* data, const Index_t nrow, Index_t* output_graph); + void build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances); ~GNND() = default; using ID_t = InternalID_t; + void reset(raft::resources const& res); private: void add_reverse_edges(Index_t* graph_ptr, @@ -371,15 +378,14 @@ class GNND { raft::device_matrix graph_buffer_; raft::device_matrix dists_buffer_; - // TODO: Investigate using RMM/RAFT types https://github.com/rapidsai/raft/issues/1827 - thrust::host_vector> graph_host_buffer_; - thrust::host_vector> dists_host_buffer_; + raft::pinned_matrix graph_host_buffer_; + raft::pinned_matrix dists_host_buffer_; raft::device_vector d_locks_; - thrust::host_vector> h_rev_graph_new_; - thrust::host_vector> h_graph_old_; - thrust::host_vector> h_rev_graph_old_; + raft::pinned_matrix h_rev_graph_new_; + raft::pinned_matrix h_graph_old_; + raft::pinned_matrix h_rev_graph_old_; // int2.x is the number of forward edges, int2.y is the number of reverse edges raft::device_vector d_list_sizes_new_; @@ -971,19 +977,21 @@ int insert_to_ordered_list(InternalID_t* list, } // namespace template -GnndGraph::GnndGraph(const size_t nrow, +GnndGraph::GnndGraph(raft::resources const& res, + const size_t nrow, const size_t node_degree, const size_t internal_node_degree, const size_t num_samples) - : nrow(nrow), + : res(res), + nrow(nrow), node_degree(node_degree), num_samples(num_samples), bloom_filter(nrow, internal_node_degree / segment_size, 3), h_dists{raft::make_host_matrix(nrow, node_degree)}, - h_graph_new(nrow * num_samples), - h_list_sizes_new(nrow), - h_graph_old(nrow * num_samples), - h_list_sizes_old{nrow} + h_graph_new{raft::make_pinned_matrix(res, nrow, num_samples)}, + h_list_sizes_new{raft::make_pinned_vector(res, nrow)}, + h_graph_old{raft::make_pinned_matrix(res, nrow, num_samples)}, + h_list_sizes_old{raft::make_pinned_vector(res, nrow)} { // node_degree must be a multiple of segment_size; assert(node_degree % segment_size == 0); @@ -1001,9 +1009,9 @@ void GnndGraph::sample_graph_new(InternalID_t* new_neighbors, { #pragma omp parallel for for (size_t i = 0; i < nrow; i++) { - auto list_new = h_graph_new.data() + i * num_samples; - h_list_sizes_new[i].x = 0; - h_list_sizes_new[i].y = 0; + auto list_new = h_graph_new.data_handle() + i * num_samples; + h_list_sizes_new.data_handle()[i].x = 0; + h_list_sizes_new.data_handle()[i].y = 0; for (size_t j = 0; j < width; j++) { auto new_neighb_id = new_neighbors[i * width + j].id(); @@ -1011,8 +1019,8 @@ void GnndGraph::sample_graph_new(InternalID_t* new_neighbors, if (bloom_filter.check(i, new_neighb_id)) { continue; } bloom_filter.add(i, new_neighb_id); new_neighbors[i * width + j].mark_old(); - list_new[h_list_sizes_new[i].x++] = new_neighb_id; - if (h_list_sizes_new[i].x == num_samples) break; + list_new[h_list_sizes_new.data_handle()[i].x++] = new_neighb_id; + if (h_list_sizes_new.data_handle()[i].x == num_samples) break; } } } @@ -1051,31 +1059,37 @@ void GnndGraph::sample_graph(bool sample_new) { #pragma omp parallel for for (size_t i = 0; i < nrow; i++) { - h_list_sizes_old[i].x = 0; - h_list_sizes_old[i].y = 0; - h_list_sizes_new[i].x = 0; - h_list_sizes_new[i].y = 0; + h_list_sizes_old.data_handle()[i].x = 0; + h_list_sizes_old.data_handle()[i].y = 0; + h_list_sizes_new.data_handle()[i].x = 0; + h_list_sizes_new.data_handle()[i].y = 0; auto list = h_graph + i * node_degree; - auto list_old = h_graph_old.data() + i * num_samples; - auto list_new = h_graph_new.data() + i * num_samples; + auto list_old = h_graph_old.data_handle() + i * num_samples; + auto list_new = h_graph_new.data_handle() + i * num_samples; for (int j = 0; j < segment_size; j++) { for (int k = 0; k < num_segments; k++) { auto neighbor = list[k * segment_size + j]; if ((size_t)neighbor.id() >= nrow) continue; if (!neighbor.is_new()) { - if (h_list_sizes_old[i].x < num_samples) { - list_old[h_list_sizes_old[i].x++] = neighbor.id(); + if (h_list_sizes_old.data_handle()[i].x < num_samples) { + list_old[h_list_sizes_old.data_handle()[i].x++] = neighbor.id(); } } else if (sample_new) { - if (h_list_sizes_new[i].x < num_samples) { + if (h_list_sizes_new.data_handle()[i].x < num_samples) { list[k * segment_size + j].mark_old(); - list_new[h_list_sizes_new[i].x++] = neighbor.id(); + list_new[h_list_sizes_new.data_handle()[i].x++] = neighbor.id(); } } - if (h_list_sizes_old[i].x == num_samples && h_list_sizes_new[i].x == num_samples) { break; } + if (h_list_sizes_old.data_handle()[i].x == num_samples && + h_list_sizes_new.data_handle()[i].x == num_samples) { + break; + } + } + if (h_list_sizes_old.data_handle()[i].x == num_samples && + h_list_sizes_new.data_handle()[i].x == num_samples) { + break; } - if (h_list_sizes_old[i].x == num_samples && h_list_sizes_new[i].x == num_samples) { break; } } } } @@ -1137,7 +1151,8 @@ template GNND::GNND(raft::resources const& res, const BuildConfig& build_config) : res(res), build_config_(build_config), - graph_(build_config.max_dataset_size, + graph_(res, + build_config.max_dataset_size, align32::roundUp(build_config.node_degree), align32::roundUp(build_config.internal_node_degree ? build_config.internal_node_degree : build_config.node_degree), @@ -1151,28 +1166,38 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, - graph_host_buffer_(nrow_ * DEGREE_ON_DEVICE), - dists_host_buffer_(nrow_ * DEGREE_ON_DEVICE), + graph_host_buffer_{ + raft::make_pinned_matrix(res, nrow_, DEGREE_ON_DEVICE)}, + dists_host_buffer_{ + raft::make_pinned_matrix(res, nrow_, DEGREE_ON_DEVICE)}, d_locks_{raft::make_device_vector(res, nrow_)}, - h_rev_graph_new_(nrow_ * NUM_SAMPLES), - h_graph_old_(nrow_ * NUM_SAMPLES), - h_rev_graph_old_(nrow_ * NUM_SAMPLES), + h_rev_graph_new_{ + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)}, + h_graph_old_( + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)), + h_rev_graph_old_{ + raft::make_pinned_matrix(res, nrow_, NUM_SAMPLES)}, d_list_sizes_new_{raft::make_device_vector(res, nrow_)}, d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); - - thrust::fill(thrust::device, - dists_buffer_.data_handle(), - dists_buffer_.data_handle() + dists_buffer_.size(), - std::numeric_limits::max()); - thrust::fill(thrust::device, - reinterpret_cast(graph_buffer_.data_handle()), - reinterpret_cast(graph_buffer_.data_handle()) + graph_buffer_.size(), - std::numeric_limits::max()); - thrust::fill(thrust::device, d_locks_.data_handle(), d_locks_.data_handle() + d_locks_.size(), 0); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + auto graph_buffer_view = raft::make_device_matrix_view( + reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); + raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); + raft::matrix::fill(res, d_locks_.view(), 0); }; +template +void GNND::reset(raft::resources const& res) +{ + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + auto graph_buffer_view = raft::make_device_matrix_view( + reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); + raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); + raft::matrix::fill(res, d_locks_.view(), 0); +} + template void GNND::add_reverse_edges(Index_t* graph_ptr, Index_t* h_rev_graph_ptr, @@ -1189,34 +1214,35 @@ void GNND::add_reverse_edges(Index_t* graph_ptr, template void GNND::local_join(cudaStream_t stream) { - thrust::fill(thrust::device.on(stream), - dists_buffer_.data_handle(), - dists_buffer_.data_handle() + dists_buffer_.size(), - std::numeric_limits::max()); - local_join_kernel<<>>( - thrust::raw_pointer_cast(graph_.h_graph_new.data()), - thrust::raw_pointer_cast(h_rev_graph_new_.data()), - d_list_sizes_new_.data_handle(), - thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(h_rev_graph_old_.data()), - d_list_sizes_old_.data_handle(), - NUM_SAMPLES, - d_data_.data_handle(), - ndim_, - graph_buffer_.data_handle(), - dists_buffer_.data_handle(), - DEGREE_ON_DEVICE, - d_locks_.data_handle(), - l2_norms_.data_handle()); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); + local_join_kernel<<>>(graph_.h_graph_new.data_handle(), + h_rev_graph_new_.data_handle(), + d_list_sizes_new_.data_handle(), + h_graph_old_.data_handle(), + h_rev_graph_old_.data_handle(), + d_list_sizes_old_.data_handle(), + NUM_SAMPLES, + d_data_.data_handle(), + ndim_, + graph_buffer_.data_handle(), + dists_buffer_.data_handle(), + DEGREE_ON_DEVICE, + d_locks_.data_handle(), + l2_norms_.data_handle()); } template -void GNND::build(Data_t* data, const Index_t nrow, Index_t* output_graph) +void GNND::build(Data_t* data, + const Index_t nrow, + Index_t* output_graph, + bool return_distances, + DistData_t* output_distances) { using input_t = typename std::remove_const::type; cudaStream_t stream = raft::resource::get_cuda_stream(res); nrow_ = nrow; + graph_.nrow = nrow; graph_.h_graph = (InternalID_t*)output_graph; cudaPointerAttributes data_ptr_attr; @@ -1226,24 +1252,18 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out cuvs::spatial::knn::detail::utils::batch_load_iterator vec_batches{ data, static_cast(nrow_), build_config_.dataset_dim, batch_size, stream}; for (auto const& batch : vec_batches) { - preprocess_data_kernel<<(raft::warp_size())) * - raft::warp_size(), - stream>>>(batch.data(), - d_data_.data_handle(), - build_config_.dataset_dim, - l2_norms_.data_handle(), - batch.offset()); + preprocess_data_kernel<<< + batch.size(), + raft::warp_size(), + sizeof(Data_t) * ceildiv(build_config_.dataset_dim, static_cast(raft::warp_size())) * + raft::warp_size(), + stream>>>(batch.data(), + d_data_.data_handle(), + build_config_.dataset_dim, + l2_norms_.data_handle(), + batch.offset()); } - thrust::fill(thrust::device.on(stream), - (Index_t*)graph_buffer_.data_handle(), - (Index_t*)graph_buffer_.data_handle() + graph_buffer_.size(), - std::numeric_limits::max()); - graph_.clear(); graph_.init_random_graph(); graph_.sample_graph(true); @@ -1251,8 +1271,8 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out auto update_and_sample = [&](bool update_graph) { if (update_graph) { update_counter_ = 0; - graph_.update_graph(thrust::raw_pointer_cast(graph_host_buffer_.data()), - thrust::raw_pointer_cast(dists_host_buffer_.data()), + graph_.update_graph(graph_host_buffer_.data_handle(), + dists_host_buffer_.data_handle(), DEGREE_ON_DEVICE, update_counter_); if (update_counter_ < build_config_.termination_threshold * nrow_ * @@ -1265,15 +1285,15 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out for (size_t it = 0; it < build_config_.max_iterations; it++) { raft::copy(d_list_sizes_new_.data_handle(), - thrust::raw_pointer_cast(graph_.h_list_sizes_new.data()), + graph_.h_list_sizes_new.data_handle(), nrow_, raft::resource::get_cuda_stream(res)); - raft::copy(thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(graph_.h_graph_old.data()), + raft::copy(h_graph_old_.data_handle(), + graph_.h_graph_old.data_handle(), nrow_ * NUM_SAMPLES, raft::resource::get_cuda_stream(res)); raft::copy(d_list_sizes_old_.data_handle(), - thrust::raw_pointer_cast(graph_.h_list_sizes_old.data()), + graph_.h_list_sizes_old.data_handle(), nrow_, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); @@ -1286,13 +1306,13 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out // contains some information for local_join. static_assert(DEGREE_ON_DEVICE * sizeof(*(dists_buffer_.data_handle())) >= NUM_SAMPLES * sizeof(*(graph_buffer_.data_handle()))); - add_reverse_edges(thrust::raw_pointer_cast(graph_.h_graph_new.data()), - thrust::raw_pointer_cast(h_rev_graph_new_.data()), + add_reverse_edges(graph_.h_graph_new.data_handle(), + h_rev_graph_new_.data_handle(), (Index_t*)dists_buffer_.data_handle(), d_list_sizes_new_.data_handle(), stream); - add_reverse_edges(thrust::raw_pointer_cast(h_graph_old_.data()), - thrust::raw_pointer_cast(h_rev_graph_old_.data()), + add_reverse_edges(h_graph_old_.data_handle(), + h_rev_graph_old_.data_handle(), (Index_t*)dists_buffer_.data_handle(), d_list_sizes_old_.data_handle(), stream); @@ -1316,21 +1336,21 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out update_and_sample_thread.join(); if (update_counter_ == -1) { break; } - raft::copy(thrust::raw_pointer_cast(graph_host_buffer_.data()), + raft::copy(graph_host_buffer_.data_handle(), graph_buffer_.data_handle(), nrow_ * DEGREE_ON_DEVICE, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); - raft::copy(thrust::raw_pointer_cast(dists_host_buffer_.data()), + raft::copy(dists_host_buffer_.data_handle(), dists_buffer_.data_handle(), nrow_ * DEGREE_ON_DEVICE, raft::resource::get_cuda_stream(res)); - graph_.sample_graph_new(thrust::raw_pointer_cast(graph_host_buffer_.data()), DEGREE_ON_DEVICE); + graph_.sample_graph_new(graph_host_buffer_.data_handle(), DEGREE_ON_DEVICE); } - graph_.update_graph(thrust::raw_pointer_cast(graph_host_buffer_.data()), - thrust::raw_pointer_cast(dists_host_buffer_.data()), + graph_.update_graph(graph_host_buffer_.data_handle(), + dists_host_buffer_.data_handle(), DEGREE_ON_DEVICE, update_counter_); raft::resource::sync_stream(res); @@ -1338,6 +1358,27 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out // Reuse graph_.h_dists as the buffer for shrink the lists in graph static_assert(sizeof(decltype(*(graph_.h_dists.data_handle()))) >= sizeof(Index_t)); + + if (return_distances) { + auto graph_d_dists = raft::make_device_matrix( + res, nrow_, build_config_.node_degree); + raft::copy(graph_d_dists.data_handle(), + graph_.h_dists.data_handle(), + nrow_ * build_config_.node_degree, + raft::resource::get_cuda_stream(res)); + + auto output_dist_view = raft::make_device_matrix_view( + output_distances, nrow_, build_config_.output_graph_degree); + + raft::matrix::slice_coordinates coords{static_cast(0), + static_cast(0), + static_cast(nrow_), + static_cast(build_config_.output_graph_degree)}; + raft::matrix::slice( + res, raft::make_const_mdspan(graph_d_dists.view()), output_dist_view, coords); + raft::resource::sync_stream(res); + } + Index_t* graph_shrink_buffer = (Index_t*)graph_.h_dists.data_handle(); #pragma omp parallel for @@ -1410,10 +1451,24 @@ void build(raft::resources const& res, .node_degree = extended_graph_degree, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, - .termination_threshold = params.termination_threshold}; + .termination_threshold = params.termination_threshold, + .output_graph_degree = params.graph_degree}; GNND nnd(res, build_config); - nnd.build(dataset.data_handle(), dataset.extent(0), int_graph.data_handle()); + + if (idx.distances().has_value() || !params.return_distances) { + nnd.build(dataset.data_handle(), + dataset.extent(0), + int_graph.data_handle(), + params.return_distances, + idx.distances() + .value_or(raft::make_device_matrix(res, 0, 0).view()) + .data_handle()); + } else { + RAFT_EXPECTS(!params.return_distances, + "Distance view not allocated. Using return_distances set to true requires " + "distance view to be allocated."); + } #pragma omp parallel for for (size_t i = 0; i < static_cast(dataset.extent(0)); i++) { @@ -1445,11 +1500,12 @@ index build( graph_degree = intermediate_degree; } - index idx{res, dataset.extent(0), static_cast(graph_degree)}; + index idx{ + res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; build(res, params, dataset, idx); return idx; } -} // namespace cuvs::neighbors::nn_descent::detail +} // namespace cuvs::neighbors::nn_descent::detail diff --git a/cpp/src/neighbors/detail/nn_descent_batch.cuh b/cpp/src/neighbors/detail/nn_descent_batch.cuh new file mode 100644 index 000000000..842dbe788 --- /dev/null +++ b/cpp/src/neighbors/detail/nn_descent_batch.cuh @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY + +#include "nn_descent.cuh" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::nn_descent::detail::experimental { + +// +// Run balanced kmeans on a subsample of the dataset to get centroids +// +template , memory_type::host>> +void get_balanced_kmeans_centroids( + raft::resources const& res, + cuvs::distance::DistanceType metric, + mdspan, row_major, Accessor> dataset, + raft::device_matrix_view centroids) +{ + size_t num_rows = static_cast(dataset.extent(0)); + size_t num_cols = static_cast(dataset.extent(1)); + size_t n_clusters = centroids.extent(0); + size_t num_subsamples = + std::min(static_cast(num_rows / n_clusters), static_cast(num_rows * 0.1)); + + auto d_subsample_dataset = + raft::make_device_matrix(res, num_subsamples, num_cols); + raft::matrix::sample_rows( + res, raft::random::RngState{0}, dataset, d_subsample_dataset.view()); + + cuvs::cluster::kmeans::balanced_params kmeans_params; + kmeans_params.metric = metric; + + auto d_subsample_dataset_const_view = + raft::make_device_matrix_view( + d_subsample_dataset.data_handle(), num_subsamples, num_cols); + auto centroids_view = raft::make_device_matrix_view( + centroids.data_handle(), n_clusters, num_cols); + cuvs::cluster::kmeans::fit(res, kmeans_params, d_subsample_dataset_const_view, centroids_view); +} + +// +// Get the top k closest centroid indices for each data point +// Loads the data in batches onto device if data is on host for memory efficiency +// +template +void get_global_nearest_k( + raft::resources const& res, + size_t k, + size_t num_rows, + size_t n_clusters, + const T* dataset, + raft::host_matrix_view global_nearest_cluster, + raft::device_matrix_view centroids, + cuvs::distance::DistanceType metric) +{ + size_t num_cols = centroids.extent(1); + auto centroids_view = raft::make_device_matrix_view( + centroids.data_handle(), n_clusters, num_cols); + + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, dataset)); + float* ptr = reinterpret_cast(attr.devicePointer); + + size_t num_batches = n_clusters; + size_t batch_size = (num_rows + n_clusters) / n_clusters; + if (ptr == nullptr) { // data on host + + auto d_dataset_batch = + raft::make_device_matrix(res, batch_size, num_cols); + + auto nearest_clusters_idx = + raft::make_device_matrix(res, batch_size, k); + auto nearest_clusters_idxt = + raft::make_device_matrix(res, batch_size, k); + auto nearest_clusters_dist = + raft::make_device_matrix(res, batch_size, k); + + for (size_t i = 0; i < num_batches; i++) { + size_t batch_size_ = batch_size; + + if (i == num_batches - 1) { batch_size_ = num_rows - batch_size * i; } + raft::copy(d_dataset_batch.data_handle(), + dataset + i * batch_size * num_cols, + batch_size_ * num_cols, + resource::get_cuda_stream(res)); + + std::optional> norms_view; + cuvs::neighbors::brute_force::index brute_force_index( + res, centroids_view, norms_view, metric); + cuvs::neighbors::brute_force::search(res, + brute_force_index, + raft::make_const_mdspan(d_dataset_batch.view()), + nearest_clusters_idx.view(), + nearest_clusters_dist.view()); + + thrust::copy(raft::resource::get_thrust_policy(res), + nearest_clusters_idx.data_handle(), + nearest_clusters_idx.data_handle() + nearest_clusters_idx.size(), + nearest_clusters_idxt.data_handle()); + raft::copy(global_nearest_cluster.data_handle() + i * batch_size * k, + nearest_clusters_idxt.data_handle(), + batch_size_ * k, + resource::get_cuda_stream(res)); + } + } else { // data on device + auto nearest_clusters_idx = + raft::make_device_matrix(res, num_rows, k); + auto nearest_clusters_dist = + raft::make_device_matrix(res, num_rows, k); + + std::optional> norms_view; + cuvs::neighbors::brute_force::index brute_force_index( + res, centroids_view, norms_view, metric); + auto dataset_view = + raft::make_device_matrix_view(dataset, num_rows, num_cols); + cuvs::neighbors::brute_force::search(res, + brute_force_index, + dataset_view, + nearest_clusters_idx.view(), + nearest_clusters_dist.view()); + + auto nearest_clusters_idxt = + raft::make_device_matrix(res, batch_size, k); + for (size_t i = 0; i < num_batches; i++) { + size_t batch_size_ = batch_size; + + if (i == num_batches - 1) { batch_size_ = num_rows - batch_size * i; } + thrust::copy(raft::resource::get_thrust_policy(res), + nearest_clusters_idx.data_handle() + i * batch_size_ * k, + nearest_clusters_idx.data_handle() + (i + 1) * batch_size_ * k, + nearest_clusters_idxt.data_handle()); + raft::copy(global_nearest_cluster.data_handle() + i * batch_size_ * k, + nearest_clusters_idxt.data_handle(), + batch_size_ * k, + resource::get_cuda_stream(res)); + } + } +} + +// +// global_nearest_cluster [num_rows X k=2] : top 2 closest clusters for each data point +// inverted_indices [num_rows x k vector] : sparse vector for data indices for each cluster +// cluster_size [n_cluster] : cluster size for each cluster +// offset [n_cluster] : offset in inverted_indices for each cluster +// Loads the data in batches onto device if data is on host for memory efficiency +// +template +void get_inverted_indices(raft::resources const& res, + size_t n_clusters, + size_t& max_cluster_size, + size_t& min_cluster_size, + raft::host_matrix_view global_nearest_cluster, + raft::host_vector_view inverted_indices, + raft::host_vector_view cluster_size, + raft::host_vector_view offset) +{ + // build sparse inverted indices and get number of data points for each cluster + size_t num_rows = global_nearest_cluster.extent(0); + size_t k = global_nearest_cluster.extent(1); + + auto local_offset = raft::make_host_vector(n_clusters); + + max_cluster_size = 0; + min_cluster_size = std::numeric_limits::max(); + + std::fill(cluster_size.data_handle(), cluster_size.data_handle() + n_clusters, 0); + std::fill(local_offset.data_handle(), local_offset.data_handle() + n_clusters, 0); + + // TODO: this part isn't really a bottleneck but maybe worth trying omp parallel + // for with atomic add + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < k; j++) { + IdxT cluster_id = global_nearest_cluster(i, j); + cluster_size(cluster_id) += 1; + } + } + + offset(0) = 0; + for (size_t i = 1; i < n_clusters; i++) { + offset(i) = offset(i - 1) + cluster_size(i - 1); + } + for (size_t i = 0; i < num_rows; i++) { + for (size_t j = 0; j < k; j++) { + IdxT cluster_id = global_nearest_cluster(i, j); + inverted_indices(offset(cluster_id) + local_offset(cluster_id)) = i; + local_offset(cluster_id) += 1; + } + } + + max_cluster_size = static_cast( + *std::max_element(cluster_size.data_handle(), cluster_size.data_handle() + n_clusters)); + min_cluster_size = static_cast( + *std::min_element(cluster_size.data_handle(), cluster_size.data_handle() + n_clusters)); +} + +template +struct KeyValuePair { + KeyType key; + ValueType value; +}; + +template +struct CustomKeyComparator { + __device__ bool operator()(const KeyValuePair& a, + const KeyValuePair& b) const + { + if (a.key == b.key) { return a.value < b.value; } + return a.key < b.key; + } +}; + +template +RAFT_KERNEL merge_subgraphs(IdxT* cluster_data_indices, + size_t graph_degree, + size_t num_cluster_in_batch, + float* global_distances, + float* batch_distances, + IdxT* global_indices, + IdxT* batch_indices) +{ + size_t batch_row = blockIdx.x; + typedef cub::BlockMergeSort, BLOCK_SIZE, ITEMS_PER_THREAD> + BlockMergeSortType; + __shared__ typename cub::BlockMergeSort, BLOCK_SIZE, ITEMS_PER_THREAD>:: + TempStorage tmpSmem; + + extern __shared__ char sharedMem[]; + float* blockKeys = reinterpret_cast(sharedMem); + IdxT* blockValues = reinterpret_cast(&sharedMem[graph_degree * 2 * sizeof(float)]); + int16_t* uniqueMask = + reinterpret_cast(&sharedMem[graph_degree * 2 * (sizeof(float) + sizeof(IdxT))]); + + if (batch_row < num_cluster_in_batch) { + // load batch or global depending on threadIdx + size_t global_row = cluster_data_indices[batch_row]; + + KeyValuePair threadKeyValuePair[ITEMS_PER_THREAD]; + + size_t halfway = BLOCK_SIZE / 2; + size_t do_global = threadIdx.x < halfway; + + float* distances; + IdxT* indices; + + if (do_global) { + distances = global_distances; + indices = global_indices; + } else { + distances = batch_distances; + indices = batch_indices; + } + + size_t idxBase = (threadIdx.x * do_global + (threadIdx.x - halfway) * (1lu - do_global)) * + static_cast(ITEMS_PER_THREAD); + size_t arrIdxBase = (global_row * do_global + batch_row * (1lu - do_global)) * graph_degree; + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId < graph_degree) { + threadKeyValuePair[i].key = distances[arrIdxBase + colId]; + threadKeyValuePair[i].value = indices[arrIdxBase + colId]; + } else { + threadKeyValuePair[i].key = std::numeric_limits::max(); + threadKeyValuePair[i].value = std::numeric_limits::max(); + } + } + + __syncthreads(); + + BlockMergeSortType(tmpSmem).Sort(threadKeyValuePair, CustomKeyComparator{}); + + // load sorted result into shared memory to get unique values + idxBase = threadIdx.x * ITEMS_PER_THREAD; + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId < 2 * graph_degree) { + blockKeys[colId] = threadKeyValuePair[i].key; + blockValues[colId] = threadKeyValuePair[i].value; + } + } + + __syncthreads(); + + // get unique mask + if (threadIdx.x == 0) { uniqueMask[0] = 1; } + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId > 0 && colId < 2 * graph_degree) { + uniqueMask[colId] = static_cast(blockValues[colId] != blockValues[colId - 1]); + } + } + + __syncthreads(); + + // prefix sum + if (threadIdx.x == 0) { + for (int i = 1; i < 2 * graph_degree; i++) { + uniqueMask[i] += uniqueMask[i - 1]; + } + } + + __syncthreads(); + // load unique values to global memory + if (threadIdx.x == 0) { + global_distances[global_row * graph_degree] = blockKeys[0]; + global_indices[global_row * graph_degree] = blockValues[0]; + } + + for (int i = 0; i < ITEMS_PER_THREAD; i++) { + size_t colId = idxBase + i; + if (colId > 0 && colId < 2 * graph_degree) { + bool is_unique = uniqueMask[colId] != uniqueMask[colId - 1]; + int16_t global_colId = uniqueMask[colId] - 1; + if (is_unique && static_cast(global_colId) < graph_degree) { + global_distances[global_row * graph_degree + global_colId] = blockKeys[colId]; + global_indices[global_row * graph_degree + global_colId] = blockValues[colId]; + } + } + } + } +} + +// +// builds knn graph using NN Descent and merge with global graph +// +template , memory_type::host>> +void build_and_merge(raft::resources const& res, + const index_params& params, + size_t num_data_in_cluster, + size_t graph_degree, + size_t int_graph_node_degree, + T* cluster_data, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_d, + float* global_distances_d, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + GNND& nnd) +{ + nnd.build(cluster_data, num_data_in_cluster, int_graph, true, batch_distances_d); + + // remap indices +#pragma omp parallel for + for (size_t i = 0; i < num_data_in_cluster; i++) { + for (size_t j = 0; j < graph_degree; j++) { + size_t local_idx = int_graph[i * int_graph_node_degree + j]; + batch_indices_h[i * graph_degree + j] = inverted_indices[local_idx]; + } + } + + raft::copy(batch_indices_d, + batch_indices_h, + num_data_in_cluster * graph_degree, + raft::resource::get_cuda_stream(res)); + + size_t num_elems = graph_degree * 2; + size_t sharedMemSize = num_elems * (sizeof(float) + sizeof(IdxT) + sizeof(int16_t)); + + if (num_elems <= 128) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else if (num_elems <= 512) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else if (num_elems <= 1024) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else if (num_elems <= 2048) { + merge_subgraphs + <<>>( + cluster_data_indices, + graph_degree, + num_data_in_cluster, + global_distances_d, + batch_distances_d, + global_indices_d, + batch_indices_d); + } else { + // this is as far as we can get due to the shared mem usage of cub::BlockMergeSort + RAFT_FAIL("The degree of knn is too large (%lu). It must be smaller than 1024", graph_degree); + } + raft::resource::sync_stream(res); +} + +// +// For each cluster, gather the data samples that belong to that cluster, and +// call build_and_merge +// +template +void cluster_nnd(raft::resources const& res, + const index_params& params, + size_t graph_degree, + size_t extended_graph_degree, + size_t max_cluster_size, + raft::host_matrix_view dataset, + IdxT* offsets, + IdxT* cluster_size, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_h, + float* global_distances_h, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + const BuildConfig& build_config) +{ + size_t num_rows = dataset.extent(0); + size_t num_cols = dataset.extent(1); + + GNND nnd(res, build_config); + + auto cluster_data_matrix = + raft::make_host_matrix(max_cluster_size, num_cols); + + for (size_t cluster_id = 0; cluster_id < params.n_clusters; cluster_id++) { + RAFT_LOG_DEBUG( + "# Data on host. Running clusters: %lu / %lu", cluster_id + 1, params.n_clusters); + size_t num_data_in_cluster = cluster_size[cluster_id]; + size_t offset = offsets[cluster_id]; + +#pragma omp parallel for + for (size_t i = 0; i < num_data_in_cluster; i++) { + for (size_t j = 0; j < num_cols; j++) { + size_t global_row = (inverted_indices + offset)[i]; + cluster_data_matrix(i, j) = dataset(global_row, j); + } + } + + build_and_merge(res, + params, + num_data_in_cluster, + graph_degree, + extended_graph_degree, + cluster_data_matrix.data_handle(), + cluster_data_indices + offset, + int_graph, + inverted_indices + offset, + global_indices_h, + global_distances_h, + batch_indices_h, + batch_indices_d, + batch_distances_d, + nnd); + nnd.reset(res); + } +} + +template +void cluster_nnd(raft::resources const& res, + const index_params& params, + size_t graph_degree, + size_t extended_graph_degree, + size_t max_cluster_size, + raft::device_matrix_view dataset, + IdxT* offsets, + IdxT* cluster_size, + IdxT* cluster_data_indices, + int* int_graph, + IdxT* inverted_indices, + IdxT* global_indices_h, + float* global_distances_h, + IdxT* batch_indices_h, + IdxT* batch_indices_d, + float* batch_distances_d, + const BuildConfig& build_config) +{ + size_t num_rows = dataset.extent(0); + size_t num_cols = dataset.extent(1); + + GNND nnd(res, build_config); + + auto cluster_data_matrix = + raft::make_device_matrix(res, max_cluster_size, num_cols); + + for (size_t cluster_id = 0; cluster_id < params.n_clusters; cluster_id++) { + RAFT_LOG_DEBUG( + "# Data on device. Running clusters: %lu / %lu", cluster_id + 1, params.n_clusters); + size_t num_data_in_cluster = cluster_size[cluster_id]; + size_t offset = offsets[cluster_id]; + + auto cluster_data_view = raft::make_device_matrix_view( + cluster_data_matrix.data_handle(), num_data_in_cluster, num_cols); + auto cluster_data_indices_view = raft::make_device_vector_view( + cluster_data_indices + offset, num_data_in_cluster); + + auto dataset_IdxT = + raft::make_device_matrix_view(dataset.data_handle(), num_rows, num_cols); + raft::matrix::gather(res, dataset_IdxT, cluster_data_indices_view, cluster_data_view); + + build_and_merge(res, + params, + num_data_in_cluster, + graph_degree, + extended_graph_degree, + cluster_data_view.data_handle(), + cluster_data_indices + offset, + int_graph, + inverted_indices + offset, + global_indices_h, + global_distances_h, + batch_indices_h, + batch_indices_d, + batch_distances_d, + nnd); + nnd.reset(res); + } +} + +template , memory_type::host>> +void batch_build(raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset, + index& global_idx) +{ + size_t graph_degree = params.graph_degree; + size_t intermediate_degree = params.intermediate_graph_degree; + + size_t num_rows = static_cast(dataset.extent(0)); + size_t num_cols = static_cast(dataset.extent(1)); + + auto centroids = + raft::make_device_matrix(res, params.n_clusters, num_cols); + get_balanced_kmeans_centroids(res, params.metric, dataset, centroids.view()); + + size_t k = 2; + auto global_nearest_cluster = raft::make_host_matrix(num_rows, k); + get_global_nearest_k(res, + k, + num_rows, + params.n_clusters, + dataset.data_handle(), + global_nearest_cluster.view(), + centroids.view(), + params.metric); + + auto inverted_indices = raft::make_host_vector(num_rows * k); + auto cluster_size = raft::make_host_vector(params.n_clusters); + auto offset = raft::make_host_vector(params.n_clusters); + + size_t max_cluster_size, min_cluster_size; + get_inverted_indices(res, + params.n_clusters, + max_cluster_size, + min_cluster_size, + global_nearest_cluster.view(), + inverted_indices.view(), + cluster_size.view(), + offset.view()); + + if (intermediate_degree >= min_cluster_size) { + RAFT_LOG_WARN( + "Intermediate graph degree cannot be larger than minimum cluster size, reducing it to %lu", + dataset.extent(0)); + intermediate_degree = min_cluster_size - 1; + } + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + size_t extended_graph_degree = + align32::roundUp(static_cast(graph_degree * (graph_degree <= 32 ? 1.0 : 1.3))); + size_t extended_intermediate_degree = align32::roundUp( + static_cast(intermediate_degree * (intermediate_degree <= 32 ? 1.0 : 1.3))); + + auto int_graph = raft::make_host_matrix( + max_cluster_size, static_cast(extended_graph_degree)); + + BuildConfig build_config{.max_dataset_size = max_cluster_size, + .dataset_dim = num_cols, + .node_degree = extended_graph_degree, + .internal_node_degree = extended_intermediate_degree, + .max_iterations = params.max_iterations, + .termination_threshold = params.termination_threshold, + .output_graph_degree = graph_degree}; + + auto global_indices_h = raft::make_managed_matrix(res, num_rows, graph_degree); + auto global_distances_h = raft::make_managed_matrix(res, num_rows, graph_degree); + + std::fill(global_indices_h.data_handle(), + global_indices_h.data_handle() + num_rows * graph_degree, + std::numeric_limits::max()); + std::fill(global_distances_h.data_handle(), + global_distances_h.data_handle() + num_rows * graph_degree, + std::numeric_limits::max()); + + auto batch_indices_h = + raft::make_host_matrix(max_cluster_size, graph_degree); + auto batch_indices_d = + raft::make_device_matrix(res, max_cluster_size, graph_degree); + auto batch_distances_d = + raft::make_device_matrix(res, max_cluster_size, graph_degree); + + auto cluster_data_indices = raft::make_device_vector(res, num_rows * k); + raft::copy(cluster_data_indices.data_handle(), + inverted_indices.data_handle(), + num_rows * k, + resource::get_cuda_stream(res)); + + cluster_nnd(res, + params, + graph_degree, + extended_graph_degree, + max_cluster_size, + dataset, + offset.data_handle(), + cluster_size.data_handle(), + cluster_data_indices.data_handle(), + int_graph.data_handle(), + inverted_indices.data_handle(), + global_indices_h.data_handle(), + global_distances_h.data_handle(), + batch_indices_h.data_handle(), + batch_indices_d.data_handle(), + batch_distances_d.data_handle(), + build_config); + + raft::copy(global_idx.graph().data_handle(), + global_indices_h.data_handle(), + num_rows * graph_degree, + raft::resource::get_cuda_stream(res)); + if (params.return_distances && global_idx.distances().has_value()) { + raft::copy(global_idx.distances().value().data_handle(), + global_distances_h.data_handle(), + num_rows * graph_degree, + raft::resource::get_cuda_stream(res)); + } +} + +template , memory_type::host>> +index batch_build(raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset) +{ + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + index idx{ + res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; + + batch_build(res, params, dataset, idx); + + return idx; +} + +} // namespace cuvs::neighbors::nn_descent::detail::experimental diff --git a/cpp/src/neighbors/nn_descent.cuh b/cpp/src/neighbors/nn_descent.cuh index 582da72c1..ed91dac91 100644 --- a/cpp/src/neighbors/nn_descent.cuh +++ b/cpp/src/neighbors/nn_descent.cuh @@ -17,9 +17,14 @@ #pragma once #include "detail/nn_descent.cuh" +#include "detail/nn_descent_batch.cuh" + +#include +#include #include #include +#include #include namespace cuvs::neighbors::nn_descent { @@ -61,7 +66,15 @@ auto build(raft::resources const& res, index_params const& params, raft::device_matrix_view dataset) -> index { - return detail::build(res, params, dataset); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + return detail::experimental::batch_build(res, params, dataset); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + return detail::build(res, params, dataset); + } } /** @@ -100,7 +113,15 @@ void build(raft::resources const& res, raft::device_matrix_view dataset, index& idx) { - detail::build(res, params, dataset, idx); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + detail::experimental::batch_build(res, params, dataset, idx); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + detail::build(res, params, dataset, idx); + } } /** @@ -135,7 +156,15 @@ auto build(raft::resources const& res, index_params const& params, raft::host_matrix_view dataset) -> index { - return detail::build(res, params, dataset); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + return detail::experimental::batch_build(res, params, dataset); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + return detail::build(res, params, dataset); + } } /** @@ -174,7 +203,15 @@ void build(raft::resources const& res, raft::host_matrix_view dataset, index& idx) { - detail::build(res, params, dataset, idx); + if (params.n_clusters > 1) { + if constexpr (std::is_same_v) { + detail::experimental::batch_build(res, params, dataset, idx); + } else { + RAFT_FAIL("Batched nn-descent is only supported for float precision"); + } + } else { + detail::build(res, params, dataset, idx); + } } /** @} */ // end group nn-descent diff --git a/cpp/src/neighbors/nn_descent_float.cu b/cpp/src/neighbors/nn_descent_float.cu index c6d356671..fa85db127 100644 --- a/cpp/src/neighbors/nn_descent_float.cu +++ b/cpp/src/neighbors/nn_descent_float.cu @@ -19,21 +19,38 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + }; \ + } \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(float, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_half.cu b/cpp/src/neighbors/nn_descent_half.cu index 587993031..2ee45d435 100644 --- a/cpp/src/neighbors/nn_descent_half.cu +++ b/cpp/src/neighbors/nn_descent_half.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(half, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_int8.cu b/cpp/src/neighbors/nn_descent_int8.cu index 813a01746..e150f511b 100644 --- a/cpp/src/neighbors/nn_descent_int8.cu +++ b/cpp/src/neighbors/nn_descent_int8.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(int8_t, uint32_t); diff --git a/cpp/src/neighbors/nn_descent_uint8.cu b/cpp/src/neighbors/nn_descent_uint8.cu index 9d73dd90f..d8657777b 100644 --- a/cpp/src/neighbors/nn_descent_uint8.cu +++ b/cpp/src/neighbors/nn_descent_uint8.cu @@ -19,21 +19,39 @@ namespace cuvs::neighbors::nn_descent { -#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ - }; \ - \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::nn_descent::index_params& params, \ - raft::host_matrix_view dataset) \ - ->cuvs::neighbors::nn_descent::index \ - { \ - return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ +#define CUVS_INST_NN_DESCENT_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::device_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ + }; \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::nn_descent::index_params& params, \ + raft::host_matrix_view dataset, \ + std::optional> graph) \ + ->cuvs::neighbors::nn_descent::index \ + { \ + if (!graph.has_value()) { \ + return cuvs::neighbors::nn_descent::build(handle, params, dataset); \ + } else { \ + std::optional> distances = \ + std::nullopt; \ + cuvs::neighbors::nn_descent::index idx{handle, graph.value(), distances}; \ + cuvs::neighbors::nn_descent::build(handle, params, dataset, idx); \ + return idx; \ + } \ }; CUVS_INST_NN_DESCENT_BUILD(uint8_t, uint32_t); diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index bce0f9899..7d2575c2b 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -18,9 +18,13 @@ #include "../test_utils.cuh" #include "ann_utils.cuh" +#include #include + #include +#include #include +#include #include "naive_knn.cuh" @@ -42,6 +46,15 @@ struct AnnNNDescentInputs { double min_recall; }; +struct AnnNNDescentBatchInputs { + std::pair recall_cluster; + int n_rows; + int dim; + int graph_degree; + cuvs::distance::DistanceType metric; + bool host_dataset; +}; + inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentInputs& p) { os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree @@ -50,6 +63,14 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentInputs& return os; } +inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentBatchInputs& p) +{ + os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree + << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") + << ", clusters=" << p.recall_cluster.second << std::endl; + return os; +} + template class AnnNNDescentTest : public ::testing::TestWithParam { public: @@ -65,7 +86,9 @@ class AnnNNDescentTest : public ::testing::TestWithParam { { size_t queries_size = ps.n_rows * ps.graph_degree; std::vector indices_NNDescent(queries_size); + std::vector distances_NNDescent(queries_size); std::vector indices_naive(queries_size); + std::vector distances_naive(queries_size); { rmm::device_uvector distances_naive_dev(queries_size, stream_); @@ -81,16 +104,18 @@ class AnnNNDescentTest : public ::testing::TestWithParam { ps.graph_degree, ps.metric); raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); } { { - cuvs::neighbors::nn_descent::index_params index_params; + nn_descent::index_params index_params; index_params.metric = ps.metric; index_params.graph_degree = ps.graph_degree; index_params.intermediate_graph_degree = 2 * ps.graph_degree; index_params.max_iterations = 100; + index_params.return_distances = true; auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); @@ -101,22 +126,40 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - auto index = - cuvs::neighbors::nn_descent::build(handle_, index_params, database_host_view); - raft::update_host( + auto index = nn_descent::build(handle_, index_params, database_host_view); + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + } else { - auto index = cuvs::neighbors::nn_descent::build(handle_, index_params, database_view); - raft::update_host( + auto index = nn_descent::build(handle_, index_params, database_view); + raft::copy( indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } }; } raft::resource::sync_stream(handle_); } double min_recall = ps.min_recall; - EXPECT_TRUE(eval_recall( - indices_naive, indices_NNDescent, ps.n_rows, ps.graph_degree, 0.001, min_recall)); + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_NNDescent, + distances_naive, + distances_NNDescent, + ps.n_rows, + ps.graph_degree, + 0.001, + min_recall)); } } @@ -146,6 +189,125 @@ class AnnNNDescentTest : public ::testing::TestWithParam { rmm::device_uvector database; }; +template +class AnnNNDescentBatchTest : public ::testing::TestWithParam { + public: + AnnNNDescentBatchTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_) + { + } + + void testNNDescentBatch() + { + size_t queries_size = ps.n_rows * ps.graph_degree; + std::vector indices_NNDescent(queries_size); + std::vector distances_NNDescent(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + database.data(), + database.data(), + ps.n_rows, + ps.n_rows, + ps.dim, + ps.graph_degree, + ps.metric); + raft::update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + raft::resource::sync_stream(handle_); + } + + { + { + nn_descent::index_params index_params; + index_params.metric = ps.metric; + index_params.graph_degree = ps.graph_degree; + index_params.intermediate_graph_degree = 2 * ps.graph_degree; + index_params.max_iterations = 10; + index_params.return_distances = true; + index_params.n_clusters = ps.recall_cluster.second; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + { + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + auto index = nn_descent::build(handle_, index_params, database_host_view); + raft::copy( + indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + + } else { + auto index = nn_descent::build(handle_, index_params, database_view); + raft::copy( + indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); + if (index.distances().has_value()) { + raft::copy(distances_NNDescent.data(), + index.distances().value().data_handle(), + queries_size, + stream_); + } + }; + } + raft::resource::sync_stream(handle_); + } + double min_recall = ps.recall_cluster.first; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_NNDescent, + distances_naive, + distances_NNDescent, + ps.n_rows, + ps.graph_degree, + 0.01, + min_recall, + true, + static_cast(ps.graph_degree * 0.1))); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + raft::random::RngState r(1234ULL); + if constexpr (std::is_same{}) { + raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); + } else { + raft::random::uniformInt( + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + } + raft::resource::sync_stream(handle_); + } + + void TearDown() override + { + raft::resource::sync_stream(handle_); + database.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnNNDescentBatchInputs ps; + rmm::device_uvector database; +}; + const std::vector inputs = raft::util::itertools::product( {1000, 2000}, // n_rows {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim @@ -154,4 +316,15 @@ const std::vector inputs = raft::util::itertools::product inputsBatch = + raft::util::itertools::product( + {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters + {4000, 5000}, // n_rows + {192, 512}, // dim + {32, 64}, // graph_degree + {cuvs::distance::DistanceType::L2Expanded}, + {false, true}); + +} // namespace cuvs::neighbors::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu index 64c0e0291..7a24f96a1 100644 --- a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu @@ -23,6 +23,12 @@ namespace cuvs::neighbors::nn_descent { typedef AnnNNDescentTest AnnNNDescentTestF_U32; TEST_P(AnnNNDescentTestF_U32, AnnNNDescent) { this->testNNDescent(); } +// typedef AnnNNDescentBatchTest AnnNNDescentBatchTestF_U32; +// TEST_P(AnnNNDescentBatchTestF_U32, AnnNNDescentBatch) { this->testNNDescentBatch(); } + INSTANTIATE_TEST_CASE_P(AnnNNDescentTest, AnnNNDescentTestF_U32, ::testing::ValuesIn(inputs)); +// INSTANTIATE_TEST_CASE_P(AnnNNDescentBatchTest, +// AnnNNDescentBatchTestF_U32, +// ::testing::ValuesIn(inputsBatch)); } // namespace cuvs::neighbors::nn_descent diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index b08e1d725..94bccade2 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include // raft::make_device_matrix #include @@ -165,9 +166,14 @@ auto calc_recall(const std::vector& expected_idx, /** check uniqueness of indices */ template -auto check_unique_indices(const std::vector& actual_idx, size_t rows, size_t cols) +auto check_unique_indices(const std::vector& actual_idx, + size_t rows, + size_t cols, + size_t max_duplicates = 0) { size_t max_count; + size_t dup_count = 0lu; + std::set unique_indices; for (size_t i = 0; i < rows; ++i) { unique_indices.clear(); @@ -180,8 +186,11 @@ auto check_unique_indices(const std::vector& actual_idx, size_t rows, size_t } else if (unique_indices.find(act_idx) == unique_indices.end()) { unique_indices.insert(act_idx); } else { - return testing::AssertionFailure() - << "Duplicated index " << act_idx << " at k " << k << " for query " << i << "! "; + dup_count++; + if (dup_count > max_duplicates) { + return testing::AssertionFailure() + << "Duplicated index " << act_idx << " at k " << k << " for query " << i << "! "; + } } } } @@ -264,7 +273,8 @@ auto eval_neighbours(const std::vector& expected_idx, size_t cols, double eps, double min_recall, - bool test_unique = true) -> testing::AssertionResult + bool test_unique = true, + size_t max_duplicates = 0) -> testing::AssertionResult { auto [actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, expected_dist, actual_dist, rows, cols, eps); @@ -284,7 +294,7 @@ auto eval_neighbours(const std::vector& expected_idx, << min_recall << "); eps = " << eps << ". "; } if (test_unique) - return check_unique_indices(actual_idx, rows, cols); + return check_unique_indices(actual_idx, rows, cols, max_duplicates); else return testing::AssertionSuccess(); } diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 0ae97266b..8bd2e8b76 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -23,7 +23,7 @@ def run_hnsw_build_search_test( - n_rows=1000, + n_rows=10000, n_cols=10, n_queries=100, k=10,