diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c2e6d9fce4..0d6ab7ee54 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: types_or: [python, cython] additional_dependencies: ["flake8-force"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.971' + rev: 'v1.3.0' hooks: - id: mypy additional_dependencies: [types-cachetools] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ac75d7d83b..650bc1a059 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,7 @@ option(BUILD_SHARED_LIBS "Build raft shared libraries" ON) option(BUILD_TESTS "Build raft unit-tests" ON) option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF) option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF) +option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF @@ -195,6 +196,10 @@ if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH) rapids_cpm_gbench() endif() +if(BUILD_CAGRA_HNSWLIB) + include(cmake/thirdparty/get_hnswlib.cmake) +endif() + # ################################################################################################## # * raft --------------------------------------------------------------------- add_library(raft INTERFACE) @@ -203,6 +208,9 @@ add_library(raft::raft ALIAS raft) target_include_directories( raft INTERFACE "$" "$" ) +if(BUILD_CAGRA_HNSWLIB) + target_link_libraries(raft INTERFACE hnswlib::hnswlib) +endif() if(NOT BUILD_CPU_ONLY) # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. @@ -424,6 +432,8 @@ if(RAFT_COMPILE_LIBRARY) src/raft_runtime/neighbors/cagra_build.cu src/raft_runtime/neighbors/cagra_search.cu src/raft_runtime/neighbors/cagra_serialize.cu + src/raft_runtime/neighbors/eps_neighborhood.cu + $<$:src/raft_runtime/neighbors/hnsw.cpp> src/raft_runtime/neighbors/ivf_flat_build.cu src/raft_runtime/neighbors/ivf_flat_search.cu src/raft_runtime/neighbors/ivf_flat_serialize.cu @@ -443,6 +453,7 @@ if(RAFT_COMPILE_LIBRARY) src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu src/raft_runtime/random/rmat_rectangular_generator_int_double.cu src/raft_runtime/random/rmat_rectangular_generator_int_float.cu + src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index de980e8945..ee84f7515a 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -225,9 +225,7 @@ endfunction() if(RAFT_ANN_BENCH_USE_HNSWLIB) ConfigureAnnBench( - NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp - LINKS - hnswlib::hnswlib + NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib ) endif() @@ -276,12 +274,7 @@ endif() if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) ConfigureAnnBench( - NAME - RAFT_CAGRA_HNSWLIB - PATH - bench/ann/src/raft/raft_cagra_hnswlib.cu - LINKS - raft::compiled + NAME RAFT_CAGRA_HNSWLIB PATH bench/ann/src/raft/raft_cagra_hnswlib.cu LINKS raft::compiled hnswlib::hnswlib ) endif() @@ -336,10 +329,7 @@ endif() if(RAFT_ANN_BENCH_USE_GGNN) include(cmake/thirdparty/get_glog.cmake) - ConfigureAnnBench( - NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu - LINKS glog::glog ggnn::ggnn - ) + ConfigureAnnBench(NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn) endif() # ################################################################################################## diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index e61de6745e..53f31d6232 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -287,11 +287,11 @@ void bench_search(::benchmark::State& state, std::make_shared>(current_algo_props->query_memory_type, k * query_set_size); cuda_timer gpu_timer; - auto start = std::chrono::high_resolution_clock::now(); { nvtx_case nvtx{state.name()}; - auto algo = dynamic_cast*>(current_algo.get())->copy(); + auto algo = dynamic_cast*>(current_algo.get())->copy(); + auto start = std::chrono::high_resolution_clock::now(); for (auto _ : state) { [[maybe_unused]] auto ntx_lap = nvtx.lap(); [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); @@ -314,17 +314,15 @@ void bench_search(::benchmark::State& state, queries_processed += n_queries; } + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast>(end - start).count(); + if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); } + state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}}); } - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast>(end - start).count(); - if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); } - state.counters.insert( - {"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}}); state.SetItemsProcessed(queries_processed); if (cudart.found()) { - double gpu_time_per_iteration = gpu_timer.total_time() / (double)state.iterations(); - state.counters.insert({"GPU", {gpu_time_per_iteration, benchmark::Counter::kAvgThreads}}); + state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}}); } // This will be the total number of queries across all threads diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index 9132db7c04..f7088c7271 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,13 +49,6 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return false; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return true - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } - private: /** * @brief Allocates memory of size at least `bytes` using cudaMalloc. @@ -112,21 +105,5 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource { { return dynamic_cast(&other) != nullptr; } - - /** - * @brief Get free and available memory for memory resource - * - * @throws `rmm::cuda_error` if unable to retrieve memory info. - * - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view) const override - { - std::size_t free_size{}; - std::size_t total_size{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); - return std::make_pair(free_size, total_size); - } }; -} // namespace raft::mr \ No newline at end of file +} // namespace raft::mr diff --git a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp index 28ca691f86..ab207a36fe 100644 --- a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,13 +53,6 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource { */ [[nodiscard]] bool supports_streams() const noexcept override { return false; } - /** - * @brief Query whether the resource supports the get_mem_info API. - * - * @return true - */ - [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } - private: /** * @brief Allocates memory of size at least `bytes` using cudaMalloc. @@ -110,21 +103,5 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource { { return dynamic_cast(&other) != nullptr; } - - /** - * @brief Get free and available memory for memory resource - * - * @throws `rmm::cuda_error` if unable to retrieve memory info. - * - * @return std::pair contaiing free_size and total_size of memory - */ - [[nodiscard]] std::pair do_get_mem_info( - rmm::cuda_stream_view) const override - { - std::size_t free_size{}; - std::size_t total_size{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); - return std::make_pair(free_size, total_size); - } }; -} // namespace raft::mr \ No newline at end of file +} // namespace raft::mr diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 5ddfc58677..08b2f188c5 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -52,6 +52,11 @@ struct hnsw_dist_t { using type = int; }; +template <> +struct hnsw_dist_t { + using type = int; +}; + template class HnswLib : public ANN { public: @@ -135,7 +140,7 @@ void HnswLib::build(const T* dataset, size_t nrow, cudaStream_t) space_ = std::make_shared(dim_); } } else if constexpr (std::is_same_v) { - space_ = std::make_shared(dim_); + space_ = std::make_shared>(dim_); } appr_alg_ = std::make_shared::type>>( @@ -205,7 +210,7 @@ void HnswLib::load(const std::string& path_to_index) space_ = std::make_shared(dim_); } } else if constexpr (std::is_same_v) { - space_ = std::make_shared(dim_); + space_ = std::make_shared>(dim_); } appr_alg_ = std::make_shared::type>>( diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 2a021a8a12..ae40deb50c 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,6 +87,9 @@ void parse_build_param(const nlohmann::json& conf, "', should be either 'cluster' or 'subspace'"); } } + if (conf.contains("max_train_points_per_pq_code")) { + param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code"); + } } template diff --git a/cpp/cmake/patches/hnswlib.diff b/cpp/cmake/patches/hnswlib.diff index 0007ed6425..e7f89a8cc9 100644 --- a/cpp/cmake/patches/hnswlib.diff +++ b/cpp/cmake/patches/hnswlib.diff @@ -105,6 +105,63 @@ } } } +diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h +index 4413537..c3240f3 100644 +--- a/hnswlib/space_l2.h ++++ b/hnswlib/space_l2.h +@@ -252,13 +252,14 @@ namespace hnswlib { + ~L2Space() {} + }; + ++ template + static int + L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) { + + size_t qty = *((size_t *) qty_ptr); + int res = 0; +- unsigned char *a = (unsigned char *) pVect1; +- unsigned char *b = (unsigned char *) pVect2; ++ T *a = (T *) pVect1; ++ T *b = (T *) pVect2; + + qty = qty >> 2; + for (size_t i = 0; i < qty; i++) { +@@ -279,11 +280,12 @@ namespace hnswlib { + return (res); + } + ++ template + static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) { + size_t qty = *((size_t*)qty_ptr); + int res = 0; +- unsigned char* a = (unsigned char*)pVect1; +- unsigned char* b = (unsigned char*)pVect2; ++ T* a = (T*)pVect1; ++ T* b = (T*)pVect2; + + for(size_t i = 0; i < qty; i++) + { +@@ -294,6 +296,7 @@ namespace hnswlib { + return (res); + } + ++ template + class L2SpaceI : public SpaceInterface { + + DISTFUNC fstdistfunc_; +@@ -302,10 +305,10 @@ namespace hnswlib { + public: + L2SpaceI(size_t dim) { + if(dim % 4 == 0) { +- fstdistfunc_ = L2SqrI4x; ++ fstdistfunc_ = L2SqrI4x; + } + else { +- fstdistfunc_ = L2SqrI; ++ fstdistfunc_ = L2SqrI; + } + dim_ = dim; + data_size_ = dim * sizeof(unsigned char); diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h index 5e1a4a5..4195ebd 100644 --- a/hnswlib/visited_list_pool.h diff --git a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake index 82e95803f3..f4fe777379 100644 --- a/cpp/cmake/thirdparty/get_hnswlib.cmake +++ b/cpp/cmake/thirdparty/get_hnswlib.cmake @@ -30,6 +30,8 @@ function(find_and_configure_hnswlib) rapids_cpm_find( hnswlib ${PKG_VERSION} GLOBAL_TARGETS hnswlib::hnswlib + BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports CPM_ARGS GIT_REPOSITORY ${PKG_REPOSITORY} GIT_TAG ${PKG_PINNED_TAG} @@ -51,11 +53,13 @@ function(find_and_configure_hnswlib) # write export rules rapids_export( BUILD hnswlib + VERSION ${PKG_VERSION} EXPORT_SET hnswlib-exports GLOBAL_TARGETS hnswlib NAMESPACE hnswlib::) rapids_export( INSTALL hnswlib + VERSION ${PKG_VERSION} EXPORT_SET hnswlib-exports GLOBAL_TARGETS hnswlib NAMESPACE hnswlib::) @@ -74,5 +78,5 @@ endif() find_and_configure_hnswlib(VERSION 0.6.2 REPOSITORY ${RAFT_HNSWLIB_GIT_REPOSITORY} PINNED_TAG ${RAFT_HNSWLIB_GIT_TAG} - EXCLUDE_FROM_ALL ON + EXCLUDE_FROM_ALL OFF ) diff --git a/cpp/include/raft/core/math.hpp b/cpp/include/raft/core/math.hpp index 56a8d78926..809b2948e7 100644 --- a/cpp/include/raft/core/math.hpp +++ b/cpp/include/raft/core/math.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,12 +49,42 @@ RAFT_INLINE_FUNCTION auto abs(T x) template constexpr RAFT_INLINE_FUNCTION auto abs(T x) -> std::enable_if_t && !std::is_same_v && +#if defined(_RAFT_HAS_CUDA) + !std::is_same_v<__half, T> && !std::is_same_v && +#endif !std::is_same_v && !std::is_same_v && !std::is_same_v, T> { return x < T{0} ? -x : x; } +#if defined(_RAFT_HAS_CUDA) +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> abs(T x) +{ +#if (__CUDA_ARCH__ >= 530) + return ::__habs(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} + +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +abs(T x) +{ +#if (__CUDA_ARCH__ >= 800) + return ::__habs(x); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#endif +/** @} */ /** Inverse cosine */ template diff --git a/cpp/include/raft/neighbors/ball_cover-ext.cuh b/cpp/include/raft/neighbors/ball_cover-ext.cuh index bc5fe934ab..3d0b3c7858 100644 --- a/cpp/include/raft/neighbors/ball_cover-ext.cuh +++ b/cpp/include/raft/neighbors/ball_cover-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,6 +67,25 @@ void knn_query(raft::resources const& handle, bool perform_post_filtering = true, float weight = 1.0) RAFT_EXPLICIT; +template +void eps_nn(raft::resources const& handle, + const BallCoverIndex& index, + raft::device_matrix_view adj, + raft::device_vector_view vd, + raft::device_matrix_view query, + value_t eps) RAFT_EXPLICIT; + +template +void eps_nn(raft::resources const& handle, + const BallCoverIndex& index, + raft::device_vector_view adj_ia, + raft::device_vector_view adj_ja, + raft::device_vector_view vd, + raft::device_matrix_view query, + value_t eps, + std::optional> max_k = std::nullopt) + RAFT_EXPLICIT; + } // namespace raft::neighbors::ball_cover #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY @@ -87,6 +106,24 @@ void knn_query(raft::resources const& handle, bool perform_post_filtering, \ float weight); \ \ + extern template void raft::neighbors::ball_cover::eps_nn( \ + raft::resources const& handle, \ + const raft::neighbors::ball_cover::BallCoverIndex& index, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + raft::device_matrix_view query, \ + value_t eps); \ + \ + extern template void raft::neighbors::ball_cover::eps_nn( \ + raft::resources const& handle, \ + const raft::neighbors::ball_cover::BallCoverIndex& index, \ + raft::device_vector_view adj_ia, \ + raft::device_vector_view adj_ja, \ + raft::device_vector_view vd, \ + raft::device_matrix_view query, \ + value_t eps, \ + std::optional> max_k); \ + \ extern template void \ raft::neighbors::ball_cover::all_knn_query( \ raft::resources const& handle, \ @@ -119,6 +156,6 @@ void knn_query(raft::resources const& handle, bool perform_post_filtering, \ float weight); -instantiate_raft_neighbors_ball_cover(int64_t, float, uint32_t, uint32_t); +instantiate_raft_neighbors_ball_cover(int64_t, float, int64_t, int64_t); #undef instantiate_raft_neighbors_ball_cover diff --git a/cpp/include/raft/neighbors/ball_cover-inl.cuh b/cpp/include/raft/neighbors/ball_cover-inl.cuh index d35c1dc614..cdf7c30e89 100644 --- a/cpp/include/raft/neighbors/ball_cover-inl.cuh +++ b/cpp/include/raft/neighbors/ball_cover-inl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,7 +63,6 @@ template & index) { - ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation"); if (index.metric == raft::distance::DistanceType::Haversine) { raft::spatial::knn::detail::rbc_build_index( handle, index, spatial::knn::detail::HaversineFunc()); @@ -255,9 +254,9 @@ void all_knn_query(raft::resources const& handle, * looking in the closest landmark. * @param[in] n_query_pts number of query points */ -template +template void knn_query(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, int_t k, const value_t* query, int_t n_query_pts, @@ -295,6 +294,106 @@ void knn_query(raft::resources const& handle, } } +/** + * @brief Computes epsilon neighborhood for the L2 distance metric using rbc + * + * @tparam value_t IO and math type + * @tparam idx_t Index type + * + * @param[in] handle raft handle for resource management + * @param[in] index ball cover index which has been built + * @param[out] adj adjacency matrix [row-major] [on device] [dim = m x n] + * @param[out] vd vertex degree array [on device] [len = m + 1] + * `vd + m` stores the total number of edges in the adjacency + * matrix. Pass a nullptr if you don't need this info. + * @param[in] query first matrix [row-major] [on device] [dim = m x k] + * @param[in] eps defines epsilon neighborhood radius + */ +template +void eps_nn(raft::resources const& handle, + const BallCoverIndex& index, + raft::device_matrix_view adj, + raft::device_vector_view vd, + raft::device_matrix_view query, + value_t eps) +{ + ASSERT(index.n == query.extent(1), "vector dimension needs to be the same for index and queries"); + ASSERT(index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded, + "Metric not supported"); + ASSERT(index.is_index_trained(), "index must be previously trained"); + + // run query + raft::spatial::knn::detail::rbc_eps_nn_query( + handle, + index, + eps, + query.data_handle(), + query.extent(0), + adj.data_handle(), + vd.data_handle(), + spatial::knn::detail::EuclideanFunc()); +} + +/** + * @brief Computes epsilon neighborhood for the L2 distance metric using rbc + * + * @tparam value_t IO and math type + * @tparam idx_t Index type + * + * @param[in] handle raft handle for resource management + * @param[in] index ball cover index which has been built + * @param[out] adj_ia adjacency matrix CSR row offsets + * @param[out] adj_ja adjacency matrix CSR column indices, needs to be nullptr + * in first pass with max_k nullopt + * @param[out] vd vertex degree array [on device] [len = m + 1] + * `vd + m` stores the total number of edges in the adjacency + * matrix. Pass a nullptr if you don't need this info. + * @param[in] query first matrix [row-major] [on device] [dim = m x k] + * @param[in] eps defines epsilon neighborhood radius + * @param[inout] max_k if nullopt (default), the user needs to make 2 subsequent calls: + * The first call computes row offsets in adj_ia, where adj_ia[m] + * contains the minimum required size for adj_ja. + * The second call fills in adj_ja based on adj_ia. + * If max_k != nullopt the algorithm only fills up neighbors up to a + * maximum number of max_k for each row in a single pass. Note + * that it is not guarantueed to return the nearest neighbors. + * Upon return max_k is overwritten with the actual max_k found during + * computation. + */ +template +void eps_nn(raft::resources const& handle, + const BallCoverIndex& index, + raft::device_vector_view adj_ia, + raft::device_vector_view adj_ja, + raft::device_vector_view vd, + raft::device_matrix_view query, + value_t eps, + std::optional> max_k = std::nullopt) +{ + ASSERT(index.n == query.extent(1), "vector dimension needs to be the same for index and queries"); + ASSERT(index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded, + "Metric not supported"); + ASSERT(index.is_index_trained(), "index must be previously trained"); + + int_t* max_k_ptr = nullptr; + if (max_k.has_value()) { max_k_ptr = max_k.value().data_handle(); } + + // run query + raft::spatial::knn::detail::rbc_eps_nn_query( + handle, + index, + eps, + max_k_ptr, + query.data_handle(), + query.extent(0), + adj_ia.data_handle(), + adj_ja.data_handle(), + vd.data_handle(), + spatial::knn::detail::EuclideanFunc()); +} + /** * @ingroup random_ball_cover * @{ @@ -377,7 +476,7 @@ void knn_query(raft::resources const& handle, index, k, query.data_handle(), - query.extent(0), + (int_t)query.extent(0), inds.data_handle(), dists.data_handle(), perform_post_filtering, diff --git a/cpp/include/raft/neighbors/ball_cover.cuh b/cpp/include/raft/neighbors/ball_cover.cuh index 41c5d0310c..20c88f3318 100644 --- a/cpp/include/raft/neighbors/ball_cover.cuh +++ b/cpp/include/raft/neighbors/ball_cover.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ #pragma once - #ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "ball_cover-inl.cuh" #endif diff --git a/cpp/include/raft/neighbors/ball_cover_types.hpp b/cpp/include/raft/neighbors/ball_cover_types.hpp index 0a6ad8c407..dc96f0d45b 100644 --- a/cpp/include/raft/neighbors/ball_cover_types.hpp +++ b/cpp/include/raft/neighbors/ball_cover_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,8 +42,8 @@ namespace raft::neighbors::ball_cover { */ template + typename value_int = std::int64_t, + typename matrix_idx = std::int64_t> class BallCoverIndex { public: explicit BallCoverIndex(raft::resources const& handle_, diff --git a/cpp/include/raft/neighbors/brute_force-ext.cuh b/cpp/include/raft/neighbors/brute_force-ext.cuh index ddce6d8fda..e8c25f355b 100644 --- a/cpp/include/raft/neighbors/brute_force-ext.cuh +++ b/cpp/include/raft/neighbors/brute_force-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index c801bc9eda..83830c7457 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,13 +32,14 @@ namespace raft::neighbors::cagra { * * @code{.cpp} * #include + * #include * * raft::resources handle; * * // create an output stream * std::ostream os(std::cout.rdbuf()); - * // create an index with `auto index = cagra::build(...);` - * raft::serialize(handle, os, index); + * // create an index with `auto index = raft::cagra::build(...);` + * raft::cagra::serialize(handle, os, index); * @endcode * * @tparam T data element type @@ -66,13 +67,14 @@ void serialize(raft::resources const& handle, * * @code{.cpp} * #include + * #include * * raft::resources handle; * * // create a string with a filepath * std::string filename("/path/to/index"); - * // create an index with `auto index = cagra::build(...);` - * raft::serialize(handle, filename, index); + * // create an index with `auto index = raft::cagra::build(...);` + * raft::cagra::serialize(handle, filename, index); * @endcode * * @tparam T data element type @@ -100,13 +102,14 @@ void serialize(raft::resources const& handle, * * @code{.cpp} * #include + * #include * * raft::resources handle; * * // create an output stream * std::ostream os(std::cout.rdbuf()); - * // create an index with `auto index = cagra::build(...);` - * raft::serialize_to_hnswlib(handle, os, index); + * // create an index with `auto index = raft::cagra::build(...);` + * raft::cagra::serialize_to_hnswlib(handle, os, index); * @endcode * * @tparam T data element type @@ -120,25 +123,26 @@ void serialize(raft::resources const& handle, template void serialize_to_hnswlib(raft::resources const& handle, std::ostream& os, - const index& index) + const raft::neighbors::cagra::index& index) { detail::serialize_to_hnswlib(handle, os, index); } /** - * Write the CAGRA built index as a base layer HNSW index to file + * Save a CAGRA build index in hnswlib base-layer-only serialized format * * Experimental, both the API and the serialization format are subject to change. * * @code{.cpp} * #include + * #include * * raft::resources handle; * * // create a string with a filepath * std::string filename("/path/to/index"); - * // create an index with `auto index = cagra::build(...);` - * raft::serialize_to_hnswlib(handle, filename, index); + * // create an index with `auto index = raft::cagra::build(...);` + * raft::cagra::serialize_to_hnswlib(handle, filename, index); * @endcode * * @tparam T data element type @@ -152,7 +156,7 @@ void serialize_to_hnswlib(raft::resources const& handle, template void serialize_to_hnswlib(raft::resources const& handle, const std::string& filename, - const index& index) + const raft::neighbors::cagra::index& index) { detail::serialize_to_hnswlib(handle, filename, index); } @@ -164,6 +168,7 @@ void serialize_to_hnswlib(raft::resources const& handle, * * @code{.cpp} * #include + * #include * * raft::resources handle; * @@ -171,7 +176,7 @@ void serialize_to_hnswlib(raft::resources const& handle, * std::istream is(std::cin.rdbuf()); * using T = float; // data element type * using IdxT = int; // type of the index - * auto index = raft::deserialize(handle, is); + * auto index = raft::cagra::deserialize(handle, is); * @endcode * * @tparam T data element type @@ -195,6 +200,7 @@ index deserialize(raft::resources const& handle, std::istream& is) * * @code{.cpp} * #include + * #include * * raft::resources handle; * @@ -202,7 +208,7 @@ index deserialize(raft::resources const& handle, std::istream& is) * std::string filename("/path/to/index"); * using T = float; // data element type * using IdxT = int; // type of the index - * auto index = raft::deserialize(handle, filename); + * auto index = raft::cagra::deserialize(handle, filename); * @endcode * * @tparam T data element type diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 51c9475434..42a979f059 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,9 +101,11 @@ void serialize(raft::resources const& res, template void serialize_to_hnswlib(raft::resources const& res, std::ostream& os, - const index& index_) + const raft::neighbors::cagra::index& index_) { - common::nvtx::range fun_scope("cagra::serialize_to_hnswlib"); + // static_assert(std::is_same_v or std::is_same_v, + // "An hnswlib index can only be trained with int32 or uint32 IdxT"); + common::nvtx::range fun_scope("cagra::serialize"); RAFT_LOG_DEBUG("Saving CAGRA index to hnswlib format, size %zu, dim %u", static_cast(index_.size()), index_.dim()); @@ -120,14 +122,14 @@ void serialize_to_hnswlib(raft::resources const& res, // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + // dim * sizeof(data_t) + sizeof(labeltype) - auto size_data_per_element = - static_cast(index_.graph_degree() * 4 + 4 + index_.dim() * 4 + 8); + auto size_data_per_element = static_cast(index_.graph_degree() * sizeof(IdxT) + 4 + + index_.dim() * sizeof(T) + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset std::size_t label_offset = size_data_per_element - 8; os.write(reinterpret_cast(&label_offset), sizeof(std::size_t)); // offset_data - auto offset_data = static_cast(index_.graph_degree() * 4 + 4); + auto offset_data = static_cast(index_.graph_degree() * sizeof(IdxT) + 4); os.write(reinterpret_cast(&offset_data), sizeof(std::size_t)); // max_level int max_level = 1; @@ -184,17 +186,17 @@ void serialize_to_hnswlib(raft::resources const& res, } auto data_row = host_dataset.data_handle() + (index_.dim() * i); - if constexpr (std::is_same_v) { - for (std::size_t j = 0; j < index_.dim(); ++j) { - auto data_elem = host_dataset(i, j); - os.write(reinterpret_cast(&data_elem), sizeof(T)); - } - } else if constexpr (std::is_same_v or std::is_same_v) { - for (std::size_t j = 0; j < index_.dim(); ++j) { - auto data_elem = static_cast(host_dataset(i, j)); - os.write(reinterpret_cast(&data_elem), sizeof(int)); - } + // if constexpr (std::is_same_v) { + for (std::size_t j = 0; j < index_.dim(); ++j) { + auto data_elem = host_dataset(i, j); + os.write(reinterpret_cast(&data_elem), sizeof(T)); } + // } else if constexpr (std::is_same_v or std::is_same_v) { + // for (std::size_t j = 0; j < index_.dim(); ++j) { + // auto data_elem = static_cast(host_dataset(i, j)); + // os.write(reinterpret_cast(&data_elem), sizeof(int)); + // } + // } os.write(reinterpret_cast(&i), sizeof(std::size_t)); } @@ -204,13 +206,12 @@ void serialize_to_hnswlib(raft::resources const& res, auto zero = 0; os.write(reinterpret_cast(&zero), sizeof(int)); } - // delete [] host_graph; } template void serialize_to_hnswlib(raft::resources const& res, const std::string& filename, - const index& index_) + const raft::neighbors::cagra::index& index_) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } diff --git a/cpp/include/raft/neighbors/detail/hnsw.hpp b/cpp/include/raft/neighbors/detail/hnsw.hpp new file mode 100644 index 0000000000..69478205a9 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/hnsw.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "hnsw_types.hpp" + +#include +#include +#include + +#include + +#include + +namespace raft::neighbors::hnsw::detail { + +template +void get_search_knn_results(hnswlib::HierarchicalNSW::type> const* idx, + const T* query, + int k, + uint64_t* indices, + float* distances) +{ + auto result = idx->searchKnn(query, k); + assert(result.size() >= static_cast(k)); + + for (int i = k - 1; i >= 0; --i) { + indices[i] = result.top().second; + distances[i] = result.top().first; + result.pop(); + } +} + +template +void search(raft::resources const& res, + const search_params& params, + const index& idx, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances) +{ + auto const* hnswlib_index = + reinterpret_cast::type> const*>( + idx.get_index()); + + // when num_threads == 0, automatically maximize parallelism + if (params.num_threads) { +#pragma omp parallel for num_threads(params.num_threads) + for (int64_t i = 0; i < queries.extent(0); ++i) { + get_search_knn_results(hnswlib_index, + queries.data_handle() + i * queries.extent(1), + neighbors.extent(1), + neighbors.data_handle() + i * neighbors.extent(1), + distances.data_handle() + i * distances.extent(1)); + } + } else { +#pragma omp parallel for + for (int64_t i = 0; i < queries.extent(0); ++i) { + get_search_knn_results(hnswlib_index, + queries.data_handle() + i * queries.extent(1), + neighbors.extent(1), + neighbors.data_handle() + i * neighbors.extent(1), + distances.data_handle() + i * distances.extent(1)); + } + } +} + +} // namespace raft::neighbors::hnsw::detail diff --git a/cpp/include/raft/neighbors/detail/hnsw_serialize.hpp b/cpp/include/raft/neighbors/detail/hnsw_serialize.hpp new file mode 100644 index 0000000000..8103ffc5ab --- /dev/null +++ b/cpp/include/raft/neighbors/detail/hnsw_serialize.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../hnsw_types.hpp" +#include "hnsw_types.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace raft::neighbors::hnsw::detail { + +template +std::unique_ptr> deserialize(raft::resources const& handle, + const std::string& filename, + int dim, + raft::distance::DistanceType metric) +{ + return std::unique_ptr>(new index_impl(filename, dim, metric)); +} + +} // namespace raft::neighbors::hnsw::detail diff --git a/cpp/include/raft/neighbors/detail/hnsw_types.hpp b/cpp/include/raft/neighbors/detail/hnsw_types.hpp new file mode 100644 index 0000000000..94ade95965 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/hnsw_types.hpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../hnsw_types.hpp" +#include +#include +#include + +#include +#include +#include +#include + +namespace raft::neighbors::hnsw::detail { + +/** + * @addtogroup cagra_hnswlib Build CAGRA index and search with hnswlib + * @{ + */ + +template +struct hnsw_dist_t { + using type = void; +}; + +template <> +struct hnsw_dist_t { + using type = float; +}; + +template <> +struct hnsw_dist_t { + using type = int; +}; + +template <> +struct hnsw_dist_t { + using type = int; +}; + +template +struct index_impl : index { + public: + /** + * @brief load a base-layer-only hnswlib index originally saved from a built CAGRA index + * + * @param[in] filepath path to the index + * @param[in] dim dimensions of the training dataset + * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct") + */ + index_impl(std::string filepath, int dim, raft::distance::DistanceType metric) + : index{dim, metric} + { + if constexpr (std::is_same_v) { + if (metric == raft::distance::L2Expanded) { + space_ = std::make_unique(dim); + } else if (metric == raft::distance::InnerProduct) { + space_ = std::make_unique(dim); + } + } else if constexpr (std::is_same_v or std::is_same_v) { + if (metric == raft::distance::L2Expanded) { + space_ = std::make_unique>(dim); + } + } + + RAFT_EXPECTS(space_ != nullptr, "Unsupported metric type was used"); + + appr_alg_ = std::make_unique::type>>( + space_.get(), filepath); + + appr_alg_->base_layer_only = true; + } + + /** + @brief Get hnswlib index + */ + auto get_index() const -> void const* override { return appr_alg_.get(); } + + private: + std::unique_ptr::type>> appr_alg_; + std::unique_ptr::type>> space_; +}; + +/**@}*/ + +} // namespace raft::neighbors::hnsw::detail diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index cc94511fe7..0ef6cb13fb 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -353,14 +353,19 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); - rmm::device_uvector sub_labels(n_rows, stream, device_memory); + // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. + size_t big_enough = max_train_points_per_pq_code * size_t(index.pq_book_size()); + auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + rmm::device_uvector sub_trainset( + pq_n_rows * size_t(index.pq_len()), stream, device_memory); + rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -371,7 +376,7 @@ void train_per_subset(raft::resources const& handle, // Get the rotated cluster centers for each training vector. // This will be subtracted from the input vectors afterwards. utils::copy_selected( - n_rows, + pq_n_rows, index.pq_len(), index.centers_rot().data_handle() + index.pq_len() * j, labels, @@ -387,7 +392,7 @@ void train_per_subset(raft::resources const& handle, true, false, index.pq_len(), - n_rows, + pq_n_rows, index.dim(), &alpha, index.rotation_matrix().data_handle() + index.dim() * index.pq_len() * j, @@ -400,13 +405,14 @@ void train_per_subset(raft::resources const& handle, stream); // train PQ codebook for this subspace - auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); + auto sub_trainset_view = raft::make_device_matrix_view( + sub_trainset.data(), pq_n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); + auto sub_labels_view = + raft::make_device_vector_view(sub_labels.data(), pq_n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; @@ -430,6 +436,7 @@ void train_per_cluster(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); @@ -477,9 +484,11 @@ void train_per_cluster(raft::resources const& handle, indices + cluster_offsets[l], device_memory); - // limit the cluster size to bound the training time. + // limit the cluster size to bound the training time based on max_train_points_per_pq_code + // If pq_book_size is less than pq_dim, use max_train_points_per_pq_code per pq_dim instead // [sic] we interpret the data as pq_len-dimensional - size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); + size_t big_enough = + max_train_points_per_pq_code * std::max(index.pq_book_size(), index.pq_dim()); size_t available_rows = size_t(cluster_size) * size_t(index.pq_dim()); auto pq_n_rows = uint32_t(std::min(big_enough, available_rows)); // train PQ codebook for this cluster @@ -1788,6 +1797,7 @@ auto build(raft::resources const& handle, trainset.data_handle(), labels.data(), params.kmeans_n_iters, + params.max_train_points_per_pq_code, &managed_mr); break; case codebook_gen::PER_CLUSTER: @@ -1797,6 +1807,7 @@ auto build(raft::resources const& handle, trainset.data_handle(), labels.data(), params.kmeans_n_iters, + params.max_train_points_per_pq_code, &managed_mr); break; default: RAFT_FAIL("Unreachable code"); diff --git a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh index dff9aceb8d..adcb639301 100644 --- a/cpp/include/raft/neighbors/detail/knn_brute_force.cuh +++ b/cpp/include/raft/neighbors/detail/knn_brute_force.cuh @@ -443,13 +443,13 @@ void brute_force_knn_impl( if (metric == raft::distance::DistanceType::L2SqrtExpanded || metric == raft::distance::DistanceType::L2SqrtUnexpanded || metric == raft::distance::DistanceType::LpUnexpanded) { - float p = 0.5; // standard l2 + value_t p = 0.5; // standard l2 if (metric == raft::distance::DistanceType::LpUnexpanded) p = 1.0 / metricArg; - raft::linalg::unaryOp( + raft::linalg::unaryOp( res_D, res_D, n * k, - [p] __device__(float input) { return powf(fabsf(input), p); }, + [p] __device__(value_t input) { return powf(fabsf(input), p); }, stream); } } else { diff --git a/cpp/include/raft/neighbors/detail/knn_merge_parts.cuh b/cpp/include/raft/neighbors/detail/knn_merge_parts.cuh index 3d03d6db4f..c8ff03741c 100644 --- a/cpp/include/raft/neighbors/detail/knn_merge_parts.cuh +++ b/cpp/include/raft/neighbors/detail/knn_merge_parts.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -111,7 +111,7 @@ inline void knn_merge_parts_impl(const value_t* inK, { auto grid = dim3(n_samples); - constexpr int n_threads = (warp_q <= 1024) ? 128 : 64; + constexpr int n_threads = (warp_q < 1024) ? 128 : 64; auto block = dim3(n_threads); auto kInit = std::numeric_limits::max(); diff --git a/cpp/include/raft/neighbors/hnsw.hpp b/cpp/include/raft/neighbors/hnsw.hpp new file mode 100644 index 0000000000..dceb98c5aa --- /dev/null +++ b/cpp/include/raft/neighbors/hnsw.hpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/hnsw.hpp" +#include "hnsw.hpp" + +#include + +#include +#include +#include +#include + +namespace raft::neighbors::hnsw { + +/** + * @addtogroup hnsw Build CAGRA index and search with hnswlib + * @{ + */ + +/** + * @brief Construct an hnswlib base-layer-only index from a CAGRA index + * NOTE: 1. This method uses the filesystem to write the CAGRA index in `/tmp/cagra_index.bin` + * before reading it as an hnswlib index, then deleting the temporary file. + * 2. This function is only offered as a compiled symbol in `libraft.so` + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] res raft resources + * @param[in] cagra_index cagra index + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace raft::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Load CAGRA index as base-layer-only hnswlib index + * auto hnsw_index = hnsw::from_cagra(res, index); + * @endcode + */ +template +std::unique_ptr> from_cagra(raft::resources const& res, + raft::neighbors::cagra::index cagra_index); + +template <> +std::unique_ptr> from_cagra( + raft::resources const& res, raft::neighbors::cagra::index cagra_index); + +template <> +std::unique_ptr> from_cagra( + raft::resources const& res, raft::neighbors::cagra::index cagra_index); + +template <> +std::unique_ptr> from_cagra( + raft::resources const& res, raft::neighbors::cagra::index cagra_index); + +/** + * @brief Search hnswlib base-layer-only index constructed from a CAGRA index + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] idx cagra index + * @param[in] queries a host matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a host matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a host matrix view to the distances to the selected neighbors [n_queries, + * k] + * + * Usage example: + * @code{.cpp} + * // Build a CAGRA index + * using namespace raft::neighbors; + * // use default index parameters + * cagra::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = cagra::build(res, index_params, dataset); + * + * // Save CAGRA index as base layer only hnswlib index + * hnsw::serialize(res, "my_index.bin", index); + * + * // Load CAGRA index as base layer only hnswlib index + * raft::neighbors::hnsw::index* hnsw_index; + * auto hnsw_index = hnsw::deserialize(res, "my_index.bin", D, raft::distance::L2Expanded); + * + * // Search K nearest neighbors as an hnswlib index + * // using host threads for concurrency + * hnsw::search_params search_params; + * search_params.ef = 50 // ef >= K; + * search_params.num_threads = 10; + * auto neighbors = raft::make_host_matrix(res, n_queries, k); + * auto distances = raft::make_host_matrix(res, n_queries, k); + * hnsw::search(res, search_params, *index, queries, neighbors, distances); + * // de-allocate hnsw_index + * delete hnsw_index; + * @endcode + */ +template +void search(raft::resources const& res, + const search_params& params, + const index& idx, + raft::host_matrix_view queries, + raft::host_matrix_view neighbors, + raft::host_matrix_view distances) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must equal k"); + RAFT_EXPECTS(queries.extent(1) == idx.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + detail::search(res, params, idx, queries, neighbors, distances); +} + +/**@}*/ + +} // namespace raft::neighbors::hnsw diff --git a/cpp/include/raft/neighbors/hnsw_serialize.hpp b/cpp/include/raft/neighbors/hnsw_serialize.hpp new file mode 100644 index 0000000000..45819c8fb5 --- /dev/null +++ b/cpp/include/raft/neighbors/hnsw_serialize.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/hnsw_serialize.hpp" +#include "hnsw_types.hpp" +#include + +#include + +namespace raft::neighbors::hnsw { + +/** + * @defgroup hnsw_serialize HNSW Serialize + * @{ + */ + +/** + * Load an hnswlib index which was serialized from a CAGRA index + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.cpp} + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * // create an an unallocated pointer + * int dim = 10; + * raft::distance::DistanceType = raft::distance::L2Expanded + * auto index = raft::deserialize(handle, filename, dim, metric); + * @endcode + * + * @tparam T data element type + * + * @param[in] handle the raft handle + * @param[in] filename the file name for saving the index + * @param[in] dim dimensionality of the index + * @param[in] metric metric used to build the index + * + * @return std::unique_ptr> + * + */ +template +std::unique_ptr> deserialize(raft::resources const& handle, + const std::string& filename, + int dim, + raft::distance::DistanceType metric) +{ + return detail::deserialize(handle, filename, dim, metric); +} + +/**@}*/ + +} // namespace raft::neighbors::hnsw diff --git a/cpp/include/raft/neighbors/hnsw_types.hpp b/cpp/include/raft/neighbors/hnsw_types.hpp new file mode 100644 index 0000000000..aa4cefbc30 --- /dev/null +++ b/cpp/include/raft/neighbors/hnsw_types.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "ann_types.hpp" +#include +#include + +#include +#include +#include + +namespace raft::neighbors::hnsw { + +/** + * @defgroup hnsw Build CAGRA index and search with hnswlib + * @{ + */ + +struct search_params : ann::search_params { + int ef; // size of the candidate list + int num_threads = 0; // number of host threads to use for concurrent searches. Value of 0 + // automatically maximizes parallelism +}; + +template +struct index : ann::index { + public: + /** + * @brief load a base-layer-only hnswlib index originally saved from a built CAGRA index. + * This is a virtual class and it cannot be used directly. To create an index, use the factory + * function `raft::neighbors::hnsw::from_cagra` from the header + * `raft/neighbors/hnsw.hpp` + * + * @param[in] dim dimensions of the training dataset + * @param[in] metric distance metric to search. Supported metrics ("L2Expanded", "InnerProduct") + */ + index(int dim, raft::distance::DistanceType metric) : dim_{dim}, metric_{metric} {} + + /** + @brief Get underlying index + */ + virtual auto get_index() const -> void const* = 0; + + auto dim() const -> int const { return dim_; } + + auto metric() const -> raft::distance::DistanceType { return metric_; } + + private: + int dim_; + raft::distance::DistanceType metric_; +}; + +/**@}*/ + +} // namespace raft::neighbors::hnsw diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 45ab18c84f..04c2354fbc 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -105,6 +105,14 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; + /** + * The max number of data points to use per PQ code during PQ codebook training. Using more data + * points per PQ code may increase the quality of PQ codebook but may also increase the build + * time. The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and + * PER_CLUSTER. In both cases, we will use `pq_book_size * max_train_points_per_pq_code` training + * points to train each codebook. + */ + uint32_t max_train_points_per_pq_code = 256; }; struct search_params : ann::search_params { diff --git a/cpp/include/raft/sparse/linalg/spmm.hpp b/cpp/include/raft/sparse/linalg/spmm.hpp index c2fdd64574..03c97fdb9d 100644 --- a/cpp/include/raft/sparse/linalg/spmm.hpp +++ b/cpp/include/raft/sparse/linalg/spmm.hpp @@ -42,7 +42,7 @@ namespace linalg { * @param[in] x input raft::device_csr_matrix_view * @param[in] y input raft::device_matrix_view * @param[in] beta scalar - * @param[out] z output raft::device_matrix_view + * @param[inout] z input-output raft::device_matrix_view */ template ( + z.data_handle(), z.extent(0), z.extent(1), is_row_major ? z.stride(0) : z.stride(1)); + auto descr_x = detail::create_descriptor(x); auto descr_y = detail::create_descriptor(y); - auto descr_z = detail::create_descriptor(z); + auto descr_z = detail::create_descriptor(z_tmp_view); detail::spmm(handle, trans_x, trans_y, is_row_major, alpha, descr_x, descr_y, beta, descr_z); @@ -76,4 +79,4 @@ void spmm(raft::resources const& handle, } // end namespace sparse } // end namespace raft -#endif +#endif \ No newline at end of file diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh index 4fe60e304b..879f54fd81 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,7 @@ #include +#include #include #include #include @@ -64,9 +65,12 @@ namespace detail { * @param handle * @param index */ -template +template void sample_landmarks(raft::resources const& handle, - BallCoverIndex& index) + BallCoverIndex& index) { rmm::device_uvector R_1nn_cols2(index.n_landmarks, resource::get_cuda_stream(handle)); rmm::device_uvector R_1nn_ones(index.m, resource::get_cuda_stream(handle)); @@ -100,8 +104,6 @@ void sample_landmarks(raft::resources const& handle, (value_idx)index.n_landmarks, (value_idx)index.m); - // index.get_X() returns the wrong indextype (uint32_t where we need value_idx), so need to - // create new device_matrix_view here auto x = index.get_X(); auto r = index.get_R(); @@ -122,12 +124,15 @@ void sample_landmarks(raft::resources const& handle, * @param k * @param index */ -template +template void construct_landmark_1nn(raft::resources const& handle, const value_idx* R_knn_inds_ptr, const value_t* R_knn_dists_ptr, value_int k, - BallCoverIndex& index) + BallCoverIndex& index) { rmm::device_uvector R_1nn_inds(index.m, resource::get_cuda_stream(handle)); @@ -177,9 +182,12 @@ void construct_landmark_1nn(raft::resources const& handle, * @param R_knn_inds * @param R_knn_dists */ -template +template void k_closest_landmarks(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query_pts, value_int n_query_pts, value_int k, @@ -205,9 +213,12 @@ void k_closest_landmarks(raft::resources const& handle, * @param handle * @param index */ -template +template void compute_landmark_radii(raft::resources const& handle, - BallCoverIndex& index) + BallCoverIndex& index) { auto entries = thrust::make_counting_iterator(0); @@ -235,13 +246,14 @@ void compute_landmark_radii(raft::resources const& handle, */ template void perform_rbc_query(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query, value_int n_query_pts, - std::uint32_t k, + value_int k, const value_idx* R_knn_inds, const value_t* R_knn_dists, dist_func dfunc, @@ -264,66 +276,128 @@ void perform_rbc_query(raft::resources const& handle, if (index.n == 2) { // Compute nearest k for each neighborhood in each closest R - rbc_low_dim_pass_one(handle, - index, - query, - n_query_pts, - k, - R_knn_inds, - R_knn_dists, - dfunc, - inds, - dists, - weight, - dists_counter); + rbc_low_dim_pass_one(handle, + index, + query, + n_query_pts, + k, + R_knn_inds, + R_knn_dists, + dfunc, + inds, + dists, + weight, + dists_counter); if (perform_post_filtering) { - rbc_low_dim_pass_two(handle, - index, - query, - n_query_pts, - k, - R_knn_inds, - R_knn_dists, - dfunc, - inds, - dists, - weight, - post_dists_counter); + rbc_low_dim_pass_two(handle, + index, + query, + n_query_pts, + k, + R_knn_inds, + R_knn_dists, + dfunc, + inds, + dists, + weight, + post_dists_counter); } } else if (index.n == 3) { // Compute nearest k for each neighborhood in each closest R - rbc_low_dim_pass_one(handle, - index, - query, - n_query_pts, - k, - R_knn_inds, - R_knn_dists, - dfunc, - inds, - dists, - weight, - dists_counter); + rbc_low_dim_pass_one(handle, + index, + query, + n_query_pts, + k, + R_knn_inds, + R_knn_dists, + dfunc, + inds, + dists, + weight, + dists_counter); if (perform_post_filtering) { - rbc_low_dim_pass_two(handle, - index, - query, - n_query_pts, - k, - R_knn_inds, - R_knn_dists, - dfunc, - inds, - dists, - weight, - post_dists_counter); + rbc_low_dim_pass_two(handle, + index, + query, + n_query_pts, + k, + R_knn_inds, + R_knn_dists, + dfunc, + inds, + dists, + weight, + post_dists_counter); } } } +/** + * Perform eps-select + * + * a. Map 1 row to each warp/block + * b. Add closest k R points to heap + * c. Iterate through batches of R, having each thread in the warp load a set + * of distances y from R (only if d(q, r) < 3 * distance to closest r) and + * marking the distance to be computed between x, y only + * if knn[k].distance >= d(x_i, R_k) + d(R_k, y) + */ +template +void perform_rbc_eps_nn_query( + raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + value_int n_query_pts, + value_t eps, + const value_t* landmark_dists, + dist_func dfunc, + bool* adj, + value_idx* vd) +{ + // initialize output + RAFT_CUDA_TRY(cudaMemsetAsync( + adj, 0, index.m * n_query_pts * sizeof(bool), resource::get_cuda_stream(handle))); + + resource::sync_stream(handle); + + rbc_eps_pass( + handle, index, query, n_query_pts, eps, landmark_dists, dfunc, adj, vd); + + resource::sync_stream(handle); +} + +template +void perform_rbc_eps_nn_query( + raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + value_int n_query_pts, + value_t eps, + value_int* max_k, + const value_t* landmark_dists, + dist_func dfunc, + value_idx* adj_ia, + value_idx* adj_ja, + value_idx* vd) +{ + rbc_eps_pass( + handle, index, query, n_query_pts, eps, max_k, landmark_dists, dfunc, adj_ia, adj_ja, vd); + + resource::sync_stream(handle); +} + /** * Similar to a ball tree, the random ball cover algorithm * uses the triangle inequality to prune distance computations @@ -337,13 +411,13 @@ void perform_rbc_query(raft::resources const& handle, */ template void rbc_build_index(raft::resources const& handle, - BallCoverIndex& index, + BallCoverIndex& index, distance_func dfunc) { - ASSERT(index.n <= 3, "only 2d and 3d vectors are supported in current implementation"); ASSERT(!index.is_index_trained(), "index cannot be previously trained"); rmm::device_uvector R_knn_inds(index.m, resource::get_cuda_stream(handle)); @@ -396,10 +470,11 @@ void rbc_build_index(raft::resources const& handle, */ template void rbc_all_knn_query(raft::resources const& handle, - BallCoverIndex& index, + BallCoverIndex& index, value_int k, value_idx* inds, value_t* dists, @@ -469,10 +544,11 @@ void rbc_all_knn_query(raft::resources const& handle, */ template void rbc_knn_query(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, value_int k, const value_t* query, value_int n_query_pts, @@ -539,6 +615,106 @@ void rbc_knn_query(raft::resources const& handle, perform_post_filtering); } +template +void compute_landmark_dists(raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query_pts, + value_int n_query_pts, + value_t* R_dists) +{ + // compute distances for all queries against all landmarks + // index.get_R() -- landmark points in row order (index.n_landmarks x index.k) + // query_pts -- query points in row order (n_query_pts x index.k) + RAFT_EXPECTS(std::max(index.n_landmarks, n_query_pts) * index.n < + static_cast(std::numeric_limits::max()), + "Too large input for pairwise_distance with `int` index."); + RAFT_EXPECTS(n_query_pts * static_cast(index.n_landmarks) < + static_cast(std::numeric_limits::max()), + "Too large input for pairwise_distance with `int` index."); + raft::distance::pairwise_distance(handle, + query_pts, + index.get_R().data_handle(), + R_dists, + n_query_pts, + index.n_landmarks, + index.n, + index.get_metric()); +} + +/** + * Performs a knn query against an index. This assumes the index has + * already been built. + * Modified version that takes an eps as threshold and outputs to a dense adj matrix (row-major) + * we are assuming that there are sufficiently many landmarks + */ +template +void rbc_eps_nn_query(raft::resources const& handle, + const BallCoverIndex& index, + const value_t eps, + const value_t* query, + value_int n_query_pts, + bool* adj, + value_idx* vd, + distance_func dfunc) +{ + ASSERT(index.is_index_trained(), "index must be previously trained"); + + auto R_dists = + raft::make_device_matrix(handle, index.n_landmarks, n_query_pts); + + // find all landmarks that might have points in range + compute_landmark_dists(handle, index, query, n_query_pts, R_dists.data_handle()); + + // query all points and write to adj + perform_rbc_eps_nn_query( + handle, index, query, n_query_pts, eps, R_dists.data_handle(), dfunc, adj, vd); +} + +template +void rbc_eps_nn_query(raft::resources const& handle, + const BallCoverIndex& index, + const value_t eps, + value_int* max_k, + const value_t* query, + value_int n_query_pts, + value_idx* adj_ia, + value_idx* adj_ja, + value_idx* vd, + distance_func dfunc) +{ + ASSERT(index.is_index_trained(), "index must be previously trained"); + + auto R_dists = + raft::make_device_matrix(handle, index.n_landmarks, n_query_pts); + + // find all landmarks that might have points in range + compute_landmark_dists(handle, index, query, n_query_pts, R_dists.data_handle()); + + // query all points and write to adj + perform_rbc_eps_nn_query(handle, + index, + query, + n_query_pts, + eps, + max_k, + R_dists.data_handle(), + dfunc, + adj_ia, + adj_ja, + vd); +} + }; // namespace detail }; // namespace knn }; // namespace spatial diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh index 70c5cec23f..2ed6ee3284 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,11 +27,12 @@ namespace raft::spatial::knn::detail { template void rbc_low_dim_pass_one(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query, const value_int n_query_rows, value_int k, @@ -45,11 +46,12 @@ void rbc_low_dim_pass_one(raft::resources const& handle, template void rbc_low_dim_pass_two(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query, const value_int n_query_rows, value_int k, @@ -61,69 +63,133 @@ void rbc_low_dim_pass_two(raft::resources const& handle, float weight, value_int* post_dists_counter) RAFT_EXPLICIT; +template +void rbc_eps_pass(raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + const value_int n_query_rows, + value_t eps, + const value_t* R_dists, + dist_func& dfunc, + bool* adj, + value_idx* vd) RAFT_EXPLICIT; + +template +void rbc_eps_pass(raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + const value_int n_query_rows, + value_t eps, + value_int* max_k, + const value_t* R_dists, + dist_func& dfunc, + value_idx* adj_ia, + value_idx* adj_ja, + value_idx* vd) RAFT_EXPLICIT; + }; // namespace raft::spatial::knn::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - extern template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) - -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - extern template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + extern template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) + +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + extern template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) + +#define instantiate_raft_spatial_knn_detail_rbc_eps_pass( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdist_func) \ + extern template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + const Mvalue_t* R_dists, \ + Mdist_func& dfunc, \ + bool* adj, \ + Mvalue_idx* vd); \ + \ + extern template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + Mvalue_int* max_k, \ + const Mvalue_t* R_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* adj_ia, \ + Mvalue_idx* adj_ja, \ + Mvalue_idx* vd); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::HaversineFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::HaversineFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::EuclideanFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::EuclideanFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::DistFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::DistFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::HaversineFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::HaversineFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::EuclideanFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::EuclideanFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::DistFunc); instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::DistFunc); + +instantiate_raft_spatial_knn_detail_rbc_eps_pass( + std::int64_t, float, std::int64_t, std::int64_t, raft::spatial::knn::detail::EuclideanFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one +#undef instantiate_raft_spatial_knn_detail_rbc_eps_pass diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh index 9e75f3c9c8..8b4e8f287e 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-inl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,9 @@ #include #include +#include #include +#include namespace raft { namespace spatial { @@ -454,13 +456,259 @@ RAFT_KERNEL block_rbc_kernel_registers(const value_t* X_index, } } -template +RAFT_KERNEL block_rbc_kernel_eps_dense(const value_t* X_index, + const value_t* X, + const value_int n_cols, + const value_t* R_dists, + const value_int m, + const value_t eps, + const value_int n_landmarks, + const value_idx* R_indptr, + const value_idx* R_1nn_cols, + const value_t* R_1nn_dists, + const value_t* R_radius, + distance_func dfunc, + bool* adj, + value_idx* vd) +{ + __shared__ int column_count_smem; + + // initialize + if (vd != nullptr) { + if (threadIdx.x == 0) { column_count_smem = 0; } + __syncthreads(); + } + + const value_t* x_ptr = X + (n_cols * blockIdx.x); + + for (value_int cur_k = 0; cur_k < n_landmarks; ++cur_k) { + // TODO: this might also be worth computing in-place here + value_t cur_R_dist = R_dists[blockIdx.x * n_landmarks + cur_k]; + + // prune all R's that can't be within eps + if (cur_R_dist - R_radius[cur_k] > eps) continue; + + // The whole warp should iterate through the elements in the current R + value_idx R_start_offset = R_indptr[cur_k]; + value_idx R_stop_offset = R_indptr[cur_k + 1]; + + value_idx R_size = R_stop_offset - R_start_offset; + + value_int limit = Pow2::roundDown(R_size); + value_int i = threadIdx.x; + for (; i < limit; i += tpb) { + // Index and distance of current candidate's nearest landmark + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + adj[blockIdx.x * m + cur_candidate_ind] = true; + if (vd != nullptr) atomicAdd(&column_count_smem, 1); + } + } + + if (i < R_size) { + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + adj[blockIdx.x * m + cur_candidate_ind] = true; + if (vd != nullptr) atomicAdd(&column_count_smem, 1); + } + } + } + + if (vd != nullptr) { + __syncthreads(); + if (threadIdx.x == 0) { vd[blockIdx.x] = column_count_smem; } + } +} + +template +RAFT_KERNEL block_rbc_kernel_eps_csr_pass(const value_t* X_index, + const value_t* X, + const value_int n_cols, + const value_t* R_dists, + const value_int m, + const value_t eps, + const value_int n_landmarks, + const value_idx* R_indptr, + const value_idx* R_1nn_cols, + const value_t* R_1nn_dists, + const value_t* R_radius, + distance_func dfunc, + value_idx* adj_ia, + value_idx* adj_ja) +{ + const value_t* x_ptr = X + (n_cols * blockIdx.x); + + __shared__ unsigned long long int column_index_smem; + + bool pass2 = adj_ja != nullptr; + + // initialize + if (threadIdx.x == 0) { column_index_smem = pass2 ? adj_ia[blockIdx.x] : 0; } + + __syncthreads(); + + for (value_int cur_k = 0; cur_k < n_landmarks; ++cur_k) { + // TODO: this might also be worth computing in-place here + value_t cur_R_dist = R_dists[blockIdx.x * n_landmarks + cur_k]; + + // prune all R's that can't be within eps + if (cur_R_dist - R_radius[cur_k] > eps) continue; + + // The whole warp should iterate through the elements in the current R + value_idx R_start_offset = R_indptr[cur_k]; + value_idx R_stop_offset = R_indptr[cur_k + 1]; + + value_idx R_size = R_stop_offset - R_start_offset; + + value_int limit = Pow2::roundDown(R_size); + value_int i = threadIdx.x; + for (; i < limit; i += tpb) { + // Index and distance of current candidate's nearest landmark + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + auto row_pos = atomicAdd(&column_index_smem, 1); + if (pass2) adj_ja[row_pos] = cur_candidate_ind; + } + } + + if (i < R_size) { + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + auto row_pos = atomicAdd(&column_index_smem, 1); + if (pass2) adj_ja[row_pos] = cur_candidate_ind; + } + } + } + + __syncthreads(); + if (threadIdx.x == 0 && !pass2) { adj_ia[blockIdx.x] = (value_idx)column_index_smem; } +} + +template +RAFT_KERNEL block_rbc_kernel_eps_max_k(const value_t* X_index, + const value_t* X, + const value_int n_cols, + const value_t* R_dists, + const value_int m, + const value_t eps, + const value_int n_landmarks, + const value_idx* R_indptr, + const value_idx* R_1nn_cols, + const value_t* R_1nn_dists, + const value_t* R_radius, + distance_func dfunc, + value_idx* vd, + const value_int max_k, + value_idx* tmp) +{ + const value_t* x_ptr = X + (n_cols * blockIdx.x); + + __shared__ int column_count_smem; + + // initialize + if (threadIdx.x == 0) { column_count_smem = 0; } + + __syncthreads(); + + // we store all column indices in dense tmp store [blockDim.x * max_k] + value_int offset = blockIdx.x * max_k; + + for (value_int cur_k = 0; cur_k < n_landmarks; ++cur_k) { + // TODO: this might also be worth computing in-place here + value_t cur_R_dist = R_dists[blockIdx.x * n_landmarks + cur_k]; + + // prune all R's that can't be within eps + if (cur_R_dist - R_radius[cur_k] > eps) continue; + + // The whole warp should iterate through the elements in the current R + value_idx R_start_offset = R_indptr[cur_k]; + value_idx R_stop_offset = R_indptr[cur_k + 1]; + + value_idx R_size = R_stop_offset - R_start_offset; + + value_int limit = Pow2::roundDown(R_size); + value_int i = threadIdx.x; + for (; i < limit; i += tpb) { + // Index and distance of current candidate's nearest landmark + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + int row_pos = atomicAdd(&column_count_smem, 1); + if (row_pos < max_k) tmp[row_pos + offset] = cur_candidate_ind; + } + } + + if (i < R_size) { + value_idx cur_candidate_ind = R_1nn_cols[R_start_offset + i]; + value_t cur_candidate_dist = R_1nn_dists[R_start_offset + i]; + + const value_t* y_ptr = X_index + (n_cols * cur_candidate_ind); + if (dfunc(x_ptr, y_ptr, n_cols) <= eps) { + int row_pos = atomicAdd(&column_count_smem, 1); + if (row_pos < max_k) tmp[row_pos + offset] = cur_candidate_ind; + } + } + } + + __syncthreads(); + if (threadIdx.x == 0) { vd[blockIdx.x] = column_count_smem; } +} + +template +RAFT_KERNEL block_rbc_kernel_eps_max_k_copy(const value_int max_k, + const value_idx* adj_ia, + const value_idx* tmp, + value_idx* adj_ja) +{ + value_int offset = blockIdx.x * max_k; + + value_int row_idx = blockIdx.x; + value_idx col_start_idx = adj_ia[row_idx]; + value_idx num_cols = adj_ia[row_idx + 1] - col_start_idx; + + value_int limit = Pow2::roundDown(num_cols); + value_int i = threadIdx.x; + for (; i < limit; i += tpb) { + adj_ja[col_start_idx + i] = tmp[offset + i]; + } + if (i < num_cols) { adj_ja[col_start_idx + i] = tmp[offset + i]; } +} + +template void rbc_low_dim_pass_one(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query, const value_int n_query_rows, value_int k, @@ -594,11 +842,12 @@ void rbc_low_dim_pass_one(raft::resources const& handle, template void rbc_low_dim_pass_two(raft::resources const& handle, - const BallCoverIndex& index, + const BallCoverIndex& index, const value_t* query, const value_int n_query_rows, value_int k, @@ -788,6 +1037,179 @@ void rbc_low_dim_pass_two(raft::resources const& handle, post_dists_counter); } +template +void rbc_eps_pass(raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + const value_int n_query_rows, + value_t eps, + const value_t* R_dists, + dist_func& dfunc, + bool* adj, + value_idx* vd) +{ + block_rbc_kernel_eps_dense + <<>>( + index.get_X().data_handle(), + query, + index.n, + R_dists, + index.m, + eps, + index.n_landmarks, + index.get_R_indptr().data_handle(), + index.get_R_1nn_cols().data_handle(), + index.get_R_1nn_dists().data_handle(), + index.get_R_radius().data_handle(), + dfunc, + adj, + vd); + + if (vd != nullptr) { + value_idx sum = thrust::reduce(resource::get_thrust_policy(handle), vd, vd + n_query_rows); + // copy sum to last element + RAFT_CUDA_TRY(cudaMemcpyAsync(vd + n_query_rows, + &sum, + sizeof(value_idx), + cudaMemcpyHostToDevice, + resource::get_cuda_stream(handle))); + } + + resource::sync_stream(handle); +} + +template +void rbc_eps_pass(raft::resources const& handle, + const BallCoverIndex& index, + const value_t* query, + const value_int n_query_rows, + value_t eps, + value_int* max_k, + const value_t* R_dists, + dist_func& dfunc, + value_idx* adj_ia, + value_idx* adj_ja, + value_idx* vd) +{ + // if max_k == nullptr we are either pass 1 or pass 2 + if (max_k == nullptr) { + if (adj_ja == nullptr) { + // pass 1 -> only compute adj_ia / vd + value_idx* vd_ptr = (vd != nullptr) ? vd : adj_ia; + block_rbc_kernel_eps_csr_pass + <<>>( + index.get_X().data_handle(), + query, + index.n, + R_dists, + index.m, + eps, + index.n_landmarks, + index.get_R_indptr().data_handle(), + index.get_R_1nn_cols().data_handle(), + index.get_R_1nn_dists().data_handle(), + index.get_R_radius().data_handle(), + dfunc, + vd_ptr, + nullptr); + + thrust::exclusive_scan(resource::get_thrust_policy(handle), + vd_ptr, + vd_ptr + n_query_rows + 1, + adj_ia, + (value_idx)0); + + } else { + // pass 2 -> fill in adj_ja + block_rbc_kernel_eps_csr_pass + <<>>( + index.get_X().data_handle(), + query, + index.n, + R_dists, + index.m, + eps, + index.n_landmarks, + index.get_R_indptr().data_handle(), + index.get_R_1nn_cols().data_handle(), + index.get_R_1nn_dists().data_handle(), + index.get_R_radius().data_handle(), + dfunc, + adj_ia, + adj_ja); + } + } else { + value_int max_k_in = *max_k; + value_idx* vd_ptr = (vd != nullptr) ? vd : adj_ia; + + rmm::device_uvector tmp(n_query_rows * max_k_in, resource::get_cuda_stream(handle)); + + block_rbc_kernel_eps_max_k + <<>>( + index.get_X().data_handle(), + query, + index.n, + R_dists, + index.m, + eps, + index.n_landmarks, + index.get_R_indptr().data_handle(), + index.get_R_1nn_cols().data_handle(), + index.get_R_1nn_dists().data_handle(), + index.get_R_radius().data_handle(), + dfunc, + vd_ptr, + max_k_in, + tmp.data()); + + value_int actual_max = thrust::reduce(resource::get_thrust_policy(handle), + vd_ptr, + vd_ptr + n_query_rows, + (value_idx)0, + thrust::maximum()); + + if (actual_max > max_k_in) { + // ceil vd to max_k + thrust::transform(resource::get_thrust_policy(handle), + vd_ptr, + vd_ptr + n_query_rows, + vd_ptr, + [max_k_in] __device__(value_idx vd_count) { + return vd_count > max_k_in ? max_k_in : vd_count; + }); + } + + thrust::exclusive_scan( + resource::get_thrust_policy(handle), vd_ptr, vd_ptr + n_query_rows + 1, adj_ia, (value_idx)0); + + block_rbc_kernel_eps_max_k_copy + <<>>( + max_k_in, adj_ia, tmp.data(), adj_ja); + + // return 'new' max-k + *max_k = actual_max; + } + + if (vd != nullptr && (max_k != nullptr || adj_ja == nullptr)) { + // copy sum to last element + RAFT_CUDA_TRY(cudaMemcpyAsync(vd + n_query_rows, + adj_ia + n_query_rows, + sizeof(value_idx), + cudaMemcpyDeviceToDevice, + resource::get_cuda_stream(handle))); + } + + resource::sync_stream(handle); +} + }; // namespace detail }; // namespace knn }; // namespace spatial diff --git a/cpp/include/raft/spatial/knn/detail/epsilon_neighborhood.cuh b/cpp/include/raft/spatial/knn/detail/epsilon_neighborhood.cuh index cb0ca6cc68..7a5a217959 100644 --- a/cpp/include/raft/spatial/knn/detail/epsilon_neighborhood.cuh +++ b/cpp/include/raft/spatial/knn/detail/epsilon_neighborhood.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -95,13 +95,10 @@ struct EpsUnexpL2SqNeighborhood : public BaseClass { IdxT startx = blockIdx.x * P::Mblk + this->accrowid; IdxT starty = blockIdx.y * P::Nblk + this->acccolid; auto lid = raft::laneId(); - IdxT sums[P::AccColsPerTh]; -#pragma unroll - for (int j = 0; j < P::AccColsPerTh; ++j) { - sums[j] = 0; - } + IdxT sums[P::AccRowsPerTh]; #pragma unroll for (int i = 0; i < P::AccRowsPerTh; ++i) { + sums[i] = 0; auto xid = startx + i * P::AccThRows; #pragma unroll for (int j = 0; j < P::AccColsPerTh; ++j) { @@ -110,7 +107,7 @@ struct EpsUnexpL2SqNeighborhood : public BaseClass { ///@todo: fix uncoalesced writes using shared mem if (xid < this->m && yid < this->n) { adj[xid * this->n + yid] = is_neigh; - sums[j] += is_neigh; + sums[i] += is_neigh; } } } @@ -137,19 +134,21 @@ struct EpsUnexpL2SqNeighborhood : public BaseClass { } } - DI void updateVertexDegree(IdxT (&sums)[P::AccColsPerTh]) + DI void updateVertexDegree(IdxT (&sums)[P::AccRowsPerTh]) { __syncthreads(); // so that we can safely reuse smem - int gid = threadIdx.x / P::AccThCols; - int lid = threadIdx.x % P::AccThCols; - auto cidx = IdxT(blockIdx.y) * P::Nblk + lid; + int gid = this->accrowid; + int lid = this->acccolid; + auto cidx = IdxT(blockIdx.x) * P::Mblk + gid; IdxT totalSum = 0; // update the individual vertex degrees #pragma unroll - for (int i = 0; i < P::AccColsPerTh; ++i) { - sums[i] = batchedBlockReduce(sums[i], smem); - auto cid = cidx + i * P::AccThCols; - if (gid == 0 && cid < this->n) { + for (int i = 0; i < P::AccRowsPerTh; ++i) { + // P::AccThCols neighboring threads need to reduce + // -> we have P::Nblk/P::AccThCols individual reductions + auto cid = cidx + i * P::AccThRows; + sums[i] = raft::logicalWarpReduce(sums[i], raft::add_op()); + if (lid == 0 && cid < this->m) { atomicUpdate(cid, sums[i]); totalSum += sums[i]; } @@ -157,7 +156,7 @@ struct EpsUnexpL2SqNeighborhood : public BaseClass { } // update the total edge count totalSum = raft::blockReduce(totalSum, smem); - if (threadIdx.x == 0) { atomicUpdate(this->n, totalSum); } + if (threadIdx.x == 0) { atomicUpdate(this->m, totalSum); } } DI void atomicUpdate(IdxT addrId, IdxT val) @@ -226,6 +225,8 @@ void epsUnexpL2SqNeighborhood(bool* adj, DataT eps, cudaStream_t stream) { + if (vd != nullptr) { RAFT_CUDA_TRY(cudaMemsetAsync(vd, 0, (m + 1) * sizeof(IdxT), stream)); } + size_t bytes = sizeof(DataT) * k; if (16 % sizeof(DataT) == 0 && bytes % 16 == 0) { epsUnexpL2SqNeighImpl(adj, vd, x, y, m, n, k, eps, stream); @@ -238,4 +239,4 @@ void epsUnexpL2SqNeighborhood(bool* adj, } // namespace detail } // namespace knn } // namespace spatial -} // namespace raft \ No newline at end of file +} // namespace raft diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh index 1a48e1adde..0eca119450 100644 --- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh +++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh index 30ebab43b6..0c9f0059f9 100644 --- a/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh +++ b/cpp/include/raft/spatial/knn/detail/fused_l2_knn-inl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -969,12 +969,16 @@ void fusedL2Knn(size_t D, size_t worksize = 0, tempWorksize = 0; rmm::device_uvector workspace(worksize, stream); value_idx lda = D, ldb = D, ldd = n_index_rows; - + // switch (metric) { case raft::distance::DistanceType::L2SqrtExpanded: case raft::distance::DistanceType::L2Expanded: - tempWorksize = raft::distance::detail:: - getWorkspaceSize( + tempWorksize = + raft::distance::detail::getWorkspaceSize( query, index, n_query_rows, n_index_rows, D); worksize = tempWorksize; workspace.resize(worksize, stream); diff --git a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh index 5b8cc36368..5fb912843d 100644 --- a/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh +++ b/cpp/include/raft/spatial/knn/detail/haversine_distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -133,8 +133,10 @@ void haversine_knn(value_idx* out_inds, int k, cudaStream_t stream) { - haversine_knn_kernel<<>>( - out_inds, out_dists, index, query, n_index_rows, k); + // ensure kernel does not breach shared memory limits + constexpr int kWarpQ = sizeof(value_t) > 4 ? 512 : 1024; + haversine_knn_kernel + <<>>(out_inds, out_dists, index, query, n_index_rows, k); } } // namespace detail diff --git a/cpp/include/raft/util/cuda_utils.cuh b/cpp/include/raft/util/cuda_utils.cuh index e718ca3545..bf46e069e4 100644 --- a/cpp/include/raft/util/cuda_utils.cuh +++ b/cpp/include/raft/util/cuda_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,15 +16,12 @@ #pragma once +#include +#include #include #include #include -#if defined(_RAFT_HAS_CUDA) -#include -#include -#endif - #include #include #include @@ -278,17 +275,53 @@ template <> * @{ */ template -inline __device__ T myInf(); -template <> -inline __device__ float myInf() +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, float> myInf() { return CUDART_INF_F; } -template <> -inline __device__ double myInf() +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, double> myInf() { return CUDART_INF; } +// Half/Bfloat constants only defined after CUDA 12.2 +#if __CUDACC_VER_MAJOR__ < 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ < 2) +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> myInf() +{ +#if (__CUDA_ARCH__ >= 530) + return __ushort_as_half((unsigned short)0x7C00U); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530"); + return T{}; +#endif +} +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +myInf() +{ +#if (__CUDA_ARCH__ >= 800) + return __ushort_as_bfloat16((unsigned short)0x7F80U); +#else + // Fail during template instantiation if the compute capability doesn't support this operation + static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800"); + return T{}; +#endif +} +#else +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, __half> myInf() +{ + return CUDART_INF_FP16; +} +template +RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t, nv_bfloat16> +myInf() +{ + return CUDART_INF_BF16; +} +#endif /** @} */ /** diff --git a/cpp/include/raft_runtime/neighbors/cagra.hpp b/cpp/include/raft_runtime/neighbors/cagra.hpp index c54ed32b77..8389929b15 100644 --- a/cpp/include/raft_runtime/neighbors/cagra.hpp +++ b/cpp/include/raft_runtime/neighbors/cagra.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,48 +27,53 @@ namespace raft::runtime::neighbors::cagra { // Using device and host_matrix_view avoids needing to typedef mutltiple mdspans based on accessors -#define RAFT_INST_CAGRA_FUNCS(T, IdxT) \ - auto build(raft::resources const& handle, \ - const raft::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset) \ - ->raft::neighbors::cagra::index; \ - \ - auto build(raft::resources const& handle, \ - const raft::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset) \ - ->raft::neighbors::cagra::index; \ - \ - void build_device(raft::resources const& handle, \ - const raft::neighbors::cagra::index_params& params, \ - raft::device_matrix_view dataset, \ - raft::neighbors::cagra::index& idx); \ - \ - void build_host(raft::resources const& handle, \ - const raft::neighbors::cagra::index_params& params, \ - raft::host_matrix_view dataset, \ - raft::neighbors::cagra::index& idx); \ - \ - void search(raft::resources const& handle, \ - raft::neighbors::cagra::search_params const& params, \ - const raft::neighbors::cagra::index& index, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances); \ - void serialize_file(raft::resources const& handle, \ - const std::string& filename, \ - const raft::neighbors::cagra::index& index, \ - bool include_dataset = true); \ - \ - void deserialize_file(raft::resources const& handle, \ - const std::string& filename, \ - raft::neighbors::cagra::index* index); \ - void serialize(raft::resources const& handle, \ - std::string& str, \ - const raft::neighbors::cagra::index& index, \ - bool include_dataset = true); \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& str, \ +#define RAFT_INST_CAGRA_FUNCS(T, IdxT) \ + auto build(raft::resources const& handle, \ + const raft::neighbors::cagra::index_params& params, \ + raft::device_matrix_view dataset) \ + ->raft::neighbors::cagra::index; \ + \ + auto build(raft::resources const& handle, \ + const raft::neighbors::cagra::index_params& params, \ + raft::host_matrix_view dataset) \ + ->raft::neighbors::cagra::index; \ + \ + void build_device(raft::resources const& handle, \ + const raft::neighbors::cagra::index_params& params, \ + raft::device_matrix_view dataset, \ + raft::neighbors::cagra::index& idx); \ + \ + void build_host(raft::resources const& handle, \ + const raft::neighbors::cagra::index_params& params, \ + raft::host_matrix_view dataset, \ + raft::neighbors::cagra::index& idx); \ + \ + void search(raft::resources const& handle, \ + raft::neighbors::cagra::search_params const& params, \ + const raft::neighbors::cagra::index& index, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + void serialize_file(raft::resources const& handle, \ + const std::string& filename, \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset = true); \ + \ + void deserialize_file(raft::resources const& handle, \ + const std::string& filename, \ + raft::neighbors::cagra::index* index); \ + void serialize(raft::resources const& handle, \ + std::string& str, \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset = true); \ + void serialize_to_hnswlib(raft::resources const& handle, \ + std::string& str, \ + const raft::neighbors::cagra::index& index); \ + void serialize_to_hnswlib_file(raft::resources const& handle, \ + const std::string& filename, \ + const raft::neighbors::cagra::index& index); \ + void deserialize(raft::resources const& handle, \ + const std::string& str, \ raft::neighbors::cagra::index* index); RAFT_INST_CAGRA_FUNCS(float, uint32_t); diff --git a/cpp/include/raft_runtime/neighbors/eps_neighborhood.hpp b/cpp/include/raft_runtime/neighbors/eps_neighborhood.hpp new file mode 100644 index 0000000000..ee1ca846f6 --- /dev/null +++ b/cpp/include/raft_runtime/neighbors/eps_neighborhood.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace raft::runtime::neighbors::epsilon_neighborhood { + +#define RAFT_INST_BFEPSN(IDX_T, DATA_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT) \ + void eps_neighbors(raft::resources const& handle, \ + raft::device_matrix_view index, \ + raft::device_matrix_view search, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + DATA_T eps); + +RAFT_INST_BFEPSN(int64_t, float, int64_t, raft::row_major, raft::row_major); + +#undef RAFT_INST_BFEPSN + +#define RAFT_INST_RBCEPSN(IDX_T, DATA_T, INT_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT) \ + void eps_neighbors_rbc( \ + raft::resources const& handle, \ + raft::device_matrix_view index, \ + raft::device_matrix_view search, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + DATA_T eps); \ + void build_rbc_index( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex& rbc_index); \ + void eps_neighbors_rbc_pass1( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex rbc_index, \ + raft::device_matrix_view search, \ + raft::device_vector_view adj_ia, \ + raft::device_vector_view vd, \ + DATA_T eps); \ + void eps_neighbors_rbc_pass2( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex rbc_index, \ + raft::device_matrix_view search, \ + raft::device_vector_view adj_ia, \ + raft::device_vector_view adj_ja, \ + raft::device_vector_view vd, \ + DATA_T eps); + +RAFT_INST_RBCEPSN(int64_t, float, int64_t, int64_t, raft::row_major, raft::row_major); + +#undef RAFT_INST_RBCEPSN + +} // namespace raft::runtime::neighbors::epsilon_neighborhood diff --git a/cpp/include/raft_runtime/neighbors/hnsw.hpp b/cpp/include/raft_runtime/neighbors/hnsw.hpp new file mode 100644 index 0000000000..e8b932d490 --- /dev/null +++ b/cpp/include/raft_runtime/neighbors/hnsw.hpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace raft::runtime::neighbors::hnsw { + +#define RAFT_INST_HNSW_FUNCS(T, IdxT) \ + std::unique_ptr> from_cagra( \ + raft::resources const& res, raft::neighbors::cagra::index); \ + void search(raft::resources const& handle, \ + raft::neighbors::hnsw::search_params const& params, \ + raft::neighbors::hnsw::index const& index, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances); \ + template \ + std::unique_ptr> deserialize_file( \ + raft::resources const& handle, \ + const std::string& filename, \ + int dim, \ + raft::distance::DistanceType metric); \ + template <> \ + std::unique_ptr> deserialize_file( \ + raft::resources const& handle, \ + const std::string& filename, \ + int dim, \ + raft::distance::DistanceType metric); + +RAFT_INST_HNSW_FUNCS(float, uint32_t); +RAFT_INST_HNSW_FUNCS(int8_t, uint32_t); +RAFT_INST_HNSW_FUNCS(uint8_t, uint32_t); + +} // namespace raft::runtime::neighbors::hnsw diff --git a/cpp/src/neighbors/ball_cover.cu b/cpp/src/neighbors/ball_cover.cu index 3b129e168b..0a59060c8e 100644 --- a/cpp/src/neighbors/ball_cover.cu +++ b/cpp/src/neighbors/ball_cover.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,24 @@ raft::resources const& handle, \ raft::neighbors::ball_cover::BallCoverIndex& index); \ \ + template void raft::neighbors::ball_cover::eps_nn( \ + raft::resources const& handle, \ + const raft::neighbors::ball_cover::BallCoverIndex& index, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + raft::device_matrix_view query, \ + value_t eps); \ + \ + template void raft::neighbors::ball_cover::eps_nn( \ + raft::resources const& handle, \ + const raft::neighbors::ball_cover::BallCoverIndex& index, \ + raft::device_vector_view ia, \ + raft::device_vector_view ja, \ + raft::device_vector_view vd, \ + raft::device_matrix_view query, \ + value_t eps, \ + std::optional> max_k); \ + \ template void raft::neighbors::ball_cover::all_knn_query( \ raft::resources const& handle, \ raft::neighbors::ball_cover::BallCoverIndex& index, \ @@ -40,9 +58,9 @@ bool perform_post_filtering, \ float weight); \ \ - template void raft::neighbors::ball_cover::knn_query( \ + template void raft::neighbors::ball_cover::knn_query( \ raft::resources const& handle, \ - const raft::neighbors::ball_cover::BallCoverIndex& index, \ + const raft::neighbors::ball_cover::BallCoverIndex& index, \ int_t k, \ const value_t* query, \ int_t n_query_pts, \ @@ -61,6 +79,6 @@ bool perform_post_filtering, \ float weight); -instantiate_raft_neighbors_ball_cover(int64_t, float, uint32_t, uint32_t); +instantiate_raft_neighbors_ball_cover(int64_t, float, int64_t, int64_t); #undef instantiate_raft_neighbors_ball_cover diff --git a/cpp/src/neighbors/brute_force_00_generate.py b/cpp/src/neighbors/brute_force_00_generate.py index 9adc5fef90..8ed05dc4c2 100644 --- a/cpp/src/neighbors/brute_force_00_generate.py +++ b/cpp/src/neighbors/brute_force_00_generate.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ header = """ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu index adde8663f4..f386bcce8e 100644 --- a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu +++ b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu @@ -26,39 +26,54 @@ namespace raft::runtime::neighbors::cagra { -#define RAFT_INST_CAGRA_SERIALIZE(DTYPE) \ - void serialize_file(raft::resources const& handle, \ - const std::string& filename, \ - const raft::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - raft::neighbors::cagra::serialize(handle, filename, index, include_dataset); \ - }; \ - \ - void deserialize_file(raft::resources const& handle, \ - const std::string& filename, \ - raft::neighbors::cagra::index* index) \ - { \ - if (!index) { RAFT_FAIL("Invalid index pointer"); } \ - *index = raft::neighbors::cagra::deserialize(handle, filename); \ - }; \ - void serialize(raft::resources const& handle, \ - std::string& str, \ - const raft::neighbors::cagra::index& index, \ - bool include_dataset) \ - { \ - std::stringstream os; \ - raft::neighbors::cagra::serialize(handle, os, index, include_dataset); \ - str = os.str(); \ - } \ - \ - void deserialize(raft::resources const& handle, \ - const std::string& str, \ - raft::neighbors::cagra::index* index) \ - { \ - std::istringstream is(str); \ - if (!index) { RAFT_FAIL("Invalid index pointer"); } \ - *index = raft::neighbors::cagra::deserialize(handle, is); \ +#define RAFT_INST_CAGRA_SERIALIZE(DTYPE) \ + void serialize_file(raft::resources const& handle, \ + const std::string& filename, \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset) \ + { \ + raft::neighbors::cagra::serialize(handle, filename, index, include_dataset); \ + }; \ + \ + void deserialize_file(raft::resources const& handle, \ + const std::string& filename, \ + raft::neighbors::cagra::index* index) \ + { \ + if (!index) { RAFT_FAIL("Invalid index pointer"); } \ + *index = raft::neighbors::cagra::deserialize(handle, filename); \ + }; \ + void serialize(raft::resources const& handle, \ + std::string& str, \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset) \ + { \ + std::stringstream os; \ + raft::neighbors::cagra::serialize(handle, os, index, include_dataset); \ + str = os.str(); \ + } \ + \ + void serialize_to_hnswlib_file(raft::resources const& handle, \ + const std::string& filename, \ + const raft::neighbors::cagra::index& index) \ + { \ + raft::neighbors::cagra::serialize_to_hnswlib(handle, filename, index); \ + }; \ + void serialize_to_hnswlib(raft::resources const& handle, \ + std::string& str, \ + const raft::neighbors::cagra::index& index) \ + { \ + std::stringstream os; \ + raft::neighbors::cagra::serialize_to_hnswlib(handle, os, index); \ + str = os.str(); \ + } \ + \ + void deserialize(raft::resources const& handle, \ + const std::string& str, \ + raft::neighbors::cagra::index* index) \ + { \ + std::istringstream is(str); \ + if (!index) { RAFT_FAIL("Invalid index pointer"); } \ + *index = raft::neighbors::cagra::deserialize(handle, is); \ } RAFT_INST_CAGRA_SERIALIZE(float); diff --git a/cpp/src/raft_runtime/neighbors/eps_neighborhood.cu b/cpp/src/raft_runtime/neighbors/eps_neighborhood.cu new file mode 100644 index 0000000000..23cb6fd790 --- /dev/null +++ b/cpp/src/raft_runtime/neighbors/eps_neighborhood.cu @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include + +namespace raft::runtime::neighbors::epsilon_neighborhood { + +#define RAFT_INST_BFEPSN(IDX_T, DATA_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT) \ + void eps_neighbors(raft::resources const& handle, \ + raft::device_matrix_view index, \ + raft::device_matrix_view search, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + DATA_T eps) \ + { \ + raft::neighbors::epsilon_neighborhood::eps_neighbors_l2sq( \ + handle, search, index, adj, vd, eps* eps); \ + } + +RAFT_INST_BFEPSN(int64_t, float, int64_t, raft::row_major, raft::row_major); + +#undef RAFT_INST_BFEPSN + +#define RAFT_INST_RBCEPSN(IDX_T, DATA_T, INT_T, MATRIX_IDX_T, INDEX_LAYOUT, SEARCH_LAYOUT) \ + void eps_neighbors_rbc( \ + raft::resources const& handle, \ + raft::device_matrix_view index, \ + raft::device_matrix_view search, \ + raft::device_matrix_view adj, \ + raft::device_vector_view vd, \ + DATA_T eps) \ + { \ + raft::neighbors::ball_cover::BallCoverIndex rbc_index( \ + handle, \ + index.data_handle(), \ + index.extent(0), \ + index.extent(1), \ + raft::distance::DistanceType::L2SqrtUnexpanded); \ + raft::neighbors::ball_cover::build_index(handle, rbc_index); \ + raft::neighbors::ball_cover::eps_nn(handle, rbc_index, adj, vd, search, eps); \ + } \ + void build_rbc_index( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex& rbc_index) \ + { \ + raft::neighbors::ball_cover::build_index(handle, rbc_index); \ + } \ + void eps_neighbors_rbc_pass1( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex rbc_index, \ + raft::device_matrix_view search, \ + raft::device_vector_view adj_ia, \ + raft::device_vector_view vd, \ + DATA_T eps) \ + { \ + raft::neighbors::ball_cover::eps_nn( \ + handle, \ + rbc_index, \ + adj_ia, \ + raft::make_device_vector_view(nullptr, 0), \ + vd, \ + search, \ + eps); \ + } \ + void eps_neighbors_rbc_pass2( \ + raft::resources const& handle, \ + raft::neighbors::ball_cover::BallCoverIndex rbc_index, \ + raft::device_matrix_view search, \ + raft::device_vector_view adj_ia, \ + raft::device_vector_view adj_ja, \ + raft::device_vector_view vd, \ + DATA_T eps) \ + { \ + raft::neighbors::ball_cover::eps_nn(handle, rbc_index, adj_ia, adj_ja, vd, search, eps); \ + } + +RAFT_INST_RBCEPSN(int64_t, float, int64_t, int64_t, raft::row_major, raft::row_major); + +#undef RAFT_INST_RBCEPSN + +} // namespace raft::runtime::neighbors::epsilon_neighborhood diff --git a/cpp/src/raft_runtime/neighbors/hnsw.cpp b/cpp/src/raft_runtime/neighbors/hnsw.cpp new file mode 100644 index 0000000000..1f9e6b0a0b --- /dev/null +++ b/cpp/src/raft_runtime/neighbors/hnsw.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +namespace raft::neighbors::hnsw { +#define RAFT_INST_HNSW(T) \ + template <> \ + std::unique_ptr> from_cagra( \ + raft::resources const& res, raft::neighbors::cagra::index cagra_index) \ + { \ + std::string filepath = "/tmp/cagra_index.bin"; \ + raft::runtime::neighbors::cagra::serialize_to_hnswlib(res, filepath, cagra_index); \ + auto hnsw_index = raft::runtime::neighbors::hnsw::deserialize_file( \ + res, filepath, cagra_index.dim(), cagra_index.metric()); \ + std::filesystem::remove(filepath); \ + return hnsw_index; \ + } + +RAFT_INST_HNSW(float); +RAFT_INST_HNSW(int8_t); +RAFT_INST_HNSW(uint8_t); +#undef RAFT_INST_HNSW +} // namespace raft::neighbors::hnsw + +namespace raft::runtime::neighbors::hnsw { + +#define RAFT_INST_HNSW(T) \ + void search(raft::resources const& handle, \ + raft::neighbors::hnsw::search_params const& params, \ + const raft::neighbors::hnsw::index& index, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbors, \ + raft::host_matrix_view distances) \ + { \ + raft::neighbors::hnsw::search(handle, params, index, queries, neighbors, distances); \ + } \ + \ + template <> \ + std::unique_ptr> deserialize_file( \ + raft::resources const& handle, \ + const std::string& filename, \ + int dim, \ + raft::distance::DistanceType metric) \ + { \ + return raft::neighbors::hnsw::deserialize(handle, filename, dim, metric); \ + } + +RAFT_INST_HNSW(float); +RAFT_INST_HNSW(int8_t); +RAFT_INST_HNSW(uint8_t); + +#undef RAFT_INST_HNSW + +} // namespace raft::runtime::neighbors::hnsw diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers.cu b/cpp/src/spatial/knn/detail/ball_cover/registers.cu index 493a602362..31595272b6 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,45 +16,79 @@ #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - raft::spatial::knn::detail::DistFunc& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) - -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - raft::spatial::knn::detail::DistFunc& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) - -instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(std::int64_t, float, std::uint32_t, 2); -instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(std::int64_t, float, std::uint32_t, 3); - -instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(std::int64_t, float, std::uint32_t, 2); -instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(std::int64_t, float, std::uint32_t, 3); +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + raft::spatial::knn::detail::DistFunc& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + raft::spatial::knn::detail::DistFunc& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) + +#define instantiate_raft_spatial_knn_detail_rbc_eps_pass( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx) \ + template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + const Mvalue_t* R_dists, \ + raft::spatial::knn::detail::DistFunc& dfunc, \ + bool* adj, \ + Mvalue_idx* vd); \ + \ + template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + const Mvalue_t* R_dists, \ + raft::spatial::knn::detail::DistFunc& dfunc, \ + Mvalue_idx* ia, \ + Mvalue_idx* ja, \ + Mvalue_idx* vd) + +instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( + std::int64_t, float, std::int64_t, std::int64_t, 2); +instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( + std::int64_t, float, std::int64_t, std::int64_t, 3); + +instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( + std::int64_t, float, std::int64_t, std::int64_t, 2); +instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( + std::int64_t, float, std::int64_t, std::int64_t, 3); + +instantiate_raft_spatial_knn_detail_rbc_eps_pass(std::int64_t, float, std::int64_t, std::int64_t); + +#undef instantiate_raft_spatial_knn_detail_rbc_eps_pass #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py index d7b6e618fd..dff2e015a4 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_00_generate.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. header = """/* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,11 +45,11 @@ macro_pass_one = """ #define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \\ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \\ - template void \\ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \\ - raft::resources const& handle, \\ - const BallCoverIndex& index, \\ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \\ + template void \\ + raft::spatial::knn::detail::rbc_low_dim_pass_one( \\ + raft::resources const& handle, \\ + const BallCoverIndex& index, \\ const Mvalue_t* query, \\ const Mvalue_int n_query_rows, \\ Mvalue_int k, \\ @@ -65,11 +65,11 @@ macro_pass_two = """ #define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \\ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \\ - template void \\ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \\ - raft::resources const& handle, \\ - const BallCoverIndex& index, \\ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \\ + template void \\ + raft::spatial::knn::detail::rbc_low_dim_pass_two( \\ + raft::resources const& handle, \\ + const BallCoverIndex& index, \\ const Mvalue_t* query, \\ const Mvalue_int n_query_rows, \\ Mvalue_int k, \\ @@ -83,20 +83,58 @@ """ +macro_pass_eps = """ +#define instantiate_raft_spatial_knn_detail_rbc_eps_pass( \\ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdist_func) \\ + template void \\ + raft::spatial::knn::detail::rbc_eps_pass( \\ + raft::resources const& handle, \\ + const BallCoverIndex& index, \\ + const Mvalue_t* query, \\ + const Mvalue_int n_query_rows, \\ + Mvalue_t eps, \\ + const Mvalue_t* R_dists, \\ + Mdist_func& dfunc, \\ + bool* adj, \\ + Mvalue_idx* vd); \\ + \\ + template void \\ + raft::spatial::knn::detail::rbc_eps_pass( \\ + raft::resources const& handle, \\ + const BallCoverIndex& index, \\ + const Mvalue_t* query, \\ + const Mvalue_int n_query_rows, \\ + Mvalue_t eps, \\ + Mvalue_int* max_k, \\ + const Mvalue_t* R_dists, \\ + Mdist_func& dfunc, \\ + Mvalue_idx* adj_ia, \\ + Mvalue_idx* adj_ja, \\ + Mvalue_idx* vd) + +""" + + distances = dict( haversine="raft::spatial::knn::detail::HaversineFunc", euclidean="raft::spatial::knn::detail::EuclideanFunc", dist="raft::spatial::knn::detail::DistFunc", ) +types = dict( + int64_float=("std::int64_t", "float"), + #int64_double=("std::int64_t", "double"), +) + for k, v in distances.items(): for dim in [2, 3]: path = f"registers_pass_one_{dim}d_{k}.cu" with open(path, "w") as f: f.write(header) f.write(macro_pass_one) - f.write(f"instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(\n") - f.write(f" std::int64_t, float, std::uint32_t, {dim}, {v});\n") + for type_path, (int_t, data_t) in types.items(): + f.write(f"instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one(\n") + f.write(f" {int_t}, {data_t}, {int_t}, {int_t}, {dim}, {v});\n") f.write("#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one\n") print(f"src/spatial/knn/detail/ball_cover/{path}") @@ -106,7 +144,19 @@ with open(path, "w") as f: f.write(header) f.write(macro_pass_two) - f.write(f"instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(\n") - f.write(f" std::int64_t, float, std::uint32_t, {dim}, {v});\n") + for type_path, (int_t, data_t) in types.items(): + f.write(f"instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two(\n") + f.write(f" {int_t}, {data_t}, {int_t}, {int_t}, {dim}, {v});\n") f.write("#undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two\n") print(f"src/spatial/knn/detail/ball_cover/{path}") + +path="registers_eps_pass_euclidean.cu" +with open(path, "w") as f: + f.write(header) + f.write(macro_pass_eps) + for type_path, (int_t, data_t) in types.items(): + f.write(f"instantiate_raft_spatial_knn_detail_rbc_eps_pass(\n") + f.write(f" {int_t}, {data_t}, {int_t}, {int_t}, {distances['euclidean']});\n") + f.write("#undef instantiate_raft_spatial_knn_detail_rbc_eps_pass\n") + print(f"src/spatial/knn/detail/ball_cover/{path}") + diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu new file mode 100644 index 0000000000..0d09f88b65 --- /dev/null +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by registers_00_generate.py + * + * Make changes there and run in this directory: + * + * > python registers_00_generate.py + * + */ + +#include // int64_t +#include + +#define instantiate_raft_spatial_knn_detail_rbc_eps_pass( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdist_func) \ + template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + const Mvalue_t* R_dists, \ + Mdist_func& dfunc, \ + bool* adj, \ + Mvalue_idx* vd); \ + \ + template void \ + raft::spatial::knn::detail::rbc_eps_pass( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_t eps, \ + Mvalue_int* max_k, \ + const Mvalue_t* R_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* adj_ia, \ + Mvalue_idx* adj_ja, \ + Mvalue_idx* vd) + +instantiate_raft_spatial_knn_detail_rbc_eps_pass( + std::int64_t, float, std::int64_t, std::int64_t, raft::spatial::knn::detail::EuclideanFunc); +#undef instantiate_raft_spatial_knn_detail_rbc_eps_pass diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu index bb9ec284cc..3681acf245 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::DistFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu index 2b06d0a1cd..3fa20779b7 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::EuclideanFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu index 6f4e4061ac..7abc89cc11 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::HaversineFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu index aa407eeb20..6251a86867 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::DistFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu index 7918fb79cb..07b97ac718 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::EuclideanFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu index f8f29a107c..4fc18184b0 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_one( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_one( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::HaversineFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_one diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu index 1facd24510..882496c7d9 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::DistFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu index 6e681e2e9b..0a736d7e13 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::EuclideanFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu index b4a038ffd7..23aff93966 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 2, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 2, raft::spatial::knn::detail::HaversineFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu index bcb27568c1..d3ec2b4c65 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::DistFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::DistFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu index e40d837862..dd9f0e4658 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::EuclideanFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::EuclideanFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu index 8a362bcf16..849bbf0f96 100644 --- a/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu +++ b/cpp/src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,23 +26,23 @@ #include // int64_t #include -#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ - Mvalue_idx, Mvalue_t, Mvalue_int, Mdims, Mdist_func) \ - template void \ - raft::spatial::knn::detail::rbc_low_dim_pass_two( \ - raft::resources const& handle, \ - const BallCoverIndex& index, \ - const Mvalue_t* query, \ - const Mvalue_int n_query_rows, \ - Mvalue_int k, \ - const Mvalue_idx* R_knn_inds, \ - const Mvalue_t* R_knn_dists, \ - Mdist_func& dfunc, \ - Mvalue_idx* inds, \ - Mvalue_t* dists, \ - float weight, \ - Mvalue_int* dists_counter) +#define instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( \ + Mvalue_idx, Mvalue_t, Mvalue_int, Mmatrix_idx, Mdims, Mdist_func) \ + template void raft::spatial::knn::detail:: \ + rbc_low_dim_pass_two( \ + raft::resources const& handle, \ + const BallCoverIndex& index, \ + const Mvalue_t* query, \ + const Mvalue_int n_query_rows, \ + Mvalue_int k, \ + const Mvalue_idx* R_knn_inds, \ + const Mvalue_t* R_knn_dists, \ + Mdist_func& dfunc, \ + Mvalue_idx* inds, \ + Mvalue_t* dists, \ + float weight, \ + Mvalue_int* dists_counter) instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two( - std::int64_t, float, std::uint32_t, 3, raft::spatial::knn::detail::HaversineFunc); + std::int64_t, float, std::int64_t, std::int64_t, 3, raft::spatial::knn::detail::HaversineFunc); #undef instantiate_raft_spatial_knn_detail_rbc_low_dim_pass_two diff --git a/cpp/test/neighbors/ball_cover.cu b/cpp/test/neighbors/ball_cover.cu index fc711fc668..62fbdd6edb 100644 --- a/cpp/test/neighbors/ball_cover.cu +++ b/cpp/test/neighbors/ball_cover.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -126,7 +126,7 @@ struct ToRadians { __device__ __host__ float operator()(float a) { return a * (CUDART_PI_F / 180.0); } }; -template +template struct BallCoverInputs { value_int k; value_int n_rows; @@ -136,7 +136,7 @@ struct BallCoverInputs { raft::distance::DistanceType metric; }; -template +template class BallCoverKNNQueryTest : public ::testing::TestWithParam> { protected: void basicTest() @@ -151,26 +151,26 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam X(params.n_rows * params.n_cols, resource::get_cuda_stream(handle)); - rmm::device_uvector Y(params.n_rows, resource::get_cuda_stream(handle)); + rmm::device_uvector Y(params.n_rows, resource::get_cuda_stream(handle)); // Make sure the train and query sets are completely disjoint rmm::device_uvector X2(params.n_query * params.n_cols, resource::get_cuda_stream(handle)); - rmm::device_uvector Y2(params.n_query, resource::get_cuda_stream(handle)); - - raft::random::make_blobs(X.data(), - Y.data(), - params.n_rows, - params.n_cols, - n_centers, - resource::get_cuda_stream(handle)); - - raft::random::make_blobs(X2.data(), - Y2.data(), - params.n_query, - params.n_cols, - n_centers, - resource::get_cuda_stream(handle)); + rmm::device_uvector Y2(params.n_query, resource::get_cuda_stream(handle)); + + raft::random::make_blobs(X.data(), + Y.data(), + params.n_rows, + params.n_cols, + n_centers, + resource::get_cuda_stream(handle)); + + raft::random::make_blobs(X2.data(), + Y2.data(), + params.n_query, + params.n_cols, + n_centers, + resource::get_cuda_stream(handle)); rmm::device_uvector d_ref_I(params.n_query * k, resource::get_cuda_stream(handle)); rmm::device_uvector d_ref_D(params.n_query * k, resource::get_cuda_stream(handle)); @@ -215,7 +215,8 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam index(handle, X_view, metric); build_index(handle, index); - knn_query(handle, index, X2_view, d_pred_I_view, d_pred_D_view, k, true); + knn_query( + handle, index, X2_view, d_pred_I_view, d_pred_D_view, k, true); resource::sync_stream(handle); // What we really want are for the distances to match exactly. The @@ -249,7 +250,7 @@ class BallCoverKNNQueryTest : public ::testing::TestWithParam params; }; -template +template class BallCoverAllKNNTest : public ::testing::TestWithParam> { protected: void basicTest() @@ -264,14 +265,14 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam X(params.n_rows * params.n_cols, resource::get_cuda_stream(handle)); - rmm::device_uvector Y(params.n_rows, resource::get_cuda_stream(handle)); + rmm::device_uvector Y(params.n_rows, resource::get_cuda_stream(handle)); - raft::random::make_blobs(X.data(), - Y.data(), - params.n_rows, - params.n_cols, - n_centers, - resource::get_cuda_stream(handle)); + raft::random::make_blobs(X.data(), + Y.data(), + params.n_rows, + params.n_cols, + n_centers, + resource::get_cuda_stream(handle)); rmm::device_uvector d_ref_I(params.n_rows * k, resource::get_cuda_stream(handle)); rmm::device_uvector d_ref_D(params.n_rows * k, resource::get_cuda_stream(handle)); @@ -308,7 +309,8 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam index(handle, X_view, metric); - all_knn_query(handle, index, d_pred_I_view, d_pred_D_view, k, true); + all_knn_query( + handle, index, d_pred_I_view, d_pred_D_view, k, true); resource::sync_stream(handle); // What we really want are for the distances to match exactly. The @@ -348,7 +350,7 @@ class BallCoverAllKNNTest : public ::testing::TestWithParam BallCoverAllKNNTestF; typedef BallCoverKNNQueryTest BallCoverKNNQueryTestF; -const std::vector> ballcover_inputs = { +const std::vector> ballcover_inputs = { {11, 5000, 2, 1.0, 10000, raft::distance::DistanceType::Haversine}, {25, 10000, 2, 1.0, 5000, raft::distance::DistanceType::Haversine}, {2, 10000, 2, 1.0, 5000, raft::distance::DistanceType::L2SqrtUnexpanded}, diff --git a/cpp/test/neighbors/epsilon_neighborhood.cu b/cpp/test/neighbors/epsilon_neighborhood.cu index 1601037edb..8b35e3ca70 100644 --- a/cpp/test/neighbors/epsilon_neighborhood.cu +++ b/cpp/test/neighbors/epsilon_neighborhood.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,11 @@ #include #include #include +#include #include +#include #include +#include #include #include #include @@ -82,8 +85,11 @@ class EpsNeighTest : public ::testing::TestWithParam> { IdxT batchSize; }; // class EpsNeighTest -const std::vector> inputsfi = { +const std::vector> inputsfi = { + {100, 16, 5, 2, 2.f}, + {1500, 16, 5, 3, 2.f}, {15000, 16, 5, 1, 2.f}, + {15000, 3, 5, 1, 2.f}, {14000, 16, 5, 1, 2.f}, {15000, 17, 5, 1, 2.f}, {14000, 17, 5, 1, 2.f}, @@ -91,31 +97,317 @@ const std::vector> inputsfi = { {14000, 18, 5, 1, 2.f}, {15000, 32, 5, 1, 2.f}, {14000, 32, 5, 1, 2.f}, + {14000, 32, 5, 10, 2.f}, {20000, 10000, 10, 1, 2.f}, {20000, 10000, 10, 2, 2.f}, }; -typedef EpsNeighTest EpsNeighTestFI; -TEST_P(EpsNeighTestFI, Result) + +typedef EpsNeighTest EpsNeighTestFI; + +TEST_P(EpsNeighTestFI, ResultBruteForce) { for (int i = 0; i < param.n_batches; ++i) { RAFT_CUDA_TRY(cudaMemsetAsync(adj.data(), 0, sizeof(bool) * param.n_row * batchSize, stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(vd.data(), 0, sizeof(int) * (batchSize + 1), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(vd.data(), 0, sizeof(int64_t) * (batchSize + 1), stream)); - auto adj_view = make_device_matrix_view(adj.data(), param.n_row, batchSize); - auto vd_view = make_device_vector_view(vd.data(), batchSize + 1); - auto x_view = make_device_matrix_view(data.data(), param.n_row, param.n_col); - auto y_view = make_device_matrix_view( + auto adj_view = make_device_matrix_view(adj.data(), batchSize, param.n_row); + auto vd_view = make_device_vector_view(vd.data(), batchSize + 1); + auto x_view = make_device_matrix_view( data.data() + (i * batchSize * param.n_col), batchSize, param.n_col); + auto y_view = make_device_matrix_view(data.data(), param.n_row, param.n_col); - eps_neighbors_l2sq( + eps_neighbors_l2sq( handle, x_view, y_view, adj_view, vd_view, param.eps * param.eps); ASSERT_TRUE(raft::devArrMatch( - param.n_row / param.n_centers, vd.data(), batchSize, raft::Compare(), stream)); + param.n_row / param.n_centers, vd.data(), batchSize, raft::Compare(), stream)); } } + INSTANTIATE_TEST_CASE_P(EpsNeighTests, EpsNeighTestFI, ::testing::ValuesIn(inputsfi)); +// rbc examples take fewer points as correctness checks are very costly +const std::vector> inputsfi_rbc = { + {100, 16, 5, 2, 2.f}, + {1500, 16, 5, 3, 2.f}, + {1500, 16, 5, 1, 2.f}, + {1500, 3, 5, 1, 2.f}, + {1400, 16, 5, 1, 2.f}, + {1500, 17, 5, 1, 2.f}, + {1400, 17, 5, 1, 2.f}, + {1500, 18, 5, 1, 2.f}, + {1400, 18, 5, 1, 2.f}, + {1500, 32, 5, 1, 2.f}, + {1400, 32, 5, 1, 2.f}, + {1400, 32, 5, 10, 2.f}, + {2000, 1000, 10, 1, 2.f}, + {2000, 1000, 10, 2, 2.f}, +}; + +typedef EpsNeighTest EpsNeighRbcTestFI; + +TEST_P(EpsNeighRbcTestFI, DenseRbc) +{ + auto adj_baseline = raft::make_device_matrix(handle, batchSize, param.n_row); + + raft::neighbors::ball_cover::BallCoverIndex rbc_index( + handle, data.data(), param.n_row, param.n_col, raft::distance::DistanceType::L2SqrtUnexpanded); + raft::neighbors::ball_cover::build_index(handle, rbc_index); + + for (int i = 0; i < param.n_batches; ++i) { + // invalidate + RAFT_CUDA_TRY(cudaMemsetAsync(adj.data(), 1, sizeof(bool) * param.n_row * batchSize, stream)); + RAFT_CUDA_TRY(cudaMemsetAsync(vd.data(), 1, sizeof(int64_t) * (batchSize + 1), stream)); + RAFT_CUDA_TRY(cudaMemsetAsync( + adj_baseline.data_handle(), 1, sizeof(bool) * param.n_row * batchSize, stream)); + + float* query = data.data() + (i * batchSize * param.n_col); + + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_matrix_view(adj.data(), batchSize, param.n_row), + make_device_vector_view(vd.data(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps); + + ASSERT_TRUE(raft::devArrMatch( + param.n_row / param.n_centers, vd.data(), batchSize, raft::Compare(), stream)); + + // compute baseline via brute force + compare + epsUnexpL2SqNeighborhood(adj_baseline.data_handle(), + nullptr, + query, + data.data(), + batchSize, + param.n_row, + param.n_col, + param.eps * param.eps, + stream); + + ASSERT_TRUE(raft::devArrMatch(adj_baseline.data_handle(), + adj.data(), + batchSize, + param.n_row, + raft::Compare(), + stream)); + } +} + +template +testing::AssertionResult assertCsrEqualUnordered( + T* ia_exp, T* ja_exp, T* ia_act, T* ja_act, size_t rows, size_t cols, cudaStream_t stream) +{ + std::unique_ptr ia_exp_h(new T[rows + 1]); + std::unique_ptr ia_act_h(new T[rows + 1]); + raft::update_host(ia_exp_h.get(), ia_exp, rows + 1, stream); + raft::update_host(ia_act_h.get(), ia_act, rows + 1, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + size_t nnz = ia_exp_h.get()[rows]; + std::unique_ptr ja_exp_h(new T[nnz]); + std::unique_ptr ja_act_h(new T[nnz]); + raft::update_host(ja_exp_h.get(), ja_exp, nnz, stream); + raft::update_host(ja_act_h.get(), ja_act, nnz, stream); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + + for (size_t i(0); i < rows; ++i) { + auto row_start = ia_exp_h.get()[i]; + auto row_end = ia_exp_h.get()[i + 1]; + + // sort ja's + std::sort(ja_exp_h.get() + row_start, ja_exp_h.get() + row_end); + std::sort(ja_act_h.get() + row_start, ja_act_h.get() + row_end); + + for (size_t idx(row_start); idx < (size_t)row_end; ++idx) { + auto exp = ja_exp_h.get()[idx]; + auto act = ja_act_h.get()[idx]; + if (exp != act) { + return testing::AssertionFailure() + << "actual=" << act << " != expected=" << exp << " @" << i << "," << idx; + } + } + } + return testing::AssertionSuccess(); +} + +TEST_P(EpsNeighRbcTestFI, SparseRbc) +{ + auto adj_ia = raft::make_device_vector(handle, batchSize + 1); + auto adj_ja = raft::make_device_vector(handle, param.n_row * batchSize); + auto vd_baseline = raft::make_device_vector(handle, batchSize + 1); + auto adj_ia_baseline = raft::make_device_vector(handle, batchSize + 1); + auto adj_ja_baseline = raft::make_device_vector(handle, param.n_row * batchSize); + + raft::neighbors::ball_cover::BallCoverIndex rbc_index( + handle, data.data(), param.n_row, param.n_col, raft::distance::DistanceType::L2SqrtUnexpanded); + raft::neighbors::ball_cover::build_index(handle, rbc_index); + + for (int i = 0; i < param.n_batches; ++i) { + // reset full array -- that way we can compare the full size + RAFT_CUDA_TRY( + cudaMemsetAsync(adj_ja.data_handle(), 0, sizeof(int64_t) * param.n_row * batchSize, stream)); + RAFT_CUDA_TRY(cudaMemsetAsync( + adj_ja_baseline.data_handle(), 0, sizeof(int64_t) * param.n_row * batchSize, stream)); + + float* query = data.data() + (i * batchSize * param.n_col); + + // compute dense baseline and convert adj to csr + { + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_matrix_view(adj.data(), batchSize, param.n_row), + make_device_vector_view(vd_baseline.data_handle(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps); + thrust::exclusive_scan(resource::get_thrust_policy(handle), + vd_baseline.data_handle(), + vd_baseline.data_handle() + batchSize + 1, + adj_ia_baseline.data_handle()); + raft::sparse::convert::adj_to_csr(handle, + adj.data(), + adj_ia_baseline.data_handle(), + batchSize, + param.n_row, + labels.data(), + adj_ja_baseline.data_handle()); + } + + // exact computation with 2 passes + { + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_vector_view(adj_ia.data_handle(), batchSize + 1), + make_device_vector_view(nullptr, 0), + make_device_vector_view(vd.data(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps); + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_vector_view(adj_ia.data_handle(), batchSize + 1), + make_device_vector_view(adj_ja.data_handle(), batchSize * param.n_row), + make_device_vector_view(vd.data(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps); + ASSERT_TRUE(raft::devArrMatch(adj_ia_baseline.data_handle(), + adj_ia.data_handle(), + batchSize + 1, + raft::Compare(), + stream)); + ASSERT_TRUE(assertCsrEqualUnordered(adj_ia_baseline.data_handle(), + adj_ja_baseline.data_handle(), + adj_ia.data_handle(), + adj_ja.data_handle(), + batchSize, + param.n_row, + stream)); + } + } +} + +TEST_P(EpsNeighRbcTestFI, SparseRbcMaxK) +{ + auto adj_ia = raft::make_device_vector(handle, batchSize + 1); + auto adj_ja = raft::make_device_vector(handle, param.n_row * batchSize); + auto vd_baseline = raft::make_device_vector(handle, batchSize + 1); + auto adj_ia_baseline = raft::make_device_vector(handle, batchSize + 1); + auto adj_ja_baseline = raft::make_device_vector(handle, param.n_row * batchSize); + + raft::neighbors::ball_cover::BallCoverIndex rbc_index( + handle, data.data(), param.n_row, param.n_col, raft::distance::DistanceType::L2SqrtUnexpanded); + raft::neighbors::ball_cover::build_index(handle, rbc_index); + + int64_t expected_max_k = param.n_row / param.n_centers; + + for (int i = 0; i < param.n_batches; ++i) { + // reset full array -- that way we can compare the full size + RAFT_CUDA_TRY( + cudaMemsetAsync(adj_ja.data_handle(), 0, sizeof(int64_t) * param.n_row * batchSize, stream)); + RAFT_CUDA_TRY(cudaMemsetAsync( + adj_ja_baseline.data_handle(), 0, sizeof(int64_t) * param.n_row * batchSize, stream)); + + float* query = data.data() + (i * batchSize * param.n_col); + + // compute dense baseline and convert adj to csr + { + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_matrix_view(adj.data(), batchSize, param.n_row), + make_device_vector_view(vd_baseline.data_handle(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps); + thrust::exclusive_scan(resource::get_thrust_policy(handle), + vd_baseline.data_handle(), + vd_baseline.data_handle() + batchSize + 1, + adj_ia_baseline.data_handle()); + raft::sparse::convert::adj_to_csr(handle, + adj.data(), + adj_ia_baseline.data_handle(), + batchSize, + param.n_row, + labels.data(), + adj_ja_baseline.data_handle()); + } + + // exact computation with 1 pass + { + int64_t max_k = expected_max_k; + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_vector_view(adj_ia.data_handle(), batchSize + 1), + make_device_vector_view(adj_ja.data_handle(), batchSize * param.n_row), + make_device_vector_view(vd.data(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps, + make_host_scalar_view(&max_k)); + ASSERT_TRUE(raft::devArrMatch(adj_ia_baseline.data_handle(), + adj_ia.data_handle(), + batchSize + 1, + raft::Compare(), + stream)); + ASSERT_TRUE(assertCsrEqualUnordered(adj_ia_baseline.data_handle(), + adj_ja_baseline.data_handle(), + adj_ia.data_handle(), + adj_ja.data_handle(), + batchSize, + param.n_row, + stream)); + ASSERT_TRUE(raft::devArrMatch( + vd_baseline.data_handle(), vd.data(), batchSize + 1, raft::Compare(), stream)); + ASSERT_TRUE(max_k == expected_max_k); + } + + // k-limited computation with 1 pass + { + int64_t max_k = expected_max_k / 2; + raft::neighbors::ball_cover::eps_nn( + handle, + rbc_index, + make_device_vector_view(adj_ia.data_handle(), batchSize + 1), + make_device_vector_view(adj_ja.data_handle(), batchSize * param.n_row), + make_device_vector_view(vd.data(), batchSize + 1), + make_device_matrix_view(query, batchSize, param.n_col), + param.eps * param.eps, + make_host_scalar_view(&max_k)); + ASSERT_TRUE(max_k == expected_max_k); + ASSERT_TRUE(raft::devArrMatch( + expected_max_k / 2, vd.data(), batchSize, raft::Compare(), stream)); + ASSERT_TRUE(raft::devArrMatch(expected_max_k / 2 * batchSize, + vd.data() + batchSize, + 1, + raft::Compare(), + stream)); + } + } +} + +INSTANTIATE_TEST_CASE_P(EpsNeighTests, EpsNeighRbcTestFI, ::testing::ValuesIn(inputsfi_rbc)); + }; // namespace knn }; // namespace spatial }; // namespace raft diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index afb4ed18ea..e003aa879c 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -38,6 +38,7 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | | `codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail | | `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "host" | What memory type should the dataset reside? | +| `max_train_points_per_pq_code` | `build` | N | Positive Number >=1 | 256 | Max number of data points per PQ code used for PQ code book creation. Depending on input dataset size, the data points could be less than what user specifies. | | `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | | `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | | `internalDistanceDtype` | `search` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | diff --git a/docs/source/cpp_api/neighbors_hnsw.rst b/docs/source/cpp_api/neighbors_hnsw.rst new file mode 100644 index 0000000000..86f9544c35 --- /dev/null +++ b/docs/source/cpp_api/neighbors_hnsw.rst @@ -0,0 +1,29 @@ +HNSW +===== + +HNSW is a graph-based nearest neighbors implementation for the CPU. +This implementation provides the ability to serialize a CAGRA graph and read it as a base-layer-only hnswlib graph. + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *raft::neighbors::hnsw* + +.. doxygengroup:: hnsw + :project: RAFT + :members: + :content-only: + +Serializer Methods +------------------ +``#include `` + +namespace *raft::neighbors::hnsw* + +.. doxygengroup:: hnsw_serialize + :project: RAFT + :members: + :content-only: diff --git a/docs/source/pylibraft_api/neighbors.rst b/docs/source/pylibraft_api/neighbors.rst index 680a2982cb..e9e890fccb 100644 --- a/docs/source/pylibraft_api/neighbors.rst +++ b/docs/source/pylibraft_api/neighbors.rst @@ -33,6 +33,22 @@ Serializer Methods .. autofunction:: pylibraft.neighbors.cagra.load +HNSW +#### + +.. autoclass:: pylibraft.neighbors.hnsw.SearchParams + :members: + +.. autofunction:: pylibraft.neighbors.hnsw.from_cagra + +.. autofunction:: pylibraft.neighbors.hnsw.search + +Serializer Methods +------------------ +.. autofunction:: pylibraft.neighbors.hnsw.save + +.. autofunction:: pylibraft.neighbors.hnsw.load + IVF-Flat ######## diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md index 983cbefd5a..c001bdc409 100644 --- a/docs/source/wiki_all_dataset.md +++ b/docs/source/wiki_all_dataset.md @@ -2,14 +2,11 @@ The `wiki-all` dataset was created to stress vector search algorithms at scale with both a large number of vectors and dimensions. The entire dataset contains 88M vectors with 768 dimensions and is meant for testing the types of vectors one would typically encounter in retrieval augmented generation (RAG) workloads. The full dataset is ~251GB in size, which is intentionally larger than the typical memory of GPUs. The massive scale is intended to promote the use of compression and efficient out-of-core methods for both indexing and search. -The dataset is composed of all the available languages of in the [Cohere Wikipedia dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12). An [English version]( https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) is also available. - - The dataset is composed of English wiki texts from [Kaggle](https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) and multi-lingual wiki texts from [Cohere Wikipedia](https://huggingface.co/datasets/Cohere/wikipedia-22-12). Cohere's English Texts are older (2022) and smaller than the Kaggle English Wiki texts (2023) so the English texts have been removed from Cohere completely. The final Wiki texts include English Wiki from Kaggle and the other languages from Cohere. The English texts constitute 50% of the total text size. -To form the final dataset, the Wiki texts were chunked into 85 million 128-token pieces. For reference, Cohere chunks Wiki texts into 104-token pieces. Finally, the embeddings of each chunk were computed using the [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) embedding model. The resulting dataset is an embedding matrix of size 88 million by 768. Also included with the dataset is a query file containing 10k query vectors and a groundtruth file to evaluate nearest neighbors algorithms. +To form the final dataset, the Wiki texts were chunked into 85 million 128-token pieces. For reference, Cohere chunks Wiki texts into 104-token pieces. Finally, the embeddings of each chunk were computed using the [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) embedding model. The resulting dataset is an embedding matrix of size 88 million by 768. Also included with the dataset is a query file containing 10k query vectors and a groundtruth file to evaluate nearest neighbors algorithms. ## Getting the dataset @@ -44,3 +41,7 @@ curl -s https://data.rapids.ai/raft/datasets/wiki_all_10M/wiki_all_10M.tar ## Using the dataset After the dataset is downloaded and extracted to the `wiki_all_88M` directory (or `wiki_all_1M`/`wiki_all_10M` depending on whether the subsets are used), the files can be used in the benchmarking tool. The dataset name is `wiki_all` (or `wiki_all_1M`/`wiki_all_10M`), and the benchmarking tool can be used by specifying the appropriate name `--dataset wiki_all_88M` in the scripts. + +## License info + +The English wiki texts available on Kaggle come with the [CC BY-NCSA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license and the Cohere wikipedia data set comes with the [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) license. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2982db2a23..1e4ba0b369 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,9 @@ ignore_missing_imports = true # If we don't specify this, then mypy will check excluded files if # they are imported by a checked file. follow_imports = "skip" +exclude = [ + "pylibraft/pylibraft/test", + ] [tool.codespell] # note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - diff --git a/python/pylibraft/pylibraft/common/mdspan.pxd b/python/pylibraft/pylibraft/common/mdspan.pxd index 17dd2d8bfd..9da3957f03 100644 --- a/python/pylibraft/pylibraft/common/mdspan.pxd +++ b/python/pylibraft/pylibraft/common/mdspan.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,8 @@ # cython: embedsignature = True # cython: language_level = 3 -from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t +from libcpp cimport bool from libcpp.string cimport string from pylibraft.common.cpp.mdspan cimport ( @@ -40,6 +41,9 @@ ctypedef const uint8_t const_uint8_t cdef device_matrix_view[float, int64_t, row_major] get_dmv_float( array, check_shape) except * +cdef device_matrix_view[bool, int64_t, row_major] get_dmv_bool( + array, check_shape) except * + cdef device_matrix_view[uint8_t, int64_t, row_major] get_dmv_uint8( array, check_shape) except * @@ -79,6 +83,9 @@ cdef host_matrix_view[int64_t, int64_t, row_major] get_hmv_int64( cdef host_matrix_view[uint32_t, int64_t, row_major] get_hmv_uint32( array, check_shape) except * +cdef host_matrix_view[uint64_t, int64_t, row_major] get_hmv_uint64( + array, check_shape) except * + cdef host_matrix_view[const_float, int64_t, row_major] get_const_hmv_float( array, check_shape) except * diff --git a/python/pylibraft/pylibraft/common/mdspan.pyx b/python/pylibraft/pylibraft/common/mdspan.pyx index 7442a6bb89..c1a9188585 100644 --- a/python/pylibraft/pylibraft/common/mdspan.pyx +++ b/python/pylibraft/pylibraft/common/mdspan.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ from cpython.object cimport PyObject from cython.operator cimport dereference as deref from libc.stddef cimport size_t from libc.stdint cimport int8_t, int32_t, int64_t, uint8_t, uint32_t, uintptr_t +from libcpp cimport bool from pylibraft.common.cpp.mdspan cimport ( col_major, @@ -160,6 +161,18 @@ cdef device_matrix_view[float, int64_t, row_major] \ return make_device_matrix_view[float, int64_t, row_major]( cai.data, shape[0], shape[1]) + +cdef device_matrix_view[bool, int64_t, row_major] \ + get_dmv_bool(cai, check_shape) except *: + if cai.dtype != np.bool_: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_device_matrix_view[bool, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + cdef device_matrix_view[uint8_t, int64_t, row_major] \ get_dmv_uint8(cai, check_shape) except *: if cai.dtype != np.uint8: @@ -290,7 +303,7 @@ cdef host_matrix_view[int64_t, int64_t, row_major] \ cdef host_matrix_view[uint32_t, int64_t, row_major] \ get_hmv_uint32(cai, check_shape) except *: - if cai.dtype != np.int64: + if cai.dtype != np.uint32: raise TypeError("dtype %s not supported" % cai.dtype) if check_shape and len(cai.shape) != 2: raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) @@ -299,6 +312,17 @@ cdef host_matrix_view[uint32_t, int64_t, row_major] \ cai.data, shape[0], shape[1]) +cdef host_matrix_view[uint64_t, int64_t, row_major] \ + get_hmv_uint64(cai, check_shape) except *: + if cai.dtype != np.uint64: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_host_matrix_view[uint64_t, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + cdef host_matrix_view[const_float, int64_t, row_major] \ get_const_hmv_float(cai, check_shape) except *: if cai.dtype != np.float32: diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt index 45cd9f74e6..069038a0e8 100644 --- a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt +++ b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -13,7 +13,7 @@ # ============================================================================= # Set the list of Cython files to build -set(cython_sources common.pyx refine.pyx brute_force.pyx) +set(cython_sources common.pyx refine.pyx brute_force.pyx hnsw.pyx rbc.pyx) set(linked_libraries raft::raft raft::compiled) # Build all of the Cython targets diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py index 325ea5842e..86612b2fbb 100644 --- a/python/pylibraft/pylibraft/neighbors/__init__.py +++ b/python/pylibraft/pylibraft/neighbors/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,20 @@ # limitations under the License. # -from pylibraft.neighbors import brute_force, cagra, ivf_flat, ivf_pq +from pylibraft.neighbors import brute_force # type: ignore +from pylibraft.neighbors import hnsw # type: ignore +from pylibraft.neighbors import rbc # type: ignore +from pylibraft.neighbors import cagra, ivf_flat, ivf_pq from .refine import refine -__all__ = ["common", "refine", "brute_force", "ivf_flat", "ivf_pq", "cagra"] +__all__ = [ + "common", + "refine", + "brute_force", + "ivf_flat", + "ivf_pq", + "cagra", + "hnsw", + "rbc", +] diff --git a/python/pylibraft/pylibraft/neighbors/brute_force.pyx b/python/pylibraft/pylibraft/neighbors/brute_force.pyx index 4aa47b8a18..19d20fb75d 100644 --- a/python/pylibraft/pylibraft/neighbors/brute_force.pyx +++ b/python/pylibraft/pylibraft/neighbors/brute_force.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ from libc.stdint cimport int64_t, uintptr_t from pylibraft.common.cpp.optional cimport optional from pylibraft.common.handle cimport device_resources -from pylibraft.common.mdspan cimport get_dmv_float, get_dmv_int64 +from pylibraft.common.mdspan cimport get_dmv_bool, get_dmv_float, get_dmv_int64 from pylibraft.common.handle import auto_sync_handle from pylibraft.common.interruptible import cuda_interruptible @@ -51,12 +51,17 @@ from pylibraft.neighbors.common import _check_input_array from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, + device_vector_view, host_matrix_view, make_device_matrix_view, + make_device_vector_view, make_host_matrix_view, row_major, ) -from pylibraft.neighbors.cpp.brute_force cimport knn as c_knn +from pylibraft.neighbors.cpp.brute_force cimport ( + eps_neighbors as c_eps_neighbors, + knn as c_knn, +) def _get_array_params(array_interface, check_dtype=None): @@ -177,3 +182,88 @@ def knn(dataset, queries, k=None, indices=None, distances=None, raise TypeError("dtype %s not supported" % dataset_cai.dtype) return (distances, indices) + + +@auto_sync_handle +@auto_convert_output +def eps_neighbors(dataset, queries, eps, handle=None): + """ + Perform an epsilon neighborhood search using the L2-norm. + + Parameters + ---------- + dataset : array interface compliant matrix, row-major layout, + shape (n_samples, dim). Supported dtype [float] + queries : array interface compliant matrix, row-major layout, + shape (n_queries, dim) Supported dtype [float] + eps : threshold + {handle_docstring} + + Returns + ------- + adj: array interface compliant object containing bool adjacency mask + shape (n_queries, n_samples) + + vd: array interface compliant object containing row sums of adj + shape (n_queries + 1). vd[n_queries] contains the total sum + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors.brute_force import eps_neighbors + >>> handle = DeviceResources() + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> eps = 0.1 + >>> adj, vd = eps_neighbors(dataset, queries, eps, handle=handle) + >>> adj = cp.asarray(adj) + >>> vd = cp.asarray(vd) + >>> # pylibraft functions are often asynchronous so the + >>> # handle needs to be explicitly synchronized + >>> handle.sync() + """ + + if handle is None: + handle = DeviceResources() + + dataset_cai = cai_wrapper(dataset) + queries_cai = cai_wrapper(queries) + + # we require c-contiguous (rowmajor) inputs here + _check_input_array(dataset_cai, [np.dtype("float32")]) + _check_input_array(queries_cai, [np.dtype("float32")], + exp_cols=dataset_cai.shape[1]) + + n_queries = queries_cai.shape[0] + n_samples = dataset_cai.shape[0] + + adj = device_ndarray.empty((n_queries, n_samples), dtype='bool') + vd = device_ndarray.empty((n_queries + 1, ), dtype='int64') + adj_cai = cai_wrapper(adj) + vd_cai = cai_wrapper(vd) + + cdef device_resources* handle_ = \ + handle.getHandle() + + vd_vector_view = make_device_vector_view( + vd_cai.data, vd_cai.shape[0]) + + if dataset_cai.dtype == np.float32: + with cuda_interruptible(): + c_eps_neighbors( + deref(handle_), + get_dmv_float(dataset_cai, check_shape=True), + get_dmv_float(queries_cai, check_shape=True), + get_dmv_bool(adj_cai, check_shape=True), + vd_vector_view, + eps) + else: + raise TypeError("dtype %s not supported" % dataset_cai.dtype) + + return (adj, vd) diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pxd b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pxd new file mode 100644 index 0000000000..98537f8357 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pxd @@ -0,0 +1,39 @@ +# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from libc.stdint cimport int8_t, uint8_t, uint32_t +from libcpp cimport bool +from libcpp.string cimport string + +cimport pylibraft.neighbors.cagra.cpp.c_cagra as c_cagra + + +cdef class Index: + cdef readonly bool trained + cdef str active_index_type + +cdef class IndexFloat(Index): + cdef c_cagra.index[float, uint32_t] * index + +cdef class IndexInt8(Index): + cdef c_cagra.index[int8_t, uint32_t] * index + +cdef class IndexUint8(Index): + cdef c_cagra.index[uint8_t, uint32_t] * index diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx index c19faa826d..df31d2560b 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx +++ b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx @@ -142,8 +142,6 @@ cdef class IndexParams: cdef class Index: - cdef readonly bool trained - cdef str active_index_type def __cinit__(self): self.trained = False @@ -151,7 +149,6 @@ cdef class Index: cdef class IndexFloat(Index): - cdef c_cagra.index[float, uint32_t] * index def __cinit__(self, handle=None): if handle is None: @@ -216,7 +213,6 @@ cdef class IndexFloat(Index): cdef class IndexInt8(Index): - cdef c_cagra.index[int8_t, uint32_t] * index def __cinit__(self, handle=None): if handle is None: @@ -281,7 +277,6 @@ cdef class IndexInt8(Index): cdef class IndexUint8(Index): - cdef c_cagra.index[uint8_t, uint32_t] * index def __cinit__(self, handle=None): if handle is None: diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd index 7e22f274e9..1dffd40186 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd +++ b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -211,6 +211,36 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ const index[uint8_t, uint32_t]& index, bool include_dataset) except + + cdef void serialize_to_hnswlib( + const device_resources& handle, + string& str, + const index[float, uint32_t]& index) except + + + cdef void serialize_to_hnswlib( + const device_resources& handle, + string& str, + const index[uint8_t, uint32_t]& index) except + + + cdef void serialize_to_hnswlib( + const device_resources& handle, + string& str, + const index[int8_t, uint32_t]& index) except + + + cdef void serialize_to_hnswlib_file( + const device_resources& handle, + const string& filename, + const index[float, uint32_t]& index) except + + + cdef void serialize_to_hnswlib_file( + const device_resources& handle, + const string& filename, + const index[uint8_t, uint32_t]& index) except + + + cdef void serialize_to_hnswlib_file( + const device_resources& handle, + const string& filename, + const index[int8_t, uint32_t]& index) except + + cdef void deserialize_file(const device_resources& handle, const string& filename, index[uint8_t, uint32_t]* index) except + diff --git a/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd b/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd index de5e0af267..5f6a83a9dc 100644 --- a/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd +++ b/python/pylibraft/pylibraft/neighbors/cpp/brute_force.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -32,8 +32,10 @@ from rmm._lib.memory_resource cimport device_memory_resource from pylibraft.common.cpp.mdspan cimport ( device_matrix_view, + device_vector_view, host_matrix_view, make_device_matrix_view, + make_device_vector_view, make_host_matrix_view, row_major, ) @@ -53,3 +55,14 @@ cdef extern from "raft_runtime/neighbors/brute_force.hpp" \ DistanceType metric, optional[float] metric_arg, optional[int64_t] global_id_offset) except + + +cdef extern from "raft_runtime/neighbors/eps_neighborhood.hpp" \ + namespace "raft::runtime::neighbors::epsilon_neighborhood" nogil: + + cdef void eps_neighbors( + const device_resources & handle, + device_matrix_view[float, int64_t, row_major] index, + device_matrix_view[float, int64_t, row_major] search, + device_matrix_view[bool, int64_t, row_major] adj, + device_vector_view[int64_t, int64_t] vd, + float eps) except + diff --git a/python/pylibraft/pylibraft/neighbors/cpp/hnsw.pxd b/python/pylibraft/pylibraft/neighbors/cpp/hnsw.pxd new file mode 100644 index 0000000000..75c0c14aad --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/cpp/hnsw.pxd @@ -0,0 +1,94 @@ +# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string + +from pylibraft.common.cpp.mdspan cimport ( + device_vector_view, + host_matrix_view, + row_major, +) +from pylibraft.common.handle cimport device_resources +from pylibraft.distance.distance_type cimport DistanceType +from pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq cimport ( + ann_index, + ann_search_params, +) + + +cdef extern from "raft/neighbors/hnsw.hpp" \ + namespace "raft::neighbors::hnsw" nogil: + + cpdef cppclass search_params(ann_search_params): + int ef + int num_threads + + cdef cppclass index[T](ann_index): + index(int dim, DistanceType metric) + + int dim() + DistanceType metric() + + +cdef extern from "raft_runtime/neighbors/hnsw.hpp" \ + namespace "raft::runtime::neighbors::hnsw" nogil: + cdef void search( + const device_resources& handle, + const search_params& params, + const index[float]& index, + host_matrix_view[float, int64_t, row_major] queries, + host_matrix_view[uint64_t, int64_t, row_major] neighbors, + host_matrix_view[float, int64_t, row_major] distances) except + + + cdef void search( + const device_resources& handle, + const search_params& params, + const index[int8_t]& index, + host_matrix_view[int8_t, int64_t, row_major] queries, + host_matrix_view[uint64_t, int64_t, row_major] neighbors, + host_matrix_view[float, int64_t, row_major] distances) except + + + cdef void search( + const device_resources& handle, + const search_params& params, + const index[uint8_t]& index, + host_matrix_view[uint8_t, int64_t, row_major] queries, + host_matrix_view[uint64_t, int64_t, row_major] neighbors, + host_matrix_view[float, int64_t, row_major] distances) except + + + cdef unique_ptr[index[float]] deserialize_file[float]( + const device_resources& handle, + const string& filename, + int dim, + DistanceType metric) except + + + cdef unique_ptr[index[int8_t]] deserialize_file[int8_t]( + const device_resources& handle, + const string& filename, + int dim, + DistanceType metric) except + + + cdef unique_ptr[index[uint8_t]] deserialize_file[uint8_t]( + const device_resources& handle, + const string& filename, + int dim, + DistanceType metric) except + diff --git a/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd b/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd new file mode 100644 index 0000000000..531c0dc2c1 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/cpp/rbc.pxd @@ -0,0 +1,84 @@ +# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +import pylibraft.common.handle + +from cython.operator cimport dereference as deref +from libc.stdint cimport int8_t, int64_t, uint8_t, uint64_t, uintptr_t +from libcpp cimport bool, nullptr +from libcpp.string cimport string +from libcpp.vector cimport vector + +from rmm._lib.memory_resource cimport device_memory_resource + +from pylibraft.common.cpp.mdspan cimport ( + device_matrix_view, + device_vector_view, + host_matrix_view, + make_device_matrix_view, + make_host_matrix_view, + row_major, +) +from pylibraft.common.handle cimport device_resources +from pylibraft.distance.distance_type cimport DistanceType + + +cdef extern from "raft/neighbors/ball_cover_types.hpp" \ + namespace "raft::neighbors::ball_cover" nogil: + + cdef cppclass BallCoverIndex[IdxT, T, IntT, MatIdxT]: + BallCoverIndex(const device_resources& handle, + device_matrix_view[T, MatIdxT, row_major] dataset, + DistanceType metric) + + +cdef extern from "raft_runtime/neighbors/eps_neighborhood.hpp" \ + namespace "raft::runtime::neighbors::epsilon_neighborhood" nogil: + + cdef void eps_neighbors_rbc( + const device_resources & handle, + device_matrix_view[float, int64_t, row_major] index, + device_matrix_view[float, int64_t, row_major] search, + device_matrix_view[bool, int64_t, row_major] adj, + device_vector_view[int64_t, int64_t] vd, + float eps) except + + + cdef void build_rbc_index( + const device_resources & handle, + BallCoverIndex[int64_t, float, int64_t, int64_t] rbc_index) except + + + cdef void eps_neighbors_rbc_pass1( + const device_resources & handle, + BallCoverIndex[int64_t, float, int64_t, int64_t] rbc_index, + device_matrix_view[float, int64_t, row_major] search, + device_vector_view[int64_t, int64_t] adj_ia, + device_vector_view[int64_t, int64_t] vd, + float eps) except + + + cdef void eps_neighbors_rbc_pass2( + const device_resources & handle, + BallCoverIndex[int64_t, float, int64_t, int64_t] rbc_index, + device_matrix_view[float, int64_t, row_major] search, + device_vector_view[int64_t, int64_t] adj_ia, + device_vector_view[int64_t, int64_t] adj_ja, + device_vector_view[int64_t, int64_t] vd, + float eps) except + diff --git a/python/pylibraft/pylibraft/neighbors/hnsw.pyx b/python/pylibraft/pylibraft/neighbors/hnsw.pyx new file mode 100644 index 0000000000..aa589ffb65 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/hnsw.pyx @@ -0,0 +1,488 @@ +# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cython.operator cimport dereference as deref +from libc.stdint cimport int8_t, uint8_t, uint32_t +from libcpp cimport bool +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string + +cimport pylibraft.neighbors.cagra.cpp.c_cagra as c_cagra +from pylibraft.distance.distance_type cimport DistanceType +from pylibraft.neighbors.cagra.cagra cimport ( + Index, + IndexFloat, + IndexInt8, + IndexUint8, +) + +from pylibraft.common.handle import auto_sync_handle + +from pylibraft.common.handle cimport device_resources + +from pylibraft.common import DeviceResources, ai_wrapper, auto_convert_output + +cimport pylibraft.neighbors.cpp.hnsw as c_hnsw + +from pylibraft.neighbors.common import _check_input_array, _get_metric + +from pylibraft.common.mdspan cimport ( + get_hmv_float, + get_hmv_int8, + get_hmv_uint8, + get_hmv_uint64, +) +from pylibraft.neighbors.common cimport _get_metric_string + +import os + +import numpy as np + + +cdef class HnswIndex: + cdef readonly bool trained + cdef str active_index_type + + def __cinit__(self): + self.trained = False + self.active_index_type = None + +cdef class HnswIndexFloat(HnswIndex): + cdef unique_ptr[c_hnsw.index[float]] index + + def __cinit__(self): + pass + + def __repr__(self): + m_str = "metric=" + _get_metric_string(self.metric) + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in ["dim"]] + attr_str = [m_str] + attr_str + return "Index(type=hnsw, " + (", ".join(attr_str)) + ")" + + @property + def dim(self): + return self.index.get()[0].dim() + + @property + def metric(self): + return self.index.get()[0].metric() + +cdef class HnswIndexInt8(HnswIndex): + cdef unique_ptr[c_hnsw.index[int8_t]] index + + def __cinit__(self): + pass + + def __repr__(self): + m_str = "metric=" + _get_metric_string(self.metric) + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in ["dim"]] + attr_str = [m_str] + attr_str + return "Index(type=hnsw, " + (", ".join(attr_str)) + ")" + + @property + def dim(self): + return self.index.get()[0].dim() + + @property + def metric(self): + return self.index.get()[0].metric() + +cdef class HnswIndexUint8(HnswIndex): + cdef unique_ptr[c_hnsw.index[uint8_t]] index + + def __cinit__(self): + pass + + def __repr__(self): + m_str = "metric=" + _get_metric_string(self.metric) + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in ["dim"]] + attr_str = [m_str] + attr_str + return "Index(type=hnsw, " + (", ".join(attr_str)) + ")" + + @property + def dim(self): + return self.index.get()[0].dim() + + @property + def metric(self): + return self.index.get()[0].metric() + + +@auto_sync_handle +def save(filename, Index index, handle=None): + """ + Saves the CAGRA index as an hnswlib base-layer-only index to a file. + + Saving / loading the index is experimental. The serialization format is + subject to change. + + Parameters + ---------- + filename : string + Name of the file. + index : Index + Trained CAGRA index. + {handle_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> from pylibraft.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> # Serialize the CAGRA index to hnswlib base layer only index format + >>> hnsw.save("my_index.bin", index, handle=handle) + """ + if not index.trained: + raise ValueError("Index need to be built before saving it.") + + if handle is None: + handle = DeviceResources() + cdef device_resources* handle_ = \ + handle.getHandle() + + cdef string c_filename = filename.encode('utf-8') + + cdef IndexFloat idx_float + cdef IndexInt8 idx_int8 + cdef IndexUint8 idx_uint8 + + cdef c_cagra.index[float, uint32_t] * c_index_float + cdef c_cagra.index[int8_t, uint32_t] * c_index_int8 + cdef c_cagra.index[uint8_t, uint32_t] * c_index_uint8 + + if index.active_index_type == "float32": + idx_float = index + c_index_float = \ + idx_float.index + c_cagra.serialize_to_hnswlib_file( + deref(handle_), c_filename, deref(c_index_float)) + elif index.active_index_type == "byte": + idx_int8 = index + c_index_int8 = \ + idx_int8.index + c_cagra.serialize_to_hnswlib_file( + deref(handle_), c_filename, deref(c_index_int8)) + elif index.active_index_type == "ubyte": + idx_uint8 = index + c_index_uint8 = \ + idx_uint8.index + c_cagra.serialize_to_hnswlib_file( + deref(handle_), c_filename, deref(c_index_uint8)) + else: + raise ValueError( + "Index dtype %s not supported" % index.active_index_type) + + +@auto_sync_handle +def load(filename, dim, dtype, metric="sqeuclidean", handle=None): + """ + Loads base-layer-only hnswlib index from file, which was originally + saved as a built CAGRA index. + + Saving / loading the index is experimental. The serialization format is + subject to change, therefore loading an index saved with a previous + version of raft is not guaranteed to work. + + Parameters + ---------- + filename : string + Name of the file. + dim : int + Dimensions of the training dataest + dtype : np.dtype of the saved index + Valid values for dtype: [np.float32, np.byte, np.ubyte] + metric : string denoting the metric type, default="sqeuclidean" + Valid values for metric: ["sqeuclidean", "inner_product"], where + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, + - inner product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. + {handle_docstring} + + Returns + ------- + index : HnswIndex + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> from pylibraft.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> # Serialize the CAGRA index to hnswlib base layer only index format + >>> hnsw.save("my_index.bin", index, handle=handle) + >>> index = hnsw.load("my_index.bin", n_features, np.float32, + ... "sqeuclidean") + """ + if handle is None: + handle = DeviceResources() + cdef device_resources* handle_ = \ + handle.getHandle() + + cdef string c_filename = filename.encode('utf-8') + cdef HnswIndexFloat idx_float + cdef HnswIndexInt8 idx_int8 + cdef HnswIndexUint8 idx_uint8 + + cdef DistanceType c_metric = _get_metric(metric) + + if dtype == np.float32: + idx_float = HnswIndexFloat() + idx_float.index = c_hnsw.deserialize_file[float]( + deref(handle_), c_filename, dim, c_metric) + idx_float.trained = True + idx_float.active_index_type = 'float32' + return idx_float + elif dtype == np.byte: + idx_int8 = HnswIndexInt8(dim, metric) + idx_int8.index = c_hnsw.deserialize_file[int8_t]( + deref(handle_), c_filename, dim, c_metric) + idx_int8.trained = True + idx_int8.active_index_type = 'byte' + return idx_int8 + elif dtype == np.ubyte: + idx_uint8 = HnswIndexUint8(dim, metric) + idx_uint8.index = c_hnsw.deserialize_file[uint8_t]( + deref(handle_), c_filename, dim, c_metric) + idx_uint8.trained = True + idx_uint8.active_index_type = 'ubyte' + return idx_uint8 + else: + raise ValueError("Dataset dtype %s not supported" % dtype) + + +@auto_sync_handle +def from_cagra(Index index, handle=None): + """ + Returns an hnswlib base-layer-only index from a CAGRA index. + + NOTE: This method uses the filesystem to write the CAGRA index in + `/tmp/cagra_index.bin` before reading it as an hnswlib index, + then deleting the temporary file. + + Saving / loading the index is experimental. The serialization format is + subject to change. + + Parameters + ---------- + index : Index + Trained CAGRA index. + {handle_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> from pylibraft.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> # Serialize the CAGRA index to hnswlib base layer only index format + >>> hnsw_index = hnsw.from_cagra(index, handle=handle) + """ + filename = "/tmp/cagra_index.bin" + save(filename, index, handle=handle) + hnsw_index = load(filename, index.dim, np.dtype(index.active_index_type), + _get_metric_string(index.metric), handle=handle) + os.remove(filename) + return hnsw_index + + +cdef class SearchParams: + """ + Hnswlib search parameters + + Parameters + ---------- + ef: int, default=200 + Size of list from which final neighbors k will be selected. + ef should be greater than or equal to k. + num_threads: int, default=1 + Number of host threads to use to search the hnswlib index + and increase concurrency + """ + cdef c_hnsw.search_params params + + def __init__(self, ef=200, num_threads=1): + self.params.ef = ef + self.params.num_threads = num_threads + + def __repr__(self): + attr_str = [attr + "=" + str(getattr(self, attr)) + for attr in [ + "ef", "num_threads"]] + return "SearchParams(type=hnsw, " + ( + ", ".join(attr_str)) + ")" + + @property + def ef(self): + return self.params.ef + + @property + def num_threads(self): + return self.params.num_threads + + +@auto_sync_handle +@auto_convert_output +def search(SearchParams search_params, + HnswIndex index, + queries, + k, + neighbors=None, + distances=None, + handle=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : SearchParams + index : HnswIndex + Trained CAGRA index saved as base-layer-only hnswlib index. + queries : array interface compliant matrix shape (n_samples, dim) + Supported dtype [float, int8, uint8] + k : int + The number of neighbors. + neighbors : Optional array interface compliant matrix shape + (n_queries, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. (default None) + {handle_docstring} + + Examples + -------- + >>> import cupy as cp + >>> import numpy as np + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors import cagra + >>> from pylibraft.neighbors import hnsw + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> handle = DeviceResources() + >>> index = cagra.build(cagra.IndexParams(), dataset, handle=handle) + >>> + >>> # Load saved base-layer-only hnswlib index from CAGRA index + >>> hnsw_index = hnsw.from_cagra(index, handle=handle) + >>> + >>> # Search hnswlib using the loaded index + >>> queries = np.random.random_sample((n_queries, n_features)).astype( + ... np.float32) + >>> k = 10 + >>> search_params = hnsw.SearchParams( + ... ef=20, + ... num_threads=5 + ... ) + >>> distances, neighbors = hnsw.search(search_params, hnsw_index, + ... queries, k, handle=handle) + """ + + if not index.trained: + raise ValueError("Index need to be built before calling search.") + + if handle is None: + handle = DeviceResources() + cdef device_resources* handle_ = \ + handle.getHandle() + + queries_ai = ai_wrapper(queries) + queries_dt = queries_ai.dtype + cdef uint32_t n_queries = queries_ai.shape[0] + + _check_input_array(queries_ai, [np.dtype('float32'), np.dtype('byte'), + np.dtype('ubyte')], + exp_cols=index.dim) + + if neighbors is None: + neighbors = np.empty((n_queries, k), dtype='uint64') + + neighbors_ai = ai_wrapper(neighbors) + _check_input_array(neighbors_ai, [np.dtype('uint64')], + exp_rows=n_queries, exp_cols=k) + + if distances is None: + distances = np.empty((n_queries, k), dtype='float32') + + distances_ai = ai_wrapper(distances) + _check_input_array(distances_ai, [np.dtype('float32')], + exp_rows=n_queries, exp_cols=k) + + cdef c_hnsw.search_params params = search_params.params + cdef HnswIndexFloat idx_float + cdef HnswIndexInt8 idx_int8 + cdef HnswIndexUint8 idx_uint8 + + if queries_dt == np.float32: + idx_float = index + c_hnsw.search(deref(handle_), + params, + deref(idx_float.index), + get_hmv_float(queries_ai, check_shape=True), + get_hmv_uint64(neighbors_ai, check_shape=True), + get_hmv_float(distances_ai, check_shape=True)) + elif queries_dt == np.byte: + idx_int8 = index + c_hnsw.search(deref(handle_), + params, + deref(idx_int8.index), + get_hmv_int8(queries_ai, check_shape=True), + get_hmv_uint64(neighbors_ai, check_shape=True), + get_hmv_float(distances_ai, check_shape=True)) + elif queries_dt == np.ubyte: + idx_uint8 = index + c_hnsw.search(deref(handle_), + params, + deref(idx_uint8.index), + get_hmv_uint8(queries_ai, check_shape=True), + get_hmv_uint64(neighbors_ai, check_shape=True), + get_hmv_float(distances_ai, check_shape=True)) + else: + raise ValueError("query dtype %s not supported" % queries_dt) + + return (distances, neighbors) diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 531c2428e9..930c3245f1 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -78,6 +78,7 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation + uint32_t max_train_points_per_pq_code cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 0c1bbf6b9c..7081b65ce3 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -156,6 +156,14 @@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. + max_train_points_per_pq_code : int, default = 256 + The max number of data points to use per PQ code during PQ codebook + training. Using more data points per PQ code may increase the + quality of PQ codebook but may also increase the build time. The + parameter is applied to both PQ codebook generation methods, i.e., + PER_SUBSPACE and PER_CLUSTER. In both cases, we will use + pq_book_size * max_train_points_per_pq_code training points to + train each codebook. """ def __init__(self, *, n_lists=1024, @@ -167,7 +175,8 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False): + conservative_memory_allocation=False, + max_train_points_per_pq_code=256): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -185,6 +194,8 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation + self.params.max_train_points_per_pq_code = \ + max_train_points_per_pq_code @property def n_lists(self): @@ -226,6 +237,9 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation + @property + def max_train_points_per_pq_code(self): + return self.params.max_train_points_per_pq_code cdef class Index: # We store a pointer to the index because it dose not have a trivial diff --git a/python/pylibraft/pylibraft/neighbors/rbc.pyx b/python/pylibraft/pylibraft/neighbors/rbc.pyx new file mode 100644 index 0000000000..a703dc1745 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/rbc.pyx @@ -0,0 +1,241 @@ +# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from cython.operator cimport dereference as deref +from libcpp cimport bool, nullptr +from libcpp.vector cimport vector + +from pylibraft.common import ( + DeviceResources, + auto_convert_output, + cai_wrapper, + device_ndarray, +) + +from libc.stdint cimport int64_t, uintptr_t + +from pylibraft.common.cpp.optional cimport optional +from pylibraft.common.handle cimport device_resources +from pylibraft.common.mdspan cimport get_dmv_bool, get_dmv_float, get_dmv_int64 + +from pylibraft.common.handle import auto_sync_handle +from pylibraft.common.interruptible import cuda_interruptible +from pylibraft.neighbors.common import _check_input_array, _get_metric + +from pylibraft.common.cpp.mdspan cimport ( + device_matrix_view, + device_vector_view, + host_matrix_view, + make_device_matrix_view, + make_device_vector_view, + make_host_matrix_view, + row_major, +) +from pylibraft.neighbors.cpp.rbc cimport ( + BallCoverIndex as c_BallCoverIndex, + build_rbc_index as c_build_rbc_index, + eps_neighbors_rbc as c_eps_neighbors_rbc, + eps_neighbors_rbc_pass1 as c_eps_neighbors_rbc_pass1, + eps_neighbors_rbc_pass2 as c_eps_neighbors_rbc_pass2, +) + + +cdef class RbcIndex: + cdef readonly bool trained + cdef str data_type + + def __cinit__(self): + self.trained = False + self.data_type = None + + +cdef class RbcIndexFloat(RbcIndex): + cdef c_BallCoverIndex[int64_t, float, int64_t, int64_t]* index + + def __cinit__(self, dataset, handle): + cdef device_resources* handle_ = \ + handle.getHandle() + self.index = new c_BallCoverIndex[int64_t, float, int64_t, int64_t]( + deref(handle_), + get_dmv_float(dataset, check_shape=True), + _get_metric("euclidean")) + + +@auto_sync_handle +@auto_convert_output +def build_rbc_index(dataset, handle=None): + """ + Builds a random ball cover index from dataset using the L2-norm. + + Parameters + ---------- + dataset : array interface compliant matrix, row-major layout, + shape (n_samples, dim). Supported dtype [float] + {handle_docstring} + + Returns + ------- + index : Index + + Examples + -------- + see 'eps_neighbors_sparse' + + """ + if handle is None: + handle = DeviceResources() + + dataset_cai = cai_wrapper(dataset) + + # we require c-contiguous (rowmajor) inputs here + _check_input_array(dataset_cai, [np.dtype("float32")]) + + cdef device_resources* handle_ = \ + handle.getHandle() + + cdef RbcIndexFloat rbc_index_float + + if dataset_cai.dtype == np.float32: + rbc_index_float = RbcIndexFloat(dataset=dataset_cai, handle=handle) + rbc_index_float.data_type = "float32" + with cuda_interruptible(): + c_build_rbc_index( + deref(handle_), + deref(rbc_index_float.index)) + rbc_index_float.trained = True + return rbc_index_float + else: + raise TypeError("dtype %s not supported" % dataset_cai.dtype) + + +@auto_sync_handle +@auto_convert_output +def eps_neighbors(RbcIndex rbc_index, queries, eps, handle=None): + """ + Perform an epsilon neighborhood search with random ball cover (rbc) + using the L2-norm. + + Parameters + ---------- + rbc_index : RbcIndex created via 'build_rbc_index'. + Supported dtype [float] + queries : array interface compliant matrix, row-major layout, + shape (n_queries, dim) Supported dtype [float] + eps : threshold + {handle_docstring} + + Returns + ------- + adj_ia: array interface compliant object containing row indices for + adj_ja + + adj_ja: array interface compliant object containing adjacency mask + column indices + + vd: array interface compliant object containing row sums of adj + shape (n_queries + 1). vd[n_queries] contains the total sum + + Examples + -------- + >>> import cupy as cp + >>> from pylibraft.common import DeviceResources + >>> from pylibraft.neighbors.rbc import eps_neighbors + >>> from pylibraft.neighbors.rbc import build_rbc_index + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> eps = 0.1 + >>> handle = DeviceResources() + >>> rbc_index = build_rbc_index(dataset) + >>> adj_ia, adj_ja, vd = eps_neighbors(rbc_index, queries, eps) + >>> adj_ia = cp.asarray(adj_ia) + >>> adj_ja = cp.asarray(adj_ja) + >>> vd = cp.asarray(vd) + >>> # pylibraft functions are often asynchronous so the + >>> # handle needs to be explicitly synchronized + >>> handle.sync() + """ + if not rbc_index.trained: + raise ValueError("Index need to be built before calling extend.") + + if handle is None: + handle = DeviceResources() + + queries_cai = cai_wrapper(queries) + + _check_input_array(queries_cai, [np.dtype(rbc_index.data_type)]) + + n_queries = queries_cai.shape[0] + + adj_ia = device_ndarray.empty((n_queries + 1, ), dtype='int64') + vd = device_ndarray.empty((n_queries + 1, ), dtype='int64') + adj_ia_cai = cai_wrapper(adj_ia) + vd_cai = cai_wrapper(vd) + + cdef device_resources* handle_ = \ + handle.getHandle() + + vd_vector_view = make_device_vector_view( + vd_cai.data, vd_cai.shape[0]) + adj_ia_vector_view = make_device_vector_view( + adj_ia_cai.data, adj_ia_cai.shape[0]) + + cdef RbcIndexFloat rbc_index_float + + if queries_cai.dtype == np.float32: + rbc_index_float = rbc_index + with cuda_interruptible(): + c_eps_neighbors_rbc_pass1( + deref(handle_), + deref(rbc_index_float.index), + get_dmv_float(queries_cai, check_shape=True), + adj_ia_vector_view, + vd_vector_view, + eps) + else: + raise TypeError("dtype %s not supported" % queries_cai.dtype) + + handle.sync() + n_nnz = adj_ia.copy_to_host()[n_queries] + adj_ja = device_ndarray.empty((n_nnz, ), dtype='int64') + adj_ja_cai = cai_wrapper(adj_ja) + adj_ja_vector_view = make_device_vector_view( + adj_ja_cai.data, adj_ja_cai.shape[0]) + + if queries_cai.dtype == np.float32: + with cuda_interruptible(): + c_eps_neighbors_rbc_pass2( + deref(handle_), + deref(rbc_index_float.index), + get_dmv_float(queries_cai, check_shape=True), + adj_ia_vector_view, + adj_ja_vector_view, + vd_vector_view, + eps) + else: + raise TypeError("dtype %s not supported" % queries_cai.dtype) + + return (adj_ia, adj_ja, vd) diff --git a/python/pylibraft/pylibraft/test/__init__py b/python/pylibraft/pylibraft/test/__init__py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/pylibraft/pylibraft/test/ann_utils.py b/python/pylibraft/pylibraft/test/ann_utils.py new file mode 100644 index 0000000000..60db7f3273 --- /dev/null +++ b/python/pylibraft/pylibraft/test/ann_utils.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# h ttp://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +def generate_data(shape, dtype): + if dtype == np.byte: + x = np.random.randint(-127, 128, size=shape, dtype=np.byte) + elif dtype == np.ubyte: + x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) + else: + x = np.random.random_sample(shape).astype(dtype) + + return x + + +def calc_recall(ann_idx, true_nn_idx): + assert ann_idx.shape == true_nn_idx.shape + n = 0 + for i in range(ann_idx.shape[0]): + n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size + recall = n / ann_idx.size + return recall diff --git a/python/pylibraft/pylibraft/test/test_cagra.py b/python/pylibraft/pylibraft/test/test_cagra.py index 24126c0c5a..be53b33da3 100644 --- a/python/pylibraft/pylibraft/test/test_cagra.py +++ b/python/pylibraft/pylibraft/test/test_cagra.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,27 +20,7 @@ from pylibraft.common import device_ndarray from pylibraft.neighbors import cagra - - -# todo (dantegd): consolidate helper utils of ann methods -def generate_data(shape, dtype): - if dtype == np.byte: - x = np.random.randint(-127, 128, size=shape, dtype=np.byte) - elif dtype == np.ubyte: - x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) - else: - x = np.random.random_sample(shape).astype(dtype) - - return x - - -def calc_recall(ann_idx, true_nn_idx): - assert ann_idx.shape == true_nn_idx.shape - n = 0 - for i in range(ann_idx.shape[0]): - n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size - recall = n / ann_idx.size - return recall +from pylibraft.test.ann_utils import calc_recall, generate_data def run_cagra_build_search_test( diff --git a/python/pylibraft/pylibraft/test/test_eps_neighborhood.py b/python/pylibraft/pylibraft/test/test_eps_neighborhood.py new file mode 100644 index 0000000000..f2643de904 --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_eps_neighborhood.py @@ -0,0 +1,102 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from scipy.sparse import csr_array + +from pylibraft.common import DeviceResources, Stream +from pylibraft.neighbors.brute_force import eps_neighbors as eps_neighbors_bf +from pylibraft.neighbors.rbc import ( + build_rbc_index, + eps_neighbors as eps_neighbors_rbc, +) + + +def test_bf_eps_neighbors_check_col_major_inputs(): + # make sure that we get an exception if passed col-major inputs, + # instead of returning incorrect results + cp = pytest.importorskip("cupy") + n_index_rows, n_query_rows, n_cols = 128, 16, 32 + eps = 0.02 + index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") + queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") + + with pytest.raises(ValueError): + eps_neighbors_bf(cp.asarray(index, order="F"), queries, eps) + + with pytest.raises(ValueError): + eps_neighbors_bf(index, cp.asarray(queries, order="F"), eps) + + # shouldn't throw an exception with c-contiguous inputs + eps_neighbors_bf(index, queries, eps) + + +def test_rbc_eps_neighbors_check_col_major_inputs(): + # make sure that we get an exception if passed col-major inputs, + # instead of returning incorrect results + cp = pytest.importorskip("cupy") + n_index_rows, n_query_rows, n_cols = 128, 16, 32 + eps = 0.02 + index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") + queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") + + with pytest.raises(ValueError): + build_rbc_index(cp.asarray(index, order="F")) + + rbc_index = build_rbc_index(index) + + with pytest.raises(ValueError): + eps_neighbors_rbc(rbc_index, cp.asarray(queries, order="F"), eps) + + eps_neighbors_rbc(rbc_index, queries, eps) + + +@pytest.mark.parametrize("n_index_rows", [32, 100, 1000]) +@pytest.mark.parametrize("n_query_rows", [32, 100, 1000]) +@pytest.mark.parametrize("n_cols", [2, 3, 40, 100]) +def test_eps_neighbors(n_index_rows, n_query_rows, n_cols): + s2 = Stream() + handle = DeviceResources(stream=s2) + + cp = pytest.importorskip("cupy") + eps = 0.02 + index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") + queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") + + # brute force + adj_bf, vd_bf = eps_neighbors_bf(index, queries, eps, handle=handle) + adj_bf = cp.asarray(adj_bf) + vd_bf = cp.asarray(vd_bf) + + rbc_index = build_rbc_index(index, handle=handle) + adj_rbc_ia, adj_rbc_ja, vd_rbc = eps_neighbors_rbc( + rbc_index, queries, eps, handle=handle + ) + adj_rbc_ia = cp.asarray(adj_rbc_ia) + adj_rbc_ja = cp.asarray(adj_rbc_ja) + vd_rbc = cp.asarray(vd_rbc) + + np.testing.assert_array_equal(vd_bf.get(), vd_rbc.get()) + + adj_rbc = csr_array( + ( + np.ones(adj_rbc_ia.get()[n_query_rows]), + adj_rbc_ja.get(), + adj_rbc_ia.get(), + ), + shape=(n_query_rows, n_index_rows), + ).toarray() + np.testing.assert_array_equal(adj_bf.get(), adj_rbc) diff --git a/python/pylibraft/pylibraft/test/test_hnsw.py b/python/pylibraft/pylibraft/test/test_hnsw.py new file mode 100644 index 0000000000..487f190e4e --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_hnsw.py @@ -0,0 +1,77 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# h ttp://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import normalize + +from pylibraft.neighbors import cagra, hnsw +from pylibraft.test.ann_utils import calc_recall, generate_data + + +def run_hnsw_build_search_test( + n_rows=10000, + n_cols=10, + n_queries=100, + k=10, + dtype=np.float32, + metric="sqeuclidean", + intermediate_graph_degree=128, + graph_degree=64, + search_params={}, +): + dataset = generate_data((n_rows, n_cols), dtype) + if metric == "inner_product": + dataset = normalize(dataset, norm="l2", axis=1) + + build_params = cagra.IndexParams( + metric=metric, + intermediate_graph_degree=intermediate_graph_degree, + graph_degree=graph_degree, + ) + + index = cagra.build(build_params, dataset) + + assert index.trained + + hnsw_index = hnsw.from_cagra(index) + + queries = generate_data((n_queries, n_cols), dtype) + out_idx = np.zeros((n_queries, k), dtype=np.uint32) + + search_params = hnsw.SearchParams(**search_params) + + out_dist, out_idx = hnsw.search(search_params, hnsw_index, queries, k) + + # Calculate reference values with sklearn + nn_skl = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric) + nn_skl.fit(dataset) + skl_idx = nn_skl.kneighbors(queries, return_distance=False) + + recall = calc_recall(out_idx, skl_idx) + assert recall > 0.95 + + +@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) +@pytest.mark.parametrize("k", [10, 20]) +@pytest.mark.parametrize("ef", [30, 40]) +@pytest.mark.parametrize("num_threads", [2, 4]) +def test_hnsw(dtype, k, ef, num_threads): + # Note that inner_product tests use normalized input which we cannot + # represent in int8, therefore we test only sqeuclidean metric here. + run_hnsw_build_search_test( + dtype=dtype, k=k, search_params={"ef": ef, "num_threads": num_threads} + )