From b1f17edaacd01875eb579592ee4c5a4cbb2afa87 Mon Sep 17 00:00:00 2001 From: Paul Taylor <178183+trxcllnt@users.noreply.github.com> Date: Fri, 24 May 2024 14:18:57 -0700 Subject: [PATCH 01/11] fix devcontainer name for codespaces (#153) This PR fixes launching the devcontainers in GitHub Codespaces: ![image](https://github.com/rapidsai/cuvs/assets/178183/71d4bab2-82b1-4671-b382-c5f21503dfce) Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/153 --- .devcontainer/cuda11.8-conda/devcontainer.json | 2 +- .devcontainer/cuda11.8-pip/devcontainer.json | 2 +- .devcontainer/cuda12.2-conda/devcontainer.json | 2 +- .devcontainer/cuda12.2-pip/devcontainer.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index bfba756fbe..1fcd67a012 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index bb25e2ba31..534e50effb 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index bc0548622b..4ff6263c2e 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 19ab3e145e..6979fd5d92 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -11,7 +11,7 @@ "runArgs": [ "--rm", "--name", - "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { From b9e8a4baaae91e9c3f50c9d91a4bca019a2ce931 Mon Sep 17 00:00:00 2001 From: Micka Date: Tue, 28 May 2024 20:17:56 +0200 Subject: [PATCH 02/11] Add `refine` to public API (#154) Add `cuvs::neighbors::refine` to public API, with it's test Authors: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/154 --- cpp/CMakeLists.txt | 12 +- cpp/include/cuvs/neighbors/ivf_pq.hpp | 1 - cpp/include/cuvs/neighbors/refine.hpp | 439 ++++++++++++++++++ .../neighbors/detail/cagra/cagra_build.cuh | 31 +- cpp/src/neighbors/detail/refine.cuh | 19 - cpp/src/neighbors/detail/refine_host-ext.hpp | 66 --- cpp/src/neighbors/detail/refine_host.hpp | 19 - .../detail/refine_host_float_float.cpp | 30 -- .../detail/refine_host_half_float.cpp | 31 -- .../detail/refine_host_int8_t_float.cpp | 29 -- .../detail/refine_host_uint8_t_float.cpp | 30 -- cpp/src/neighbors/ivf_flat_index.cpp | 1 + cpp/src/neighbors/refine-ext.cuh | 85 ---- cpp/src/neighbors/refine-inl.cuh | 104 ----- cpp/src/neighbors/refine.cuh | 24 - .../detail/refine_device_float_float.cu} | 27 +- .../refine/detail/refine_device_half_float.cu | 49 ++ .../detail/refine_device_int8_t_float.cu} | 27 +- .../detail/refine_device_uint8_t_float.cu} | 27 +- .../refine/detail/refine_host_float_float.cpp | 38 ++ .../refine/detail/refine_host_half_float.cpp | 37 ++ .../detail/refine_host_int8_t_float.cpp | 37 ++ .../detail/refine_host_uint8_t_float.cpp | 37 ++ .../{detail => refine}/refine_common.hpp | 0 .../{detail => refine}/refine_device.cuh | 25 +- .../refine_host.hpp} | 25 +- cpp/src/neighbors/refine_float_float.cu | 54 --- cpp/test/CMakeLists.txt | 5 +- cpp/test/neighbors/refine.cu | 127 +++++ cpp/test/neighbors/refine_helper.cuh | 158 +++++++ docs/source/cpp_api/neighbors.rst | 1 + docs/source/cpp_api/neighbors_refine.rst | 20 + 32 files changed, 1048 insertions(+), 567 deletions(-) create mode 100644 cpp/include/cuvs/neighbors/refine.hpp delete mode 100644 cpp/src/neighbors/detail/refine.cuh delete mode 100644 cpp/src/neighbors/detail/refine_host-ext.hpp delete mode 100644 cpp/src/neighbors/detail/refine_host.hpp delete mode 100644 cpp/src/neighbors/detail/refine_host_float_float.cpp delete mode 100644 cpp/src/neighbors/detail/refine_host_half_float.cpp delete mode 100644 cpp/src/neighbors/detail/refine_host_int8_t_float.cpp delete mode 100644 cpp/src/neighbors/detail/refine_host_uint8_t_float.cpp delete mode 100644 cpp/src/neighbors/refine-ext.cuh delete mode 100644 cpp/src/neighbors/refine-inl.cuh delete mode 100644 cpp/src/neighbors/refine.cuh rename cpp/src/neighbors/{refine_half_float.cu => refine/detail/refine_device_float_float.cu} (56%) create mode 100644 cpp/src/neighbors/refine/detail/refine_device_half_float.cu rename cpp/src/neighbors/{refine_int8_t_float.cu => refine/detail/refine_device_int8_t_float.cu} (57%) rename cpp/src/neighbors/{refine_uint8_t_float.cu => refine/detail/refine_device_uint8_t_float.cu} (57%) create mode 100644 cpp/src/neighbors/refine/detail/refine_host_float_float.cpp create mode 100644 cpp/src/neighbors/refine/detail/refine_host_half_float.cpp create mode 100644 cpp/src/neighbors/refine/detail/refine_host_int8_t_float.cpp create mode 100644 cpp/src/neighbors/refine/detail/refine_host_uint8_t_float.cpp rename cpp/src/neighbors/{detail => refine}/refine_common.hpp (100%) rename cpp/src/neighbors/{detail => refine}/refine_device.cuh (87%) rename cpp/src/neighbors/{detail/refine_host-inl.hpp => refine/refine_host.hpp} (89%) delete mode 100644 cpp/src/neighbors/refine_float_float.cu create mode 100644 cpp/test/neighbors/refine.cu create mode 100644 cpp/test/neighbors/refine_helper.cuh create mode 100644 docs/source/cpp_api/neighbors_refine.rst diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 229eb5b934..d0c4f85489 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -387,10 +387,6 @@ add_library( src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - src/neighbors/detail/refine_host_float_float.cpp - src/neighbors/detail/refine_host_half_float.cpp - src/neighbors/detail/refine_host_int8_t_float.cpp - src/neighbors/detail/refine_host_uint8_t_float.cpp src/neighbors/ivf_flat_index.cpp src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu @@ -432,6 +428,14 @@ add_library( src/neighbors/nn_descent_float.cu src/neighbors/nn_descent_int8.cu src/neighbors/nn_descent_uint8.cu + src/neighbors/refine/detail/refine_device_float_float.cu + src/neighbors/refine/detail/refine_device_half_float.cu + src/neighbors/refine/detail/refine_device_int8_t_float.cu + src/neighbors/refine/detail/refine_device_uint8_t_float.cu + src/neighbors/refine/detail/refine_host_float_float.cpp + src/neighbors/refine/detail/refine_host_half_float.cpp + src/neighbors/refine/detail/refine_host_int8_t_float.cpp + src/neighbors/refine/detail/refine_host_uint8_t_float.cpp src/neighbors/sample_filter.cu ) diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index d2d6ca9bb9..d1e6b37c2f 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -16,7 +16,6 @@ #pragma once -#include "common.hpp" #include #include diff --git a/cpp/include/cuvs/neighbors/refine.hpp b/cpp/include/cuvs/neighbors/refine.hpp new file mode 100644 index 0000000000..19fbd30bb7 --- /dev/null +++ b/cpp/include/cuvs/neighbors/refine.hpp @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors { +/** + * @defgroup ann_refine Approximate Nearest Neighbors Refinement + * @{ + */ + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset device matrix that stores the dataset [n_rows, dims] + * @param[in] queries device matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where + * n_candidates >= k + * @param[out] indices device matrix that stores the refined indices [n_queries, k] + * @param[out] distances device matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset host matrix that stores the dataset [n_rows, dims] + * @param[in] queries host matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, + * n_candidates], where n_candidates >= k + * @param[out] indices host matrix that stores the refined indices [n_queries, k] + * @param[out] distances host matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset host matrix that stores the dataset [n_rows, dims] + * @param[in] queries host matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, + * n_candidates], where n_candidates >= k + * @param[out] indices host matrix that stores the refined indices [n_queries, k] + * @param[out] distances host matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset host matrix that stores the dataset [n_rows, dims] + * @param[in] queries host matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, + * n_candidates], where n_candidates >= k + * @param[out] indices host matrix that stores the refined indices [n_queries, k] + * @param[out] distances host matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset host matrix that stores the dataset [n_rows, dims] + * @param[in] queries host matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, + * n_candidates], where n_candidates >= k + * @param[out] indices host matrix that stores the refined indices [n_queries, k] + * @param[out] distances host matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +/** + * @brief Refine nearest neighbor search. + * + * Refinement is an operation that follows an approximate NN search. The approximate search has + * already selected n_candidates neighbor candidates for each query. We narrow it down to k + * neighbors. For each query, we calculate the exact distance between the query and its + * n_candidates neighbor candidate, and select the k nearest ones. + * + * The k nearest neighbors and distances are returned. + * + * Example usage + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * // use default search parameters + * ivf_pq::search_params search_params; + * // search m = 4 * k nearest neighbours for each of the N queries + * ivf_pq::search(handle, search_params, index, queries, neighbor_candidates, + * out_dists_tmp); + * // refine it to the k nearest one + * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, + * index.metric()); + * @endcode + * + * + * @param[in] handle the raft handle + * @param[in] dataset host matrix that stores the dataset [n_rows, dims] + * @param[in] queries host matrix of the queries [n_queris, dims] + * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, + * n_candidates], where n_candidates >= k + * @param[out] indices host matrix that stores the refined indices [n_queries, k] + * @param[out] distances host matrix that stores the refined distances [n_queries, k] + * @param[in] metric distance metric to use. Euclidean (L2) is used by default + */ +void refine(raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded); + +} // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 9a92a53502..2f6cb17e36 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -30,11 +30,11 @@ #include #include +#include // TODO: Fixme- this needs to be migrated #include "../../ivf_pq/ivf_pq_build.cuh" #include "../../nn_descent.cuh" -#include "../../refine.cuh" // TODO: This shouldn't be calling spatial/knn APIs #include "../ann_utils.cuh" @@ -174,13 +174,13 @@ void build_knn_graph( refined_distances_host.data_handle(), batch.size(), top_k); raft::resource::sync_stream(res); - cuvs::neighbors::detail::refine_host( - dataset, - queries_host_view, - neighbors_host_view, - refined_neighbors_host_view, - refined_distances_host_view, - build_params->metric); + cuvs::neighbors::refine(res, + dataset, + queries_host_view, + neighbors_host_view, + refined_neighbors_host_view, + refined_distances_host_view, + build_params->metric); } else { auto neighbor_candidates_view = raft::make_device_matrix_view( neighbors.data_handle(), batch.size(), gpu_top_k); @@ -191,14 +191,13 @@ void build_knn_graph( auto dataset_view = raft::make_device_matrix_view( dataset.data_handle(), dataset.extent(0), dataset.extent(1)); - cuvs::neighbors::detail::refine_device( - res, - dataset_view, - queries_view, - neighbor_candidates_view, - refined_neighbors_view, - refined_distances_view, - build_params->metric); + cuvs::neighbors::refine(res, + dataset_view, + queries_view, + neighbor_candidates_view, + refined_neighbors_view, + refined_distances_view, + build_params->metric); raft::copy(refined_neighbors_host.data_handle(), refined_neighbors_view.data_handle(), refined_neighbors_view.size(), diff --git a/cpp/src/neighbors/detail/refine.cuh b/cpp/src/neighbors/detail/refine.cuh deleted file mode 100644 index 170f973984..0000000000 --- a/cpp/src/neighbors/detail/refine.cuh +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "refine_device.cuh" -#include "refine_host.hpp" diff --git a/cpp/src/neighbors/detail/refine_host-ext.hpp b/cpp/src/neighbors/detail/refine_host-ext.hpp deleted file mode 100644 index 88f5a2a646..0000000000 --- a/cpp/src/neighbors/detail/refine_host-ext.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "refine_host-inl.hpp" -#include // cuvs::distance::DistanceType -#include // _RAFT_HAS_CUDA -#include // raft::host_matrix_view -#include // RAFT_EXPLICIT - -#include // int64_t - -#if defined(_RAFT_HAS_CUDA) -#include -#endif - -#ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY - -namespace cuvs::neighbors::detail { - -template -[[gnu::optimize(3), gnu::optimize("tree-vectorize")]] void refine_host( - raft::host_matrix_view dataset, - raft::host_matrix_view queries, - raft::host_matrix_view neighbor_candidates, - raft::host_matrix_view indices, - raft::host_matrix_view distances, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) RAFT_EXPLICIT; - -} - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - extern template void cuvs::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); - -#if defined(_RAFT_HAS_CUDA) -instantiate_raft_neighbors_refine(int64_t, half, float, int64_t); -#endif - -#undef instantiate_raft_neighbors_refine diff --git a/cpp/src/neighbors/detail/refine_host.hpp b/cpp/src/neighbors/detail/refine_host.hpp deleted file mode 100644 index d20bea110f..0000000000 --- a/cpp/src/neighbors/detail/refine_host.hpp +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "refine_host-ext.hpp" -#include "refine_host-inl.hpp" diff --git a/cpp/src/neighbors/detail/refine_host_float_float.cpp b/cpp/src/neighbors/detail/refine_host_float_float.cpp deleted file mode 100644 index c9acfc51f1..0000000000 --- a/cpp/src/neighbors/detail/refine_host_float_float.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "refine_host-inl.hpp" - -#define instantiate_cuvs_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - template void cuvs::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_cuvs_neighbors_refine(int64_t, float, float, int64_t); -instantiate_cuvs_neighbors_refine(uint32_t, float, float, int64_t); - -#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/detail/refine_host_half_float.cpp b/cpp/src/neighbors/detail/refine_host_half_float.cpp deleted file mode 100644 index 3dbc1b1794..0000000000 --- a/cpp/src/neighbors/detail/refine_host_half_float.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "refine_host-inl.hpp" - -#include - -#define instantiate_cuvs_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - template void cuvs::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_cuvs_neighbors_refine(int64_t, half, float, int64_t); - -#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/detail/refine_host_int8_t_float.cpp b/cpp/src/neighbors/detail/refine_host_int8_t_float.cpp deleted file mode 100644 index c46cb3efba..0000000000 --- a/cpp/src/neighbors/detail/refine_host_int8_t_float.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "refine_host-inl.hpp" - -#define instantiate_cuvs_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - template void cuvs::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); -instantiate_cuvs_neighbors_refine(int64_t, int8_t, float, int64_t); - -#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/detail/refine_host_uint8_t_float.cpp b/cpp/src/neighbors/detail/refine_host_uint8_t_float.cpp deleted file mode 100644 index 532fbaac05..0000000000 --- a/cpp/src/neighbors/detail/refine_host_uint8_t_float.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "refine_host-inl.hpp" - -#define instantiate_cuvs_neighbors_refine(IdxT, DataT, DistanceT, ExtentsT) \ - template void cuvs::neighbors::detail::refine_host( \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_cuvs_neighbors_refine(int64_t, uint8_t, float, int64_t); - -#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/ivf_flat_index.cpp b/cpp/src/neighbors/ivf_flat_index.cpp index 25cf36d329..61d720665f 100644 --- a/cpp/src/neighbors/ivf_flat_index.cpp +++ b/cpp/src/neighbors/ivf_flat_index.cpp @@ -220,6 +220,7 @@ void index::check_consistency() } template struct index; +template struct index; template struct index; template struct index; diff --git a/cpp/src/neighbors/refine-ext.cuh b/cpp/src/neighbors/refine-ext.cuh deleted file mode 100644 index 2f40842871..0000000000 --- a/cpp/src/neighbors/refine-ext.cuh +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // cuvs::distance::DistanceType -#include // raft::device_matrix_view -#include // // raft::host_matrix_view -#include // raft::resources -#include // RAFT_EXPLICIT - -#include // int64_t - -#ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY - -namespace cuvs::neighbors { - -template -void refine(raft::resources const& handle, - raft::device_matrix_view dataset, - raft::device_matrix_view queries, - raft::device_matrix_view neighbor_candidates, - raft::device_matrix_view indices, - raft::device_matrix_view distances, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) - RAFT_EXPLICIT; - -template -void refine(raft::resources const& handle, - raft::host_matrix_view dataset, - raft::host_matrix_view queries, - raft::host_matrix_view neighbor_candidates, - raft::host_matrix_view indices, - raft::host_matrix_view distances, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) - RAFT_EXPLICIT; - -} // namespace cuvs::neighbors - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ - extern template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbor_candidates, \ - raft::device_matrix_view indices, \ - raft::device_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ - extern template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine_d(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine_d(int64_t, uint8_t, float, int64_t); - -instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); -instantiate_raft_neighbors_refine_h(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine_h(int64_t, uint8_t, float, int64_t); - -#undef instantiate_raft_neighbors_refine_d -#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/src/neighbors/refine-inl.cuh b/cpp/src/neighbors/refine-inl.cuh deleted file mode 100644 index 6203290273..0000000000 --- a/cpp/src/neighbors/refine-inl.cuh +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "detail/ann_utils.cuh" -#include "detail/refine.cuh" -#include -#include -#include - -namespace cuvs::neighbors { - -/** - * @defgroup ann_refine Approximate Nearest Neighbors Refinement - * @{ - */ - -/** - * @brief Refine nearest neighbor search. - * - * Refinement is an operation that follows an approximate NN search. The approximate search has - * already selected n_candidates neighbor candidates for each query. We narrow it down to k - * neighbors. For each query, we calculate the exact distance between the query and its - * n_candidates neighbor candidate, and select the k nearest ones. - * - * The k nearest neighbors and distances are returned. - * - * Example usage - * @code{.cpp} - * using namespace raft::neighbors; - * // use default index parameters - * ivf_pq::index_params index_params; - * // create and fill the index from a [N, D] dataset - * auto index = ivf_pq::build(handle, index_params, dataset, N, D); - * // use default search parameters - * ivf_pq::search_params search_params; - * // search m = 4 * k nearest neighbours for each of the N queries - * ivf_pq::search(handle, search_params, index, queries, N, 4 * k, neighbor_candidates, - * out_dists_tmp); - * // refine it to the k nearest one - * refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, - * index.metric()); - * @endcode - * - * - * @param[in] handle the raft handle - * @param[in] dataset device matrix that stores the dataset [n_rows, dims] - * @param[in] queries device matrix of the queries [n_queris, dims] - * @param[in] neighbor_candidates indices of candidate vectors [n_queries, n_candidates], where - * n_candidates >= k - * @param[out] indices device matrix that stores the refined indices [n_queries, k] - * @param[out] distances device matrix that stores the refined distances [n_queries, k] - * @param[in] metric distance metric to use. Euclidean (L2) is used by default - */ -template -void refine(raft::resources const& handle, - raft::device_matrix_view dataset, - raft::device_matrix_view queries, - raft::device_matrix_view neighbor_candidates, - raft::device_matrix_view indices, - raft::device_matrix_view distances, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) -{ - detail::refine_device(handle, dataset, queries, neighbor_candidates, indices, distances, metric); -} - -/** Same as above, but all input and out data is in host memory. - * @param[in] handle the raft handle - * @param[in] dataset host matrix that stores the dataset [n_rows, dims] - * @param[in] queries host matrix of the queries [n_queris, dims] - * @param[in] neighbor_candidates host matrix with indices of candidate vectors [n_queries, - * n_candidates], where n_candidates >= k - * @param[out] indices host matrix that stores the refined indices [n_queries, k] - * @param[out] distances host matrix that stores the refined distances [n_queries, k] - * @param[in] metric distance metric to use. Euclidean (L2) is used by default - */ -template -void refine(raft::resources const& handle, - raft::host_matrix_view dataset, - raft::host_matrix_view queries, - raft::host_matrix_view neighbor_candidates, - raft::host_matrix_view indices, - raft::host_matrix_view distances, - cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) -{ - detail::refine_host(dataset, queries, neighbor_candidates, indices, distances, metric); -} - -/** @} */ // end group ann_refine -} // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/refine.cuh b/cpp/src/neighbors/refine.cuh deleted file mode 100644 index b3a3cdb4eb..0000000000 --- a/cpp/src/neighbors/refine.cuh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#ifndef CUVS_EXPLICIT_INSTANTIATE_ONLY -#include "refine-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "refine-ext.cuh" -#endif diff --git a/cpp/src/neighbors/refine_half_float.cu b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu similarity index 56% rename from cpp/src/neighbors/refine_half_float.cu rename to cpp/src/neighbors/refine/detail/refine_device_float_float.cu index 3967c15abb..25bad201b1 100644 --- a/cpp/src/neighbors/refine_half_float.cu +++ b/cpp/src/neighbors/refine/detail/refine_device_float_float.cu @@ -24,27 +24,24 @@ * */ -#include "refine-inl.cuh" +#include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ - template void cuvs::neighbors::refine( \ +#include "../refine_device.cuh" + +#define instantiate_cuvs_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - cuvs::distance::DistanceType metric); \ - \ - template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } -instantiate_raft_neighbors_refine(int64_t, half, float, int64_t); +instantiate_cuvs_neighbors_refine_d(int64_t, float, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_cuvs_neighbors_refine_d diff --git a/cpp/src/neighbors/refine/detail/refine_device_half_float.cu b/cpp/src/neighbors/refine/detail/refine_device_half_float.cu new file mode 100644 index 0000000000..1113025487 --- /dev/null +++ b/cpp/src/neighbors/refine/detail/refine_device_half_float.cu @@ -0,0 +1,49 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by refine_00_generate.py + * + * Make changes there and run in this directory: + * + * > python refine_00_generate.py + * + */ + +#include + +#include + +#include "../refine_device.cuh" + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ + raft::resources const& handle, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbor_candidates, \ + raft::device_matrix_view indices, \ + raft::device_matrix_view distances, \ + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } + +instantiate_cuvs_neighbors_refine(int64_t, half, float, int64_t); + +#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/refine_int8_t_float.cu b/cpp/src/neighbors/refine/detail/refine_device_int8_t_float.cu similarity index 57% rename from cpp/src/neighbors/refine_int8_t_float.cu rename to cpp/src/neighbors/refine/detail/refine_device_int8_t_float.cu index 8115c71044..5917cf459f 100644 --- a/cpp/src/neighbors/refine_int8_t_float.cu +++ b/cpp/src/neighbors/refine/detail/refine_device_int8_t_float.cu @@ -24,27 +24,24 @@ * */ -#include "refine-inl.cuh" +#include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ - template void cuvs::neighbors::refine( \ +#include "../refine_device.cuh" + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - cuvs::distance::DistanceType metric); \ - \ - template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } -instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); +instantiate_cuvs_neighbors_refine(int64_t, int8_t, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/refine_uint8_t_float.cu b/cpp/src/neighbors/refine/detail/refine_device_uint8_t_float.cu similarity index 57% rename from cpp/src/neighbors/refine_uint8_t_float.cu rename to cpp/src/neighbors/refine/detail/refine_device_uint8_t_float.cu index ccee2e6788..03c38f1c56 100644 --- a/cpp/src/neighbors/refine_uint8_t_float.cu +++ b/cpp/src/neighbors/refine/detail/refine_device_uint8_t_float.cu @@ -24,27 +24,24 @@ * */ -#include "refine-inl.cuh" +#include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ - template void cuvs::neighbors::refine( \ +#include "../refine_device.cuh" + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ raft::device_matrix_view queries, \ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - cuvs::distance::DistanceType metric); \ - \ - template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); + cuvs::distance::DistanceType metric) \ + { \ + cuvs::neighbors::refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } -instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); +instantiate_cuvs_neighbors_refine(int64_t, uint8_t, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/refine/detail/refine_host_float_float.cpp b/cpp/src/neighbors/refine/detail/refine_host_float_float.cpp new file mode 100644 index 0000000000..dbbe05f84e --- /dev/null +++ b/cpp/src/neighbors/refine/detail/refine_host_float_float.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../refine_host.hpp" + +#define instantiate_cuvs_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } + +instantiate_cuvs_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_cuvs_neighbors_refine_h(uint32_t, float, float, int64_t); + +#undef instantiate_cuvs_neighbors_refine_h diff --git a/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp b/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp new file mode 100644 index 0000000000..ed9c412089 --- /dev/null +++ b/cpp/src/neighbors/refine/detail/refine_host_half_float.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "../refine_host.hpp" +#include + +#include + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } + +instantiate_cuvs_neighbors_refine(int64_t, half, float, int64_t); + +#undef instantiate_cuvs_neighbors_refine \ No newline at end of file diff --git a/cpp/src/neighbors/refine/detail/refine_host_int8_t_float.cpp b/cpp/src/neighbors/refine/detail/refine_host_int8_t_float.cpp new file mode 100644 index 0000000000..47b6fc356b --- /dev/null +++ b/cpp/src/neighbors/refine/detail/refine_host_int8_t_float.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../refine_host.hpp" + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + cuvs::distance::DistanceType metric) \ + { \ + refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } + +instantiate_cuvs_neighbors_refine(int64_t, int8_t, float, int64_t); + +#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/refine/detail/refine_host_uint8_t_float.cpp b/cpp/src/neighbors/refine/detail/refine_host_uint8_t_float.cpp new file mode 100644 index 0000000000..1698cd3ee9 --- /dev/null +++ b/cpp/src/neighbors/refine/detail/refine_host_uint8_t_float.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "../refine_host.hpp" + +#define instantiate_cuvs_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ + void cuvs::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + cuvs::distance::DistanceType metric) \ + { \ + cuvs::neighbors::refine_impl( \ + handle, dataset, queries, neighbor_candidates, indices, distances, metric); \ + } + +instantiate_cuvs_neighbors_refine(int64_t, uint8_t, float, int64_t); + +#undef instantiate_cuvs_neighbors_refine diff --git a/cpp/src/neighbors/detail/refine_common.hpp b/cpp/src/neighbors/refine/refine_common.hpp similarity index 100% rename from cpp/src/neighbors/detail/refine_common.hpp rename to cpp/src/neighbors/refine/refine_common.hpp diff --git a/cpp/src/neighbors/detail/refine_device.cuh b/cpp/src/neighbors/refine/refine_device.cuh similarity index 87% rename from cpp/src/neighbors/detail/refine_device.cuh rename to cpp/src/neighbors/refine/refine_device.cuh index 7148733f1c..5bf315ae5b 100644 --- a/cpp/src/neighbors/detail/refine_device.cuh +++ b/cpp/src/neighbors/refine/refine_device.cuh @@ -16,15 +16,15 @@ #pragma once +#include "../../core/nvtx.hpp" +#include "../detail/ann_utils.cuh" #include "../ivf_flat/ivf_flat_build.cuh" #include "../ivf_flat/ivf_flat_interleaved_scan.cuh" -#include "ann_utils.cuh" #include "refine_common.hpp" #include #include #include #include -#include #include #include #include @@ -32,8 +32,9 @@ #include -namespace cuvs::neighbors::detail { +namespace cuvs::neighbors { +namespace detail { /** * See cuvs::neighbors::refine for docs. */ @@ -57,7 +58,7 @@ void refine_device( "k must be less than topk::kMaxCapacity (%d).", raft::matrix::detail::select::warpsort::kMaxCapacity); - raft::common::nvtx::range fun_scope( + cuvs::common::nvtx::range fun_scope( "neighbors::refine(%zu, %u)", size_t(n_queries), uint32_t(n_candidates)); refine_check_input(dataset.extents(), @@ -143,4 +144,18 @@ void refine_device( raft::resource::get_cuda_stream(handle)); } -} // namespace cuvs::neighbors::detail +} // namespace detail + +template +void refine_impl( + raft::resources const& handle, + raft::device_matrix_view dataset, + raft::device_matrix_view queries, + raft::device_matrix_view neighbor_candidates, + raft::device_matrix_view indices, + raft::device_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + detail::refine_device(handle, dataset, queries, neighbor_candidates, indices, distances, metric); +} +} // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/detail/refine_host-inl.hpp b/cpp/src/neighbors/refine/refine_host.hpp similarity index 89% rename from cpp/src/neighbors/detail/refine_host-inl.hpp rename to cpp/src/neighbors/refine/refine_host.hpp index eddcc13e9b..4f293e5b81 100644 --- a/cpp/src/neighbors/detail/refine_host-inl.hpp +++ b/cpp/src/neighbors/refine/refine_host.hpp @@ -16,16 +16,18 @@ #pragma once +#include "../../core/nvtx.hpp" #include "refine_common.hpp" #include -#include #include #include #include -namespace cuvs::neighbors::detail { +namespace cuvs::neighbors { + +namespace detail { template [[gnu::optimize(3), gnu::optimize("tree-vectorize")]] void refine_host_impl( @@ -41,7 +43,7 @@ template fun_scope( + cuvs::common::nvtx::range fun_scope( "neighbors::refine_host(%zu, %zu -> %zu)", n_queries, orig_k, refined_k); auto suggested_n_threads = std::max(1, std::min(omp_get_num_procs(), omp_get_max_threads())); @@ -190,4 +192,19 @@ template } } -} // namespace cuvs::neighbors::detail +} // namespace detail + +template +void refine_impl( + raft::resources const& handle, + raft::host_matrix_view dataset, + raft::host_matrix_view queries, + raft::host_matrix_view neighbor_candidates, + raft::host_matrix_view indices, + raft::host_matrix_view distances, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Unexpanded) +{ + detail::refine_host(dataset, queries, neighbor_candidates, indices, distances, metric); +} + +} // namespace cuvs::neighbors \ No newline at end of file diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu deleted file mode 100644 index 648d2e02a2..0000000000 --- a/cpp/src/neighbors/refine_float_float.cu +++ /dev/null @@ -1,54 +0,0 @@ - -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: this file is generated by refine_00_generate.py - * - * Make changes there and run in this directory: - * - * > python refine_00_generate.py - * - */ - -#include "refine-inl.cuh" - -#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ - template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbor_candidates, \ - raft::device_matrix_view indices, \ - raft::device_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ - template void cuvs::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ - cuvs::distance::DistanceType metric); - -instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); - -#undef instantiate_raft_neighbors_refine_d -#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 8d532336e5..3fbf62cdba 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -90,7 +90,10 @@ endfunction() # ################################################################################################## if(BUILD_TESTS) - ConfigureTest(NAME NEIGHBORS_TEST PATH test/neighbors/brute_force.cu GPUS 1 PERCENT 100) + ConfigureTest( + NAME NEIGHBORS_TEST PATH test/neighbors/brute_force.cu test/neighbors/refine.cu GPUS 1 PERCENT + 100 + ) ConfigureTest( NAME CLUSTER_TEST PATH test/cluster/kmeans.cu test/cluster/kmeans_balanced.cu diff --git a/cpp/test/neighbors/refine.cu b/cpp/test/neighbors/refine.cu new file mode 100644 index 0000000000..10575e8eb8 --- /dev/null +++ b/cpp/test/neighbors/refine.cu @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include "ann_utils.cuh" +#include "refine_helper.cuh" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cuvs::neighbors { + +template +class RefineTest : public ::testing::TestWithParam> { + public: + RefineTest() + : stream_(raft::resource::get_cuda_stream(handle_)), + data(handle_, ::testing::TestWithParam>::GetParam()) + { + } + + protected: + public: // tamas remove + void testRefine() + { + std::vector indices(data.p.n_queries * data.p.k); + std::vector distances(data.p.n_queries * data.p.k); + + if (data.p.host_data) { + cuvs::neighbors::refine(handle_, + data.dataset_host.view(), + data.queries_host.view(), + data.candidates_host.view(), + data.refined_indices_host.view(), + data.refined_distances_host.view(), + data.p.metric); + raft::copy(indices.data(), + data.refined_indices_host.data_handle(), + data.refined_indices_host.size(), + stream_); + raft::copy(distances.data(), + data.refined_distances_host.data_handle(), + data.refined_distances_host.size(), + stream_); + + } else { + cuvs::neighbors::refine(handle_, + data.dataset.view(), + data.queries.view(), + data.candidates.view(), + data.refined_indices.view(), + data.refined_distances.view(), + data.p.metric); + raft::update_host(distances.data(), + data.refined_distances.data_handle(), + data.refined_distances.size(), + stream_); + raft::update_host( + indices.data(), data.refined_indices.data_handle(), data.refined_indices.size(), stream_); + } + raft::resource::sync_stream(handle_); + + double min_recall = 1; + + ASSERT_TRUE(cuvs::neighbors::eval_neighbours(data.true_refined_indices_host, + indices, + data.true_refined_distances_host, + distances, + data.p.n_queries, + data.p.k, + 0.001, + min_recall)); + } + + public: + raft::resources handle_; + rmm::cuda_stream_view stream_; + RefineHelper data; +}; + +const std::vector> inputs = + raft::util::itertools::product>( + {static_cast(137)}, + {static_cast(1000)}, + {static_cast(16)}, + {static_cast(1), static_cast(10), static_cast(33)}, + {static_cast(33)}, + {cuvs::distance::DistanceType::L2Expanded, cuvs::distance::DistanceType::InnerProduct}, + {false, true}); + +typedef RefineTest RefineTestF; +TEST_P(RefineTestF, AnnRefine) { this->testRefine(); } + +INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF, ::testing::ValuesIn(inputs)); + +typedef RefineTest RefineTestF_uint8; +TEST_P(RefineTestF_uint8, AnnRefine) { this->testRefine(); } +INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF_uint8, ::testing::ValuesIn(inputs)); + +typedef RefineTest RefineTestF_int8; +TEST_P(RefineTestF_int8, AnnRefine) { this->testRefine(); } +INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF_int8, ::testing::ValuesIn(inputs)); +} // namespace cuvs::neighbors diff --git a/cpp/test/neighbors/refine_helper.cuh b/cpp/test/neighbors/refine_helper.cuh new file mode 100644 index 0000000000..18af47a10b --- /dev/null +++ b/cpp/test/neighbors/refine_helper.cuh @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "naive_knn.cuh" + +#include +#include + +namespace cuvs::neighbors { + +template +struct RefineInputs { + IdxT n_queries; + IdxT n_rows; + IdxT dim; + IdxT k; // after refinement + IdxT k0; // initial k before refinement (k0 >= k). + cuvs::distance::DistanceType metric; + bool host_data; +}; + +/** Helper class to allocate arrays and generate input data for refinement test and benchmark. */ +template +class RefineHelper { + public: + RefineHelper(const raft::resources& handle, RefineInputs params) + : handle_(handle), + stream_(raft::resource::get_cuda_stream(handle)), + p(params), + dataset(handle), + queries(handle), + refined_distances(handle), + refined_indices(handle), + candidates(handle), + dataset_host(handle), + queries_host(handle), + candidates_host(handle), + refined_distances_host(handle), + refined_indices_host(handle) + { + raft::random::RngState rng(1234ULL); + + dataset = raft::make_device_matrix(handle_, p.n_rows, p.dim); + queries = raft::make_device_matrix(handle_, p.n_queries, p.dim); + if constexpr (std::is_same{}) { + raft::random::uniform( + handle, rng, dataset.data_handle(), dataset.size(), DataT(-10.0), DataT(10.0)); + raft::random::uniform( + handle, rng, queries.data_handle(), queries.size(), DataT(-10.0), DataT(10.0)); + } else { + raft::random::uniformInt( + handle, rng, dataset.data_handle(), dataset.size(), DataT(1), DataT(20)); + raft::random::uniformInt( + handle, rng, queries.data_handle(), queries.size(), DataT(1), DataT(20)); + } + + refined_distances = raft::make_device_matrix(handle_, p.n_queries, p.k); + refined_indices = raft::make_device_matrix(handle_, p.n_queries, p.k); + + // Generate candidate vectors + { + candidates = raft::make_device_matrix(handle_, p.n_queries, p.k0); + rmm::device_uvector distances_tmp(p.n_queries * p.k0, stream_); + naive_knn(handle_, + distances_tmp.data(), + candidates.data_handle(), + queries.data_handle(), + dataset.data_handle(), + p.n_queries, + p.n_rows, + p.dim, + p.k0, + p.metric); + raft::resource::sync_stream(handle_, stream_); + } + + if (p.host_data) { + dataset_host = raft::make_host_matrix(p.n_rows, p.dim); + queries_host = raft::make_host_matrix(p.n_queries, p.dim); + candidates_host = raft::make_host_matrix(p.n_queries, p.k0); + + raft::copy(dataset_host.data_handle(), dataset.data_handle(), dataset.size(), stream_); + raft::copy(queries_host.data_handle(), queries.data_handle(), queries.size(), stream_); + raft::copy( + candidates_host.data_handle(), candidates.data_handle(), candidates.size(), stream_); + + refined_distances_host = raft::make_host_matrix(p.n_queries, p.k); + refined_indices_host = raft::make_host_matrix(p.n_queries, p.k); + raft::resource::sync_stream(handle_, stream_); + } + + // Generate ground thruth for testing. + { + rmm::device_uvector distances_dev(p.n_queries * p.k, stream_); + rmm::device_uvector indices_dev(p.n_queries * p.k, stream_); + naive_knn(handle_, + distances_dev.data(), + indices_dev.data(), + queries.data_handle(), + dataset.data_handle(), + p.n_queries, + p.n_rows, + p.dim, + p.k, + p.metric); + true_refined_distances_host.resize(p.n_queries * p.k); + true_refined_indices_host.resize(p.n_queries * p.k); + raft::copy(true_refined_indices_host.data(), indices_dev.data(), indices_dev.size(), stream_); + raft::copy( + true_refined_distances_host.data(), distances_dev.data(), distances_dev.size(), stream_); + raft::resource::sync_stream(handle_, stream_); + } + } + + public: + RefineInputs p; + const raft::resources& handle_; + rmm::cuda_stream_view stream_; + + raft::device_matrix dataset; + raft::device_matrix queries; + raft::device_matrix candidates; // Neighbor candidate indices + raft::device_matrix refined_indices; + raft::device_matrix refined_distances; + + raft::host_matrix dataset_host; + raft::host_matrix queries_host; + raft::host_matrix candidates_host; + raft::host_matrix refined_indices_host; + raft::host_matrix refined_distances_host; + + std::vector true_refined_indices_host; + std::vector true_refined_distances_host; +}; +} // namespace cuvs::neighbors diff --git a/docs/source/cpp_api/neighbors.rst b/docs/source/cpp_api/neighbors.rst index f9006412c9..15b4ff0516 100644 --- a/docs/source/cpp_api/neighbors.rst +++ b/docs/source/cpp_api/neighbors.rst @@ -13,3 +13,4 @@ Nearest Neighbors neighbors_ivf_flat.rst neighbors_ivf_pq.rst neighbors_cagra.rst + neighbors_refine.rst diff --git a/docs/source/cpp_api/neighbors_refine.rst b/docs/source/cpp_api/neighbors_refine.rst new file mode 100644 index 0000000000..d53c8087cc --- /dev/null +++ b/docs/source/cpp_api/neighbors_refine.rst @@ -0,0 +1,20 @@ +Refinement +========== + +Candidate refinement methods for nearest neighbors search + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors* + +Index +----- + +.. doxygengroup:: ann_refine + :project: cuvs + :members: + :content-only: \ No newline at end of file From 191235517c23e4ce4b8e69f2ff08a95c75456d69 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 28 May 2024 20:21:56 +0200 Subject: [PATCH 03/11] Accept host_mdspan for IVF-PQ build and extend (#148) This PR enables host input arrays for `ivf_pq::build` and `ivf_pq::extend`. closes #120 closes #143 Authors: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/148 --- cpp/include/cuvs/neighbors/ivf_pq.hpp | 380 +++++++++++++++++- .../neighbors/detail/cagra/cagra_build.cuh | 3 +- .../ivf_pq/detail/generate_ivf_pq.py | 39 +- .../ivf_pq_build_extend_float_int64_t.cu | 35 +- .../detail/ivf_pq_build_extend_inst.cuh | 93 +++++ .../ivf_pq_build_extend_int8_t_int64_t.cu | 35 +- .../ivf_pq_build_extend_uint8_t_int64_t.cu | 35 +- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 59 ++- .../neighbors/ivf_pq/ivf_pq_deserialize.cu | 15 +- cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cu | 15 +- cpp/test/neighbors/ann_ivf_pq.cuh | 18 +- 11 files changed, 527 insertions(+), 200 deletions(-) create mode 100644 cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp b/cpp/include/cuvs/neighbors/ivf_pq.hpp index d1e6b37c2f..8493882b43 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -593,6 +593,146 @@ void build(raft::resources const& handle, const cuvs::neighbors::ivf_pq::index_params& index_params, raft::device_matrix_view dataset, cuvs::neighbors::ivf_pq::index* idx); +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-pq index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_pq::index index; + * ivf_pq::build(handle, index_params, dataset, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_pq::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_pq::index* idx); + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-pq index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_pq::index index; + * ivf_pq::build(handle, index_params, dataset, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_pq::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_pq::index* idx); + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * auto index = ivf_pq::build(handle, index_params, dataset); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset a host_matrix_view to a row-major matrix [n_rows, dim] + * + * @return the constructed ivf-pq index + */ +auto build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Build the index from the dataset for efficient search. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters + * ivf_pq::index_params index_params; + * // create and fill the index from a [N, D] dataset + * ivf_pq::index index; + * ivf_pq::build(handle, index_params, dataset, index); + * @endcode + * + * @param[in] handle + * @param[in] index_params configure the index building + * @param[in] dataset raft::host_matrix_view to a row-major matrix [n_rows, dim] + * @param[out] idx reference to ivf_pq::index + * + */ +void build(raft::resources const& handle, + const cuvs::neighbors::ivf_pq::index_params& index_params, + raft::host_matrix_view dataset, + cuvs::neighbors::ivf_pq::index* idx); /** * @} */ @@ -771,6 +911,177 @@ void extend(raft::resources const& handle, raft::device_matrix_view new_vectors, std::optional> new_indices, cuvs::neighbors::ivf_pq::index* idx); + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * auto index = ivf_pq::extend(handle, new_vectors, no_op, index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +auto extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + const cuvs::neighbors::ivf_pq::index& idx) + -> cuvs::neighbors::ivf_pq::index; + +/** + * @brief Extend the index with the new data. + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * ivf_pq::index_params index_params; + * index_params.add_data_on_build = false; // don't populate index on build + * index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training + * // train the index from a [N, D] dataset + * auto index_empty = ivf_pq::build(handle, index_params, dataset); + * // fill the index with the data + * std::optional> no_op = std::nullopt; + * ivf_pq::extend(handle, new_vectors, no_op, &index_empty); + * @endcode + * + * @param[in] handle + * @param[in] new_vectors a device matrix view to a row-major matrix [n_rows, idx.dim()] + * @param[in] new_indices a device vector view to a vector of indices [n_rows]. + * If the original index is empty (`idx.size() == 0`), you can pass `std::nullopt` + * here to imply a continuous range `[0...n_rows)`. + * @param[inout] idx + */ +void extend(raft::resources const& handle, + raft::host_matrix_view new_vectors, + std::optional> new_indices, + cuvs::neighbors::ivf_pq::index* idx); /** * @} */ @@ -1009,9 +1320,30 @@ void search_with_filtering( * @{ */ /** - * Save the index to file. + * Serialize the index to an output string. * - * Experimental, both the API and the serialization format are subject to change. + * @code{.cpp} + * #include + * + * raft::resources handle; + * + * // create an input string + * std::string str + * // create an index with `auto index = ivf_pq::build(...);` + * cuvs::serialize(handle, str, index); + * @endcode + * + * @param[in] handle the raft handle + * @param[out] str output string + * @param[in] index IVF-PQ index + * + */ +void serialize(raft::resources const& handle, + std::string& str, + const cuvs::neighbors::ivf_pq::index& index); + +/** + * Save the index to file. * * @code{.cpp} * #include @@ -1029,14 +1361,38 @@ void search_with_filtering( * @param[in] index IVF-PQ index * */ -void serialize(raft::resources const& handle, - std::string& filename, - const cuvs::neighbors::ivf_pq::index& index); +void serialize_file(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::ivf_pq::index& index); /** - * Load index from file. + * Load index from input string. + * + * @code{.cpp} + * #include * - * Experimental, both the API and the serialization format are subject to change. + * raft::resources handle; + * + * std::string str = ... + * + * using IdxT = int64_t; // type of the index + * // create an empty index + * cuvs::neighbors::ivf_pq::index index(handl, index_params, dim); + * + * cuvs::deserialize(handle, filename, &index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] str the name of the file that stores the index + * @param[out] index IVF-PQ index + * + */ + +void deserialize(raft::resources const& handle, + const std::string& str, + cuvs::neighbors::ivf_pq::index* index); +/** + * Load index from file. * * @code{.cpp} * #include @@ -1046,7 +1402,9 @@ void serialize(raft::resources const& handle, * // create a string with a filepath * std::string filename("/path/to/index"); * using IdxT = int64_t; // type of the index - * // create an empty index with `ivf_pq::index index(handle, index_params, dim);` + * // create an empty index with + * ivf_pq::index index(handle, index_params, dim); + * * cuvs::deserialize(handle, filename, &index); * @endcode * @@ -1055,9 +1413,9 @@ void serialize(raft::resources const& handle, * @param[out] index IVF-PQ index * */ -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::ivf_pq::index* index); +void deserialize_file(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::ivf_pq::index* index); /** * @} */ diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 2f6cb17e36..0ca97b9ca9 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -91,8 +91,7 @@ void build_knn_graph( }(); RAFT_LOG_DEBUG("# Building IVF-PQ index %s", model_name.c_str()); - auto index = cuvs::neighbors::ivf_pq::detail::build( - res, *build_params, dataset.data_handle(), dataset.extent(0), dataset.extent(1)); + auto index = cuvs::neighbors::ivf_pq::detail::build(res, *build_params, dataset); // // search top (k + 1) neighbors diff --git a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py index 2ba513579c..878c7ee214 100644 --- a/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py +++ b/cpp/src/neighbors/ivf_pq/detail/generate_ivf_pq.py @@ -41,7 +41,7 @@ """ build_include_macro = """ -#include "../ivf_pq_build.cuh" +#include "ivf_pq_build_extend_inst.cuh" """ search_include_macro = """ #include "../ivf_pq_search.cuh" @@ -61,42 +61,7 @@ uint8_t_int64_t=("uint8_t", "int64_t"), ) -build_extend_macro = """ -#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \\ - auto build(raft::resources const& handle, \\ - const cuvs::neighbors::ivf_pq::index_params& params, \\ - raft::device_matrix_view dataset) \\ - ->cuvs::neighbors::ivf_pq::index \\ - { \\ - return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \\ - } \\ - \\ - void build(raft::resources const& handle, \\ - const cuvs::neighbors::ivf_pq::index_params& params, \\ - raft::device_matrix_view dataset, \\ - cuvs::neighbors::ivf_pq::index* idx) \\ - { \\ - cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \\ - } \\ - auto extend(raft::resources const& handle, \\ - raft::device_matrix_view new_vectors, \\ - std::optional> new_indices, \\ - const cuvs::neighbors::ivf_pq::index& orig_index) \\ - ->cuvs::neighbors::ivf_pq::index \\ - { \\ - return cuvs::neighbors::ivf_pq::detail::extend( \\ - handle, new_vectors, new_indices, orig_index); \\ - } \\ - \\ - void extend(raft::resources const& handle, \\ - raft::device_matrix_view new_vectors, \\ - std::optional> new_indices, \\ - cuvs::neighbors::ivf_pq::index* idx) \\ - { \\ - cuvs::neighbors::ivf_pq::detail::extend( \\ - handle, new_vectors, new_indices, idx); \\ - } -""" +build_extend_macro = "" # moved to header ivf_pq_build_extend_inst.cuh search_macro = """ #define CUVS_INST_IVF_PQ_SEARCH(T, IdxT) \\ diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu index ee7f26381f..65c443d883 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu @@ -25,42 +25,9 @@ #include -#include "../ivf_pq_build.cuh" +#include "ivf_pq_build_extend_inst.cuh" namespace cuvs::neighbors::ivf_pq { - -#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_pq::index& orig_index) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, idx); \ - } CUVS_INST_IVF_PQ_BUILD_EXTEND(float, int64_t); #undef CUVS_INST_IVF_PQ_BUILD_EXTEND diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh new file mode 100644 index 0000000000..4b963b0b37 --- /dev/null +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_inst.cuh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is used by generate_ivf_pq.py + * + */ + +#include + +#include "../ivf_pq_build.cuh" + +namespace cuvs::neighbors::ivf_pq { + +#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset) \ + ->cuvs::neighbors::ivf_pq::index \ + { \ + return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset, \ + cuvs::neighbors::ivf_pq::index* idx) \ + { \ + cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_pq::index_params& params, \ + raft::host_matrix_view dataset) \ + ->cuvs::neighbors::ivf_pq::index \ + { \ + return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ + } \ + \ + void build(raft::resources const& handle, \ + const cuvs::neighbors::ivf_pq::index_params& params, \ + raft::host_matrix_view dataset, \ + cuvs::neighbors::ivf_pq::index* idx) \ + { \ + cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \ + } \ + auto extend( \ + raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_pq::index& orig_index) \ + ->cuvs::neighbors::ivf_pq::index \ + { \ + return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ + } \ + void extend(raft::resources const& handle, \ + raft::device_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_pq::index* idx) \ + { \ + cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, idx); \ + } \ + auto extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + const cuvs::neighbors::ivf_pq::index& orig_index) \ + ->cuvs::neighbors::ivf_pq::index \ + { \ + return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ + } \ + \ + void extend(raft::resources const& handle, \ + raft::host_matrix_view new_vectors, \ + std::optional> new_indices, \ + cuvs::neighbors::ivf_pq::index* idx) \ + { \ + cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, idx); \ + } + +} // namespace cuvs::neighbors::ivf_pq diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu index 8b612439d6..048560b5a4 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu @@ -25,42 +25,9 @@ #include -#include "../ivf_pq_build.cuh" +#include "ivf_pq_build_extend_inst.cuh" namespace cuvs::neighbors::ivf_pq { - -#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_pq::index& orig_index) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, idx); \ - } CUVS_INST_IVF_PQ_BUILD_EXTEND(int8_t, int64_t); #undef CUVS_INST_IVF_PQ_BUILD_EXTEND diff --git a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu index df6e12b7e3..755fb9f8d7 100644 --- a/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu +++ b/cpp/src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu @@ -25,42 +25,9 @@ #include -#include "../ivf_pq_build.cuh" +#include "ivf_pq_build_extend_inst.cuh" namespace cuvs::neighbors::ivf_pq { - -#define CUVS_INST_IVF_PQ_BUILD_EXTEND(T, IdxT) \ - auto build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset); \ - } \ - \ - void build(raft::resources const& handle, \ - const cuvs::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::build(handle, params, dataset, idx); \ - } \ - auto extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - const cuvs::neighbors::ivf_pq::index& orig_index) \ - ->cuvs::neighbors::ivf_pq::index \ - { \ - return cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, orig_index); \ - } \ - \ - void extend(raft::resources const& handle, \ - raft::device_matrix_view new_vectors, \ - std::optional> new_indices, \ - cuvs::neighbors::ivf_pq::index* idx) \ - { \ - cuvs::neighbors::ivf_pq::detail::extend(handle, new_vectors, new_indices, idx); \ - } CUVS_INST_IVF_PQ_BUILD_EXTEND(uint8_t, int64_t); #undef CUVS_INST_IVF_PQ_BUILD_EXTEND diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index 5536d6edb1..3b0e475f4b 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -1659,13 +1659,14 @@ auto extend(raft::resources const& handle, return ext_index; } -template +template auto build(raft::resources const& handle, const index_params& params, - const T* dataset, - IdxT n_rows, - uint32_t dim) -> index + raft::mdspan, raft::row_major, accessor> dataset) + -> index { + IdxT n_rows = dataset.extent(0); + IdxT dim = dataset.extent(1); raft::common::nvtx::range fun_scope( "ivf_pq::build(%zu, %u)", size_t(n_rows), dim); static_assert(std::is_same_v || std::is_same_v || std::is_same_v || @@ -1700,7 +1701,7 @@ auto build(raft::resources const& handle, if constexpr (std::is_same_v) { RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), sizeof(T) * index.dim(), - dataset, + dataset.data_handle(), sizeof(T) * index.dim() * trainset_ratio, sizeof(T) * index.dim(), n_rows_train, @@ -1709,7 +1710,7 @@ auto build(raft::resources const& handle, } else { size_t dim = index.dim(); cudaPointerAttributes dataset_attr; - RAFT_CUDA_TRY(cudaPointerGetAttributes(&dataset_attr, dataset)); + RAFT_CUDA_TRY(cudaPointerGetAttributes(&dataset_attr, dataset.data_handle())); if (dataset_attr.devicePointer != nullptr) { // data is available on device: just run the kernel to raft::copy and map the data auto p = reinterpret_cast(dataset_attr.devicePointer); @@ -1728,7 +1729,7 @@ auto build(raft::resources const& handle, // T at the end of float rows. RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset_tmp, sizeof(float) * index.dim(), - dataset, + dataset.data_handle(), sizeof(T) * index.dim() * trainset_ratio, sizeof(T) * index.dim(), n_rows_train, @@ -1809,37 +1810,27 @@ auto build(raft::resources const& handle, // add the data if necessary if (params.add_data_on_build) { - detail::extend(handle, &index, dataset, nullptr, n_rows); + detail::extend(handle, &index, dataset.data_handle(), nullptr, n_rows); } return index; } -template -auto build(raft::resources const& handle, - const index_params& params, - raft::device_matrix_view dataset) -> index -{ - IdxT n_rows = dataset.extent(0); - IdxT dim = dataset.extent(1); - return build(handle, params, dataset.data_handle(), n_rows, dim); -} - -template +template void build(raft::resources const& handle, const index_params& params, - raft::device_matrix_view dataset, + raft::mdspan, raft::row_major, accessor> dataset, index* index) { - IdxT n_rows = dataset.extent(0); - IdxT dim = dataset.extent(1); - *index = build(handle, params, dataset.data_handle(), n_rows, dim); + *index = build(handle, params, dataset); } -template -auto extend(raft::resources const& handle, - raft::device_matrix_view new_vectors, - std::optional> new_indices, - const cuvs::neighbors::ivf_pq::index& orig_index) -> index +template +auto extend( + raft::resources const& handle, + raft::mdspan, raft::row_major, accessor> new_vectors, + std::optional, raft::row_major, accessor2>> + new_indices, + const cuvs::neighbors::ivf_pq::index& orig_index) -> index { ASSERT(new_vectors.extent(1) == orig_index.dim(), "new_vectors should have the same dimension as the index"); @@ -1857,11 +1848,13 @@ auto extend(raft::resources const& handle, n_rows); } -template -void extend(raft::resources const& handle, - raft::device_matrix_view new_vectors, - std::optional> new_indices, - index* index) +template +void extend( + raft::resources const& handle, + raft::mdspan, raft::row_major, accessor> new_vectors, + std::optional, raft::row_major, accessor2>> + new_indices, + index* index) { ASSERT(new_vectors.extent(1) == index->dim(), "new_vectors should have the same dimension as the index"); diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_deserialize.cu b/cpp/src/neighbors/ivf_pq/ivf_pq_deserialize.cu index 9515cd2d58..7827e7892a 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_deserialize.cu +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_deserialize.cu @@ -16,15 +16,24 @@ #include "ivf_pq_serialize.cuh" #include +#include namespace cuvs::neighbors::ivf_pq { -void deserialize(raft::resources const& handle, - const std::string& filename, - cuvs::neighbors::ivf_pq::index* index) +void deserialize_file(raft::resources const& handle, + const std::string& filename, + cuvs::neighbors::ivf_pq::index* index) { if (!index) { RAFT_FAIL("Invalid index pointer"); } *index = cuvs::neighbors::ivf_pq::detail::deserialize(handle, filename); } +void deserialize(raft::resources const& handle, + const std::string& str, + cuvs::neighbors::ivf_pq::index* index) +{ + if (!index) { RAFT_FAIL("Invalid index pointer"); } + std::istringstream is(str); + *index = cuvs::neighbors::ivf_pq::detail::deserialize(handle, is); +} } // namespace cuvs::neighbors::ivf_pq \ No newline at end of file diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cu b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cu index 0fb71d0317..f0214f4bb3 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cu +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_serialize.cu @@ -16,14 +16,23 @@ #include "ivf_pq_serialize.cuh" #include +#include namespace cuvs::neighbors::ivf_pq { -void serialize(raft::resources const& handle, - std::string& filename, - const cuvs::neighbors::ivf_pq::index& index) +void serialize_file(raft::resources const& handle, + const std::string& filename, + const cuvs::neighbors::ivf_pq::index& index) { cuvs::neighbors::ivf_pq::detail::serialize(handle, filename, index); } +void serialize(raft::resources const& handle, + std::string& str, + const cuvs::neighbors::ivf_pq::index& index) +{ + std::ostringstream os; + cuvs::neighbors::ivf_pq::detail::serialize(handle, os, index); + str = os.str(); +} } // namespace cuvs::neighbors::ivf_pq \ No newline at end of file diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index fda32c6df2..f716a8efed 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -204,7 +204,7 @@ class ivf_pq_test : public ::testing::TestWithParam { ipams.add_data_on_build = true; auto index_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); + raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); return cuvs::neighbors::ivf_pq::build(handle_, ipams, index_view); } @@ -224,16 +224,16 @@ class ivf_pq_test : public ::testing::TestWithParam { ipams.add_data_on_build = false; auto database_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); + raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); auto idx = cuvs::neighbors::ivf_pq::build(handle_, ipams, database_view); - auto vecs_2_view = raft::make_device_matrix_view(vecs_2, size_2, ps.dim); - auto inds_2_view = raft::make_device_vector_view(inds_2, size_2); + auto vecs_2_view = raft::make_device_matrix_view(vecs_2, size_2, ps.dim); + auto inds_2_view = raft::make_device_vector_view(inds_2, size_2); cuvs::neighbors::ivf_pq::extend(handle_, vecs_2_view, inds_2_view, &idx); auto vecs_1_view = - raft::make_device_matrix_view(vecs_1, size_1, ps.dim); - auto inds_1_view = raft::make_device_vector_view(inds_1, size_1); + raft::make_device_matrix_view(vecs_1, size_1, ps.dim); + auto inds_1_view = raft::make_device_vector_view(inds_1, size_1); cuvs::neighbors::ivf_pq::extend(handle_, vecs_1_view, inds_1_view, &idx); return idx; } @@ -241,9 +241,9 @@ class ivf_pq_test : public ::testing::TestWithParam { auto build_serialize() { std::string filename = "ivf_pq_index"; - cuvs::neighbors::ivf_pq::serialize(handle_, filename, build_only()); + cuvs::neighbors::ivf_pq::serialize_file(handle_, filename, build_only()); cuvs::neighbors::ivf_pq::index index(handle_, ps.index_params, ps.dim); - cuvs::neighbors::ivf_pq::deserialize(handle_, filename, &index); + cuvs::neighbors::ivf_pq::deserialize_file(handle_, filename, &index); return index; } @@ -564,7 +564,7 @@ class ivf_pq_filter_test : public ::testing::TestWithParam { ipams.add_data_on_build = true; auto index_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); + raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); return cuvs::neighbors::ivf_pq::build(handle_, ipams, index_view); } From c533fe3624db3784e8838c38c737ff858fdffe5d Mon Sep 17 00:00:00 2001 From: rhdong Date: Wed, 29 May 2024 16:00:49 -0700 Subject: [PATCH 04/11] [FEA] support of prefiltered brute force (#146) - The PR is one part of prefiltered brute force and should work with the PR of raft: https://github.com/rapidsai/raft/pull/2294 Authors: - rhdong (https://github.com/rhdong) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/146 --- cpp/include/cuvs/core/bitmap.hpp | 27 + cpp/include/cuvs/neighbors/brute_force.hpp | 5 +- cpp/include/cuvs/neighbors/common.hpp | 1 + cpp/src/neighbors/brute_force.cu | 46 +- cpp/src/neighbors/brute_force_c.cpp | 2 +- cpp/src/neighbors/detail/knn_brute_force.cuh | 202 ++++++- cpp/src/neighbors/detail/knn_utils.cuh | 96 ++++ cpp/test/CMakeLists.txt | 10 +- cpp/test/neighbors/brute_force.cu | 2 +- cpp/test/neighbors/brute_force_prefiltered.cu | 524 ++++++++++++++++++ cpp/test/neighbors/knn_utils.cuh | 95 ++++ 11 files changed, 986 insertions(+), 24 deletions(-) create mode 100644 cpp/include/cuvs/core/bitmap.hpp create mode 100644 cpp/src/neighbors/detail/knn_utils.cuh create mode 100644 cpp/test/neighbors/brute_force_prefiltered.cu create mode 100644 cpp/test/neighbors/knn_utils.cuh diff --git a/cpp/include/cuvs/core/bitmap.hpp b/cpp/include/cuvs/core/bitmap.hpp new file mode 100644 index 0000000000..80ae25cd28 --- /dev/null +++ b/cpp/include/cuvs/core/bitmap.hpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cuvs::core { +/* To use bitmap functions containing CUDA code, include */ + +template +using bitmap_view = raft::core::bitmap_view; + +} // end namespace cuvs::core diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index 755c8cfdb3..13a5ea0cbf 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -191,12 +191,15 @@ auto build(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] + * @param[in] sample_filter a optional device bitmap filter function that greenlights samples for a + * given */ void search(raft::resources const& handle, const cuvs::neighbors::brute_force::index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, - raft::device_matrix_view distances); + raft::device_matrix_view distances, + std::optional> sample_filter); /** * @} */ diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp index 45fa1a107f..72d35961f0 100644 --- a/cpp/include/cuvs/neighbors/common.hpp +++ b/cpp/include/cuvs/neighbors/common.hpp @@ -25,6 +25,7 @@ #include // get_device_for_address #include // rounding up +#include #include #include diff --git a/cpp/src/neighbors/brute_force.cu b/cpp/src/neighbors/brute_force.cu index 45d4be4a77..13554c0b5d 100644 --- a/cpp/src/neighbors/brute_force.cu +++ b/cpp/src/neighbors/brute_force.cu @@ -15,6 +15,7 @@ */ #include "./detail/knn_brute_force.cuh" + #include #include @@ -84,25 +85,32 @@ void index::update_dataset(raft::resources const& res, dataset_view_ = raft::make_const_mdspan(dataset_.view()); } -#define CUVS_INST_BFKNN(T) \ - auto build(raft::resources const& res, \ - raft::device_matrix_view dataset, \ - cuvs::distance::DistanceType metric, \ - T metric_arg) \ - ->cuvs::neighbors::brute_force::index \ - { \ - return detail::build(res, dataset, metric, metric_arg); \ - } \ - \ - void search(raft::resources const& res, \ - const cuvs::neighbors::brute_force::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances) \ - { \ - detail::brute_force_search(res, idx, queries, neighbors, distances); \ - } \ - \ +#define CUVS_INST_BFKNN(T) \ + auto build(raft::resources const& res, \ + raft::device_matrix_view dataset, \ + cuvs::distance::DistanceType metric, \ + T metric_arg) \ + ->cuvs::neighbors::brute_force::index \ + { \ + return detail::build(res, dataset, metric, metric_arg); \ + } \ + \ + void search( \ + raft::resources const& res, \ + const cuvs::neighbors::brute_force::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + std::optional> sample_filter = std::nullopt) \ + { \ + if (!sample_filter.has_value()) { \ + detail::brute_force_search(res, idx, queries, neighbors, distances); \ + } else { \ + detail::brute_force_search_filtered( \ + res, idx, queries, *sample_filter, neighbors, distances); \ + } \ + } \ + \ template struct cuvs::neighbors::brute_force::index; CUVS_INST_BFKNN(float); diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index e988ac2f0c..5f04ffa340 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -66,7 +66,7 @@ void _search(cuvsResources_t res, auto distances_mds = cuvs::core::from_dlpack(distances_tensor); cuvs::neighbors::brute_force::search( - *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds); + *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds, std::nullopt); } } // namespace diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index 29cd26d9f3..4865ade77d 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -15,6 +15,7 @@ */ #pragma once + #include #include @@ -23,16 +24,26 @@ #include "./fused_l2_knn.cuh" #include "./haversine_distance.cuh" #include "./knn_merge_parts.cuh" +#include "./knn_utils.cuh" +#include +#include +#include #include #include #include #include #include #include +#include #include #include #include +#include +#include +#include +#include +#include #include #include @@ -65,7 +76,8 @@ void tiled_brute_force_knn(const raft::resources& handle, size_t max_row_tile_size = 0, size_t max_col_tile_size = 0, const ElementType* precomputed_index_norms = nullptr, - const ElementType* precomputed_search_norms = nullptr) + const ElementType* precomputed_search_norms = nullptr, + const uint32_t* filter_bitmap = nullptr) { // Figure out the number of rows/cols to tile for size_t tile_rows = 0; @@ -214,6 +226,27 @@ void tiled_brute_force_knn(const raft::resources& handle, }); } + if (filter_bitmap != nullptr) { + auto distances_ptr = temp_distances.data(); + auto count = thrust::make_counting_iterator(0); + ElementType masked_distance = select_min ? std::numeric_limits::infinity() + : std::numeric_limits::lowest(); + thrust::for_each(raft::resource::get_thrust_policy(handle), + count, + count + current_query_size * current_centroid_size, + [=] __device__(IndexType idx) { + IndexType row = i + (idx / current_centroid_size); + IndexType col = j + (idx % current_centroid_size); + IndexType g_idx = row * n + col; + IndexType item_idx = (g_idx) >> 5; + uint32_t bit_idx = (g_idx)&31; + uint32_t filter = filter_bitmap[item_idx]; + if ((filter & (uint32_t(1) << bit_idx)) == 0) { + distances_ptr[idx] = masked_distance; + } + }); + } + raft::matrix::select_k( handle, raft::make_device_matrix_view( @@ -519,6 +552,173 @@ void brute_force_search( query_norms ? query_norms->data_handle() : nullptr); } +template +void brute_force_search_filtered( + raft::resources const& res, + const cuvs::neighbors::brute_force::index& idx, + raft::device_matrix_view queries, + cuvs::core::bitmap_view filter, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + std::optional> query_norms = std::nullopt) +{ + auto metric = idx.metric(); + + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), "Value of k must match for outputs"); + RAFT_EXPECTS(idx.dataset().extent(1) == queries.extent(1), + "Number of columns in queries must match brute force index"); + RAFT_EXPECTS(metric == cuvs::distance::DistanceType::InnerProduct || + metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded || + metric == cuvs::distance::DistanceType::CosineExpanded, + "Only Euclidean, IP, and Cosine are supported!"); + + RAFT_EXPECTS(idx.has_norms() || !(metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded || + metric == cuvs::distance::DistanceType::CosineExpanded), + "Index must has norms when using Euclidean, IP, and Cosine!"); + + IdxT n_queries = queries.extent(0); + IdxT n_dataset = idx.dataset().extent(0); + IdxT dim = idx.dataset().extent(1); + IdxT k = neighbors.extent(1); + + auto stream = raft::resource::get_cuda_stream(res); + + // calc nnz + IdxT nnz_h = 0; + rmm::device_scalar nnz(0, stream); + auto nnz_view = raft::make_device_scalar_view(nnz.data()); + auto filter_view = + raft::make_device_vector_view(filter.data(), filter.n_elements()); + + // TODO(rhdong): Need to switch to the public API, + // with the issue: https://github.com/rapidsai/cuvs/issues/158 + raft::detail::popc(res, filter_view, n_queries * n_dataset, nnz_view); + raft::copy(&nnz_h, nnz.data(), 1, stream); + + raft::resource::sync_stream(res, stream); + float sparsity = (1.0f * nnz_h / (1.0f * n_queries * n_dataset)); + + if (sparsity > 0.01f) { + raft::resources stream_pool_handle(res); + raft::resource::set_cuda_stream(stream_pool_handle, stream); + auto idx_norm = idx.has_norms() ? const_cast(idx.norms().data_handle()) : nullptr; + + tiled_brute_force_knn(stream_pool_handle, + queries.data_handle(), + idx.dataset().data_handle(), + n_queries, + n_dataset, + dim, + k, + distances.data_handle(), + neighbors.data_handle(), + metric, + 2.0, + 0, + 0, + idx_norm, + nullptr, + filter.data()); + } else { + auto csr = raft::make_device_csr_matrix(res, n_queries, n_dataset, nnz_h); + + // fill csr + raft::sparse::convert::bitmap_to_csr(res, filter, csr); + + // create filter csr view + auto compressed_csr_view = csr.structure_view(); + rmm::device_uvector rows(compressed_csr_view.get_nnz(), stream); + raft::sparse::convert::csr_to_coo(compressed_csr_view.get_indptr().data(), + compressed_csr_view.get_n_rows(), + rows.data(), + compressed_csr_view.get_nnz(), + stream); + if (n_queries > 10) { + auto csr_view = raft::make_device_csr_matrix_view( + csr.get_elements().data(), compressed_csr_view); + + // create dataset view + auto dataset_view = raft::make_device_matrix_view( + idx.dataset().data_handle(), dim, n_dataset); + + // calc dot + T alpha = static_cast(1.0f); + T beta = static_cast(0.0f); + raft::sparse::linalg::sddmm(res, + queries, + dataset_view, + csr_view, + raft::linalg::Operation::NON_TRANSPOSE, + raft::linalg::Operation::NON_TRANSPOSE, + raft::make_host_scalar_view(&alpha), + raft::make_host_scalar_view(&beta)); + } else { + raft::sparse::distance::detail::faster_dot_on_csr(res, + csr.get_elements().data(), + compressed_csr_view.get_nnz(), + compressed_csr_view.get_indptr().data(), + compressed_csr_view.get_indices().data(), + queries.data_handle(), + idx.dataset().data_handle(), + compressed_csr_view.get_n_rows(), + dim); + } + + // post process + std::optional> query_norms_; + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded || + metric == cuvs::distance::DistanceType::CosineExpanded) { + if (metric == cuvs::distance::DistanceType::CosineExpanded) { + if (!query_norms) { + query_norms_ = raft::make_device_vector(res, n_queries); + raft::linalg::rowNorm((T*)(query_norms_->data_handle()), + queries.data_handle(), + dim, + n_queries, + raft::linalg::L2Norm, + true, + stream, + raft::sqrt_op{}); + } + } else { + if (!query_norms) { + query_norms_ = raft::make_device_vector(res, n_queries); + raft::linalg::rowNorm((T*)(query_norms_->data_handle()), + queries.data_handle(), + dim, + n_queries, + raft::linalg::L2Norm, + true, + stream, + raft::identity_op{}); + } + } + cuvs::neighbors::detail::epilogue_on_csr( + res, + csr.get_elements().data(), + compressed_csr_view.get_nnz(), + rows.data(), + compressed_csr_view.get_indices().data(), + query_norms ? query_norms->data_handle() : query_norms_->data_handle(), + idx.norms().data_handle(), + metric); + } + + // select k + auto const_csr_view = raft::make_device_csr_matrix_view( + csr.get_elements().data(), compressed_csr_view); + std::optional> no_opt = std::nullopt; + bool select_min = cuvs::distance::is_min_close(metric); + raft::sparse::matrix::select_k( + res, const_csr_view, no_opt, distances, neighbors, select_min, true); + } + + return; +} + template cuvs::neighbors::brute_force::index build( raft::resources const& res, diff --git a/cpp/src/neighbors/detail/knn_utils.cuh b/cpp/src/neighbors/detail/knn_utils.cuh new file mode 100644 index 0000000000..1cc709fa40 --- /dev/null +++ b/cpp/src/neighbors/detail/knn_utils.cuh @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include +#include + +namespace cuvs::neighbors::detail { + +template +RAFT_KERNEL epilogue_on_csr_kernel(value_t* __restrict__ compressed_C, + const value_idx* __restrict__ rows, + const value_idx* __restrict__ cols, + const value_t* __restrict__ Q_sq_norms, + const value_t* __restrict__ R_sq_norms, + value_idx nnz, + expansion_f expansion_func) +{ + auto tid = blockDim.x * blockIdx.x + threadIdx.x; + + if (tid >= nnz) return; + const value_idx i = rows[tid]; + const value_idx j = cols[tid]; + + compressed_C[tid] = expansion_func(compressed_C[tid], Q_sq_norms[i], R_sq_norms[j]); +} + +template +void epilogue_on_csr(raft::resources const& handle, + value_t* compressed_C, + const value_idx nnz, + const value_idx* rows, + const value_idx* cols, + const value_t* Q_sq_norms, + const value_t* R_sq_norms, + cuvs::distance::DistanceType metric) +{ + if (nnz == 0) return; + auto stream = raft::resource::get_cuda_stream(handle); + + int blocks = raft::ceildiv((size_t)nnz, tpb); + if (metric == cuvs::distance::DistanceType::L2Expanded) { + epilogue_on_csr_kernel<<>>( + compressed_C, + rows, + cols, + Q_sq_norms, + R_sq_norms, + nnz, + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { + return value_t(-2.0) * dot + q_norm + r_norm; + }); + } else if (metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + epilogue_on_csr_kernel<<>>( + compressed_C, + rows, + cols, + Q_sq_norms, + R_sq_norms, + nnz, + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { + return raft::sqrt(value_t(-2.0) * dot + q_norm + r_norm); + }); + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + epilogue_on_csr_kernel<<>>( + compressed_C, + rows, + cols, + Q_sq_norms, + R_sq_norms, + nnz, + [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) -> value_t { + return value_t(1.0) - dot / (q_norm * r_norm); + }); + } + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} +} // namespace cuvs::neighbors::detail diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 3fbf62cdba..1fae2f70ba 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -91,7 +91,15 @@ endfunction() if(BUILD_TESTS) ConfigureTest( - NAME NEIGHBORS_TEST PATH test/neighbors/brute_force.cu test/neighbors/refine.cu GPUS 1 PERCENT + NAME + NEIGHBORS_TEST + PATH + test/neighbors/brute_force.cu + test/neighbors/brute_force_prefiltered.cu + test/neighbors/refine.cu + GPUS + 1 + PERCENT 100 ) diff --git a/cpp/test/neighbors/brute_force.cu b/cpp/test/neighbors/brute_force.cu index fdb1801861..081a2966ee 100644 --- a/cpp/test/neighbors/brute_force.cu +++ b/cpp/test/neighbors/brute_force.cu @@ -82,7 +82,7 @@ class KNNTest : public ::testing::TestWithParam { auto metric = cuvs::distance::DistanceType::L2Unexpanded; auto idx = cuvs::neighbors::brute_force::build(handle, index, metric); - cuvs::neighbors::brute_force::search(handle, idx, search, indices, distances); + cuvs::neighbors::brute_force::search(handle, idx, search, indices, distances, std::nullopt); build_actual_output<<>>( actual_labels_.data(), rows_, k_, search_labels_.data(), indices_.data()); diff --git a/cpp/test/neighbors/brute_force_prefiltered.cu b/cpp/test/neighbors/brute_force_prefiltered.cu new file mode 100644 index 0000000000..17166fd7a5 --- /dev/null +++ b/cpp/test/neighbors/brute_force_prefiltered.cu @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include "knn_utils.cuh" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::neighbors::brute_force { + +template +struct PrefilteredBruteForceInputs { + index_t n_queries; + index_t n_dataset; + index_t dim; + index_t top_k; + float sparsity; + cuvs::distance::DistanceType metric; + bool select_min = true; +}; + +template +struct CompareApproxWithInf { + CompareApproxWithInf(T eps_) : eps(eps_) {} + bool operator()(const T& a, const T& b) const + { + if (std::isinf(a) && std::isinf(b)) return true; + T diff = std::abs(a - b); + T m = std::max(std::abs(a), std::abs(b)); + T ratio = diff > eps ? diff / m : diff; + + return (ratio <= eps); + } + + private: + T eps; +}; + +template +RAFT_KERNEL normalize_kernel( + OutT* theta, const InT* in_vals, size_t max_scale, size_t r_scale, size_t c_scale) +{ + size_t idx = threadIdx.x; + if (idx < max_scale) { + auto a = OutT(in_vals[4 * idx]); + auto b = OutT(in_vals[4 * idx + 1]); + auto c = OutT(in_vals[4 * idx + 2]); + auto d = OutT(in_vals[4 * idx + 3]); + auto sum = a + b + c + d; + a /= sum; + b /= sum; + c /= sum; + d /= sum; + theta[4 * idx] = a; + theta[4 * idx + 1] = b; + theta[4 * idx + 2] = c; + theta[4 * idx + 3] = d; + } +} + +template +void normalize(OutT* theta, + const InT* in_vals, + size_t max_scale, + size_t r_scale, + size_t c_scale, + bool handle_rect, + bool theta_array, + cudaStream_t stream) +{ + normalize_kernel<<<1, 256, 0, stream>>>(theta, in_vals, max_scale, r_scale, c_scale); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +template +RAFT_KERNEL set_bitmap_kernel( + const index_t* src, const index_t* dst, bitmap_t* bitmap, index_t n_edges, index_t n_cols) +{ + size_t idx = threadIdx.x + blockDim.x * blockIdx.x; + if (idx < n_edges) { + index_t row = src[idx]; + index_t col = dst[idx]; + index_t g_idx = row * n_cols + col; + index_t item_idx = (g_idx) >> 5; + uint32_t bit_idx = (g_idx)&31; + atomicOr(bitmap + item_idx, (uint32_t(1) << bit_idx)); + } +} + +template +void set_bitmap(const index_t* src, + const index_t* dst, + bitmap_t* bitmap, + index_t n_edges, + index_t n_cols, + cudaStream_t stream) +{ + int block_size = 256; + int blocks = raft::ceildiv(n_edges, block_size); + set_bitmap_kernel + <<>>(src, dst, bitmap, n_edges, n_cols); + RAFT_CUDA_TRY(cudaGetLastError()); +} +template +class PrefilteredBruteForceTest + : public ::testing::TestWithParam> { + public: + PrefilteredBruteForceTest() + : stream(raft::resource::get_cuda_stream(handle)), + params(::testing::TestWithParam>::GetParam()), + filter_d(0, stream), + dataset_d(0, stream), + queries_d(0, stream), + out_val_d(0, stream), + out_val_expected_d(0, stream), + out_idx_d(0, stream), + out_idx_expected_d(0, stream) + { + } + + protected: + index_t create_sparse_matrix_with_rmat(index_t m, + index_t n, + value_t sparsity, + rmm::device_uvector& filter_d) + { + index_t r_scale = (index_t)std::log2(m); + index_t c_scale = (index_t)std::log2(n); + index_t n_edges = (index_t)(m * n * 1.0 * sparsity); + index_t max_scale = std::max(r_scale, c_scale); + + rmm::device_uvector out_src{(unsigned long)n_edges, stream}; + rmm::device_uvector out_dst{(unsigned long)n_edges, stream}; + rmm::device_uvector theta{(unsigned long)(4 * max_scale), stream}; + + raft::random::RngState state{2024ULL, raft::random::GeneratorType::GenPC}; + + raft::random::uniform(handle, state, theta.data(), theta.size(), 0.0f, 1.0f); + normalize( + theta.data(), theta.data(), max_scale, r_scale, c_scale, r_scale != c_scale, true, stream); + raft::random::rmat_rectangular_gen((index_t*)nullptr, + out_src.data(), + out_dst.data(), + theta.data(), + r_scale, + c_scale, + n_edges, + stream, + state); + + index_t nnz_h = 0; + { + auto src = out_src.data(); + auto dst = out_dst.data(); + auto bitmap = filter_d.data(); + rmm::device_scalar nnz(0, stream); + auto nnz_view = raft::make_device_scalar_view(nnz.data()); + auto filter_view = + raft::make_device_vector_view(filter_d.data(), filter_d.size()); + + set_bitmap(src, dst, bitmap, n_edges, n, stream); + + // TODO(rhdong): Need to switch to the public API, + // with the issue: https://github.com/rapidsai/cuvs/issues/158 + raft::detail::popc(handle, filter_view, m * n, nnz_view); + raft::copy(&nnz_h, nnz.data(), 1, stream); + + raft::resource::sync_stream(handle, stream); + } + + return nnz_h; + } + + void cpu_convert_to_csr(std::vector& bitmap, + index_t rows, + index_t cols, + std::vector& indices, + std::vector& indptr) + { + index_t offset_indptr = 0; + index_t offset_values = 0; + indptr[offset_indptr++] = 0; + + index_t index = 0; + bitmap_t element = 0; + index_t bit_position = 0; + + for (index_t i = 0; i < rows; ++i) { + for (index_t j = 0; j < cols; ++j) { + index = i * cols + j; + element = bitmap[index / (8 * sizeof(bitmap_t))]; + bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1)) { + indices[offset_values] = static_cast(j); + offset_values++; + } + } + indptr[offset_indptr++] = static_cast(offset_values); + } + } + + void cpu_sddmm(const std::vector& A, + const std::vector& B, + std::vector& vals, + const std::vector& cols, + const std::vector& row_ptrs, + bool is_row_major_A, + bool is_row_major_B, + value_t alpha = 1.0, + value_t beta = 0.0) + { + if (params.n_queries * params.dim != static_cast(A.size()) || + params.dim * params.n_dataset != static_cast(B.size())) { + std::cerr << "Matrix dimensions and vector size do not match!" << std::endl; + return; + } + + bool trans_a = is_row_major_A; + bool trans_b = is_row_major_B; + + for (index_t i = 0; i < params.n_queries; ++i) { + for (index_t j = row_ptrs[i]; j < row_ptrs[i + 1]; ++j) { + value_t sum = 0; + value_t norms_A = 0; + value_t norms_B = 0; + for (index_t l = 0; l < params.dim; ++l) { + index_t a_index = trans_a ? i * params.dim + l : l * params.n_queries + i; + index_t b_index = trans_b ? l * params.n_dataset + cols[j] : cols[j] * params.dim + l; + sum += A[a_index] * B[b_index]; + + norms_A += A[a_index] * A[a_index]; + norms_B += B[b_index] * B[b_index]; + } + vals[j] = alpha * sum + beta * vals[j]; + if (params.metric == cuvs::distance::DistanceType::L2Expanded) { + vals[j] = value_t(-2.0) * vals[j] + norms_A + norms_B; + } else if (params.metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + vals[j] = std::sqrt(value_t(-2.0) * vals[j] + norms_A + norms_B); + } else if (params.metric == cuvs::distance::DistanceType::CosineExpanded) { + vals[j] = value_t(1.0) - vals[j] / std::sqrt(norms_A * norms_B); + } + } + } + } + + void cpu_select_k(const std::vector& indptr_h, + const std::vector& indices_h, + const std::vector& values_h, + std::optional>& in_idx_h, + index_t n_queries, + index_t n_dataset, + index_t top_k, + std::vector& out_values_h, + std::vector& out_indices_h, + bool select_min = true) + { + auto comp = [select_min](const std::pair& a, + const std::pair& b) { + return select_min ? a.first < b.first : a.first >= b.first; + }; + + for (index_t row = 0; row < n_queries; ++row) { + std::priority_queue, + std::vector>, + decltype(comp)> + pq(comp); + + for (index_t idx = indptr_h[row]; idx < indptr_h[row + 1]; ++idx) { + pq.push({values_h[idx], (in_idx_h.has_value()) ? (*in_idx_h)[idx] : indices_h[idx]}); + if (pq.size() > size_t(top_k)) { pq.pop(); } + } + + std::vector> row_pairs; + while (!pq.empty()) { + row_pairs.push_back(pq.top()); + pq.pop(); + } + + if (select_min) { + std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) { + return a.first <= b.first; + }); + } else { + std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) { + return a.first >= b.first; + }); + } + for (index_t col = 0; col < top_k; col++) { + if (col < index_t(row_pairs.size())) { + out_values_h[row * top_k + col] = row_pairs[col].first; + out_indices_h[row * top_k + col] = row_pairs[col].second; + } + } + } + } + + void SetUp() override + { + index_t element = + raft::ceildiv(params.n_queries * params.n_dataset, index_t(sizeof(bitmap_t) * 8)); + std::vector filter_h(element); + filter_d.resize(element, stream); + + nnz = + create_sparse_matrix_with_rmat(params.n_queries, params.n_dataset, params.sparsity, filter_d); + + raft::update_host(filter_h.data(), filter_d.data(), filter_d.size(), stream); + raft::resource::sync_stream(handle, stream); + + index_t dataset_size = params.n_dataset * params.dim; + index_t queries_size = params.n_queries * params.dim; + + std::vector dataset_h(dataset_size); + std::vector queries_h(queries_size); + + dataset_d.resize(dataset_size, stream); + queries_d.resize(queries_size, stream); + + auto blobs_in_val = + raft::make_device_matrix(handle, 1, dataset_size + queries_size); + auto labels = raft::make_device_vector(handle, 1); + + raft::random::make_blobs(blobs_in_val.data_handle(), + labels.data_handle(), + 1, + dataset_size + queries_size, + 1, + stream, + false, + nullptr, + nullptr, + value_t(1.0), + false, + value_t(-1.0f), + value_t(1.0f), + uint64_t(2024)); + + raft::copy(dataset_h.data(), blobs_in_val.data_handle(), dataset_size, stream); + raft::copy(dataset_d.data(), blobs_in_val.data_handle(), dataset_size, stream); + + raft::copy(queries_h.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream); + raft::copy(queries_d.data(), blobs_in_val.data_handle() + dataset_size, queries_size, stream); + + raft::resource::sync_stream(handle); + + std::vector values_h(nnz); + std::vector indices_h(nnz); + std::vector indptr_h(params.n_queries + 1); + + cpu_convert_to_csr(filter_h, params.n_queries, params.n_dataset, indices_h, indptr_h); + + cpu_sddmm(queries_h, dataset_h, values_h, indices_h, indptr_h, true, false); + + bool select_min = cuvs::distance::is_min_close(params.metric); + + std::vector out_val_h(params.n_queries * params.top_k, + select_min ? std::numeric_limits::infinity() + : std::numeric_limits::lowest()); + std::vector out_idx_h(params.n_queries * params.top_k, static_cast(0)); + + out_val_d.resize(params.n_queries * params.top_k, stream); + out_idx_d.resize(params.n_queries * params.top_k, stream); + + raft::update_device(out_val_d.data(), out_val_h.data(), out_val_h.size(), stream); + raft::update_device(out_idx_d.data(), out_idx_h.data(), out_idx_h.size(), stream); + + raft::resource::sync_stream(handle); + + std::optional> optional_indices_h = std::nullopt; + + cpu_select_k(indptr_h, + indices_h, + values_h, + optional_indices_h, + params.n_queries, + params.n_dataset, + params.top_k, + out_val_h, + out_idx_h, + select_min); + + out_val_expected_d.resize(params.n_queries * params.top_k, stream); + out_idx_expected_d.resize(params.n_queries * params.top_k, stream); + + raft::update_device(out_val_expected_d.data(), out_val_h.data(), out_val_h.size(), stream); + raft::update_device(out_idx_expected_d.data(), out_idx_h.data(), out_idx_h.size(), stream); + + raft::resource::sync_stream(handle); + } + + void Run() + { + auto dataset_raw = raft::make_device_matrix_view( + (const value_t*)dataset_d.data(), params.n_dataset, params.dim); + + auto queries = raft::make_device_matrix_view( + (const value_t*)queries_d.data(), params.n_queries, params.dim); + + auto dataset = brute_force::build(handle, dataset_raw, params.metric); + + auto filter = cuvs::core::bitmap_view( + (const bitmap_t*)filter_d.data(), params.n_queries, params.n_dataset); + + auto out_val = raft::make_device_matrix_view( + out_val_d.data(), params.n_queries, params.top_k); + auto out_idx = raft::make_device_matrix_view( + out_idx_d.data(), params.n_queries, params.top_k); + + brute_force::search(handle, dataset, queries, out_idx, out_val, std::make_optional(filter)); + + ASSERT_TRUE(cuvs::neighbors::devArrMatchKnnPair(out_idx_expected_d.data(), + out_idx.data_handle(), + out_val_expected_d.data(), + out_val.data_handle(), + params.n_queries, + params.top_k, + 0.001f, + stream, + true)); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + PrefilteredBruteForceInputs params; + + index_t nnz; + + rmm::device_uvector dataset_d; + rmm::device_uvector queries_d; + rmm::device_uvector filter_d; + + rmm::device_uvector out_val_d; + rmm::device_uvector out_val_expected_d; + + rmm::device_uvector out_idx_d; + rmm::device_uvector out_idx_expected_d; +}; + +using PrefilteredBruteForceTest_float_int64 = PrefilteredBruteForceTest; +TEST_P(PrefilteredBruteForceTest_float_int64, Result) { Run(); } + +template +const std::vector> selectk_inputs = { + {2, 131072, 255, 255, 0.4, cuvs::distance::DistanceType::L2Expanded}, + {8, 131072, 512, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, + {16, 131072, 2052, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, + {2, 8192, 255, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, + {16, 8192, 512, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, + {128, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 1, 0, 0.1, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 3, 0, 0.1, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 5, 0, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 8, 0, 0.1, cuvs::distance::DistanceType::CosineExpanded}, + + {1024, 8192, 1, 1, 0.1, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 3, 1, 0.1, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 5, 1, 0.1, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 8, 1, 0.1, cuvs::distance::DistanceType::CosineExpanded}, + + {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 2050, 16, 0.4, cuvs::distance::DistanceType::CosineExpanded}, + {1024, 8192, 2051, 16, 0.5, cuvs::distance::DistanceType::CosineExpanded}, + {1024, 8192, 2052, 16, 0.2, cuvs::distance::DistanceType::CosineExpanded}, + + {1024, 8192, 1, 16, 0.5, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 2, 16, 0.2, cuvs::distance::DistanceType::L2Expanded}, + {1024, 8192, 3, 16, 0.4, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 4, 16, 0.5, cuvs::distance::DistanceType::InnerProduct}, + {1024, 8192, 5, 16, 0.2, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 8, 16, 0.4, cuvs::distance::DistanceType::L2SqrtExpanded}, + {1024, 8192, 5, 16, 0.5, cuvs::distance::DistanceType::CosineExpanded}, + {1024, 8192, 8, 16, 0.2, cuvs::distance::DistanceType::CosineExpanded}}; + +INSTANTIATE_TEST_CASE_P(PrefilteredBruteForceTest, + PrefilteredBruteForceTest_float_int64, + ::testing::ValuesIn(selectk_inputs)); + +} // namespace cuvs::neighbors::brute_force diff --git a/cpp/test/neighbors/knn_utils.cuh b/cpp/test/neighbors/knn_utils.cuh new file mode 100644 index 0000000000..d95174ef65 --- /dev/null +++ b/cpp/test/neighbors/knn_utils.cuh @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../test_utils.cuh" + +#include + +#include + +#include + +namespace cuvs::neighbors { +template +struct idx_dist_pair { + IdxT idx; + DistT dist; + compareDist eq_compare; + bool operator==(const idx_dist_pair& a) const + { + if (idx == a.idx) return true; + if (eq_compare(dist, a.dist)) return true; + return false; + } + idx_dist_pair(IdxT x, DistT y, compareDist op) : idx(x), dist(y), eq_compare(op) {} +}; + +template +testing::AssertionResult devArrMatchKnnPair(const T* expected_idx, + const T* actual_idx, + const DistT* expected_dist, + const DistT* actual_dist, + size_t rows, + size_t cols, + const DistT eps, + cudaStream_t stream = 0, + bool sort_inputs = false) +{ + size_t size = rows * cols; + std::unique_ptr exp_idx_h(new T[size]); + std::unique_ptr act_idx_h(new T[size]); + std::unique_ptr exp_dist_h(new DistT[size]); + std::unique_ptr act_dist_h(new DistT[size]); + raft::update_host(exp_idx_h.get(), expected_idx, size, stream); + raft::update_host(act_idx_h.get(), actual_idx, size, stream); + raft::update_host(exp_dist_h.get(), expected_dist, size, stream); + raft::update_host(act_dist_h.get(), actual_dist, size, stream); + + RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); + for (size_t i(0); i < rows; ++i) { + std::vector> actual; + std::vector> expected; + for (size_t j(0); j < cols; ++j) { + auto idx = i * cols + j; // row major assumption! + auto exp_idx = exp_idx_h.get()[idx]; + auto act_idx = act_idx_h.get()[idx]; + auto exp_dist = exp_dist_h.get()[idx]; + auto act_dist = act_dist_h.get()[idx]; + actual.push_back(std::make_pair(act_dist, act_idx)); + expected.push_back(std::make_pair(exp_dist, exp_idx)); + } + if (sort_inputs) { + // inputs could be unsorted here, sort for comparison + std::sort(actual.begin(), actual.end()); + std::sort(expected.begin(), expected.end()); + } + for (size_t j(0); j < cols; ++j) { + auto act = actual[j]; + auto exp = expected[j]; + idx_dist_pair exp_kvp(exp.second, exp.first, cuvs::CompareApprox(eps)); + idx_dist_pair act_kvp(act.second, act.first, cuvs::CompareApprox(eps)); + if (!(exp_kvp == act_kvp)) { + return testing::AssertionFailure() + << "actual=" << act_kvp.idx << "," << act_kvp.dist << "!=" + << "expected" << exp_kvp.idx << "," << exp_kvp.dist << " @" << i << "," << j; + } + } + } + return testing::AssertionSuccess(); +} +} // namespace cuvs::neighbors From 5937d543af276123d57837650309c913b1ca196d Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Thu, 30 May 2024 08:01:41 +0900 Subject: [PATCH 05/11] chore: update Doxyfile (#162) efficively -> effectively Authors: - Ikko Eltociear Ashimine (https://github.com/eltociear) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/162 --- cpp/doxygen/Doxyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 614cf43d99..2459d521d5 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -454,7 +454,7 @@ LOOKUP_CACHE_SIZE = 0 # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. From 758d147b50e8f25b15b892a015d3f090f30bd5b7 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Wed, 29 May 2024 18:11:21 -0700 Subject: [PATCH 06/11] Select k instantiations (#159) Since we're now using raft in header only mode, we don't have the compiled select_k instantiations in raft available to us anymore. Instead instantiate inside cuvs so we don't recompile in multiple spots. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/159 --- cpp/CMakeLists.txt | 3 + cpp/include/cuvs/selection/select_k.hpp | 201 ++++++++++++++++++ .../detail/cagra/search_multi_kernel.cuh | 5 +- cpp/src/neighbors/detail/knn_brute_force.cuh | 6 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 62 +++--- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 42 ++-- cpp/src/selection/select_k.cuh | 80 +++++++ cpp/src/selection/select_k_float_int64_t.cu | 19 ++ cpp/src/selection/select_k_float_uint32_t.cu | 19 ++ cpp/src/selection/select_k_half_uint32_t.cu | 19 ++ docs/source/cpp_api.rst | 1 + docs/source/cpp_api/selection.rst | 19 ++ 12 files changed, 423 insertions(+), 53 deletions(-) create mode 100644 cpp/include/cuvs/selection/select_k.hpp create mode 100644 cpp/src/selection/select_k.cuh create mode 100644 cpp/src/selection/select_k_float_int64_t.cu create mode 100644 cpp/src/selection/select_k_float_uint32_t.cu create mode 100644 cpp/src/selection/select_k_half_uint32_t.cu create mode 100644 docs/source/cpp_api/selection.rst diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d0c4f85489..d4ab1e62bb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -437,6 +437,9 @@ add_library( src/neighbors/refine/detail/refine_host_int8_t_float.cpp src/neighbors/refine/detail/refine_host_uint8_t_float.cpp src/neighbors/sample_filter.cu + src/selection/select_k_float_int64_t.cu + src/selection/select_k_float_uint32_t.cu + src/selection/select_k_half_uint32_t.cu ) target_compile_options( diff --git a/cpp/include/cuvs/selection/select_k.hpp b/cpp/include/cuvs/selection/select_k.hpp new file mode 100644 index 0000000000..dc34caf419 --- /dev/null +++ b/cpp/include/cuvs/selection/select_k.hpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +#include + +namespace cuvs::selection { +using SelectAlgo = raft::matrix::SelectAlgo; + +/** + * @defgroup select_k Batched-select k smallest or largest key/values + * @{ + */ + +/** + * Select k smallest or largest key/values from each row in the input data. + * + * If you think of the input data `in_val` as a row-major matrix with `len` columns and + * `batch_size` rows, then this function selects `k` smallest/largest values in each row and fills + * in the row-major matrix `out_val` of size (batch_size, k). + * + * Example usage + * @code{.cpp} + * using namespace raft; + * // get a 2D row-major array of values to search through + * auto in_values = {... input device_matrix_view ...} + * // prepare output arrays + * auto out_extents = make_extents(in_values.extent(0), k); + * auto out_values = make_device_mdarray(handle, out_extents); + * auto out_indices = make_device_mdarray(handle, out_extents); + * // search `k` smallest values in each row + * cuvs::selection::select_k( + * handle, in_values, std::nullopt, out_values.view(), out_indices.view(), true); + * @endcode + * + * @param[in] handle container of reusable resources + * @param[in] in_val + * inputs values [batch_size, len]; + * these are compared and selected. + * @param[in] in_idx + * optional input payload [batch_size, len]; + * typically, these are indices of the corresponding `in_val`. + * If `in_idx` is `std::nullopt`, a contiguous array `0...len-1` is implied. + * @param[out] out_val + * output values [batch_size, k]; + * the k smallest/largest values from each row of the `in_val`. + * @param[out] out_idx + * output payload (e.g. indices) [batch_size, k]; + * the payload selected together with `out_val`. + * @param[in] select_min + * whether to select k smallest (true) or largest (false) keys. + * @param[in] sorted + * whether to make sure selected pairs are sorted by value + * @param[in] algo + * the selection algorithm to use + * @param[in] len_i + * optional array of size (batch_size) providing lengths for each individual row + */ +void select_k( + raft::resources const& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min, + bool sorted = false, + SelectAlgo algo = SelectAlgo::kAuto, + std::optional> len_i = std::nullopt); + +/** + * Select k smallest or largest key/values from each row in the input data. + * + * If you think of the input data `in_val` as a row-major matrix with `len` columns and + * `batch_size` rows, then this function selects `k` smallest/largest values in each row and fills + * in the row-major matrix `out_val` of size (batch_size, k). + * + * Example usage + * @code{.cpp} + * using namespace raft; + * // get a 2D row-major array of values to search through + * auto in_values = {... input device_matrix_view ...} + * // prepare output arrays + * auto out_extents = make_extents(in_values.extent(0), k); + * auto out_values = make_device_mdarray(handle, out_extents); + * auto out_indices = make_device_mdarray(handle, out_extents); + * // search `k` smallest values in each row + * cuvs::selection::select_k( + * handle, in_values, std::nullopt, out_values.view(), out_indices.view(), true); + * @endcode + * + * @param[in] handle container of reusable resources + * @param[in] in_val + * inputs values [batch_size, len]; + * these are compared and selected. + * @param[in] in_idx + * optional input payload [batch_size, len]; + * typically, these are indices of the corresponding `in_val`. + * If `in_idx` is `std::nullopt`, a contiguous array `0...len-1` is implied. + * @param[out] out_val + * output values [batch_size, k]; + * the k smallest/largest values from each row of the `in_val`. + * @param[out] out_idx + * output payload (e.g. indices) [batch_size, k]; + * the payload selected together with `out_val`. + * @param[in] select_min + * whether to select k smallest (true) or largest (false) keys. + * @param[in] sorted + * whether to make sure selected pairs are sorted by value + * @param[in] algo + * the selection algorithm to use + * @param[in] len_i + * optional array of size (batch_size) providing lengths for each individual row + */ +void select_k( + raft::resources const& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min, + bool sorted = false, + SelectAlgo algo = SelectAlgo::kAuto, + std::optional> len_i = std::nullopt); + +/** + * Select k smallest or largest key/values from each row in the input data. + * + * If you think of the input data `in_val` as a row-major matrix with `len` columns and + * `batch_size` rows, then this function selects `k` smallest/largest values in each row and fills + * in the row-major matrix `out_val` of size (batch_size, k). + * + * Example usage + * @code{.cpp} + * using namespace raft; + * // get a 2D row-major array of values to search through + * auto in_values = {... input device_matrix_view ...} + * // prepare output arrays + * auto out_extents = make_extents(in_values.extent(0), k); + * auto out_values = make_device_mdarray(handle, out_extents); + * auto out_indices = make_device_mdarray(handle, out_extents); + * // search `k` smallest values in each row + * cuvs::selection::select_k( + * handle, in_values, std::nullopt, out_values.view(), out_indices.view(), true); + * @endcode + * + * @param[in] handle container of reusable resources + * @param[in] in_val + * inputs values [batch_size, len]; + * these are compared and selected. + * @param[in] in_idx + * optional input payload [batch_size, len]; + * typically, these are indices of the corresponding `in_val`. + * If `in_idx` is `std::nullopt`, a contiguous array `0...len-1` is implied. + * @param[out] out_val + * output values [batch_size, k]; + * the k smallest/largest values from each row of the `in_val`. + * @param[out] out_idx + * output payload (e.g. indices) [batch_size, k]; + * the payload selected together with `out_val`. + * @param[in] select_min + * whether to select k smallest (true) or largest (false) keys. + * @param[in] sorted + * whether to make sure selected pairs are sorted by value + * @param[in] algo + * the selection algorithm to use + * @param[in] len_i + * optional array of size (batch_size) providing lengths for each individual row + */ +void select_k( + raft::resources const& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min, + bool sorted = false, + SelectAlgo algo = SelectAlgo::kAuto, + std::optional> len_i = std::nullopt); +/** @} */ // end of group select_k + +} // namespace cuvs::selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh index 4dd53600ce..bc1266fb4b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_kernel.cuh @@ -30,9 +30,8 @@ #include -#include - #include +#include // TODO: This shouldn't be invoking anything from spatial/knn #include "../ann_utils.cuh" @@ -817,7 +816,7 @@ struct search : search_plan_impl { output_values_storage.resize(sizeBatch * topK, stream); } - raft::matrix::select_k( + cuvs::selection::select_k( handle, raft::make_device_matrix_view(inputKeys, sizeBatch, numElements), raft::make_device_matrix_view(inputVals, sizeBatch, numElements), diff --git a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index 4865ade77d..97f7fba75d 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -18,6 +18,7 @@ #include #include +#include #include "../../distance/detail/distance_ops/l2_exp.cuh" #include "./faiss_distance_utils.h" @@ -38,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -247,7 +247,7 @@ void tiled_brute_force_knn(const raft::resources& handle, }); } - raft::matrix::select_k( + cuvs::selection::select_k( handle, raft::make_device_matrix_view( temp_distances.data(), current_query_size, current_centroid_size), @@ -289,7 +289,7 @@ void tiled_brute_force_knn(const raft::resources& handle, if (tile_cols != n) { // select the actual top-k items here from the temporary output - raft::matrix::select_k( + cuvs::selection::select_k( handle, raft::make_device_matrix_view( temp_out_distances.data(), current_query_size, temp_out_cols), diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index f55e99c185..d5efdeb2dc 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -23,15 +23,15 @@ #include // none_ivf_sample_filter #include // raft::neighbors::ivf_flat::index -#include "../detail/ann_utils.cuh" // utils::mapping -#include // is_min_close, DistanceType -#include // RAFT_LOG_TRACE +#include "../detail/ann_utils.cuh" // utils::mapping +#include // is_min_close, DistanceType +#include // cuvs::selection::select_k +#include // RAFT_LOG_TRACE #include -#include // raft::resources -#include // raft::linalg::gemm -#include // raft::linalg::norm -#include // raft::linalg::unary_op -#include // matrix::detail::select_k +#include // raft::resources +#include // raft::linalg::gemm +#include // raft::linalg::norm +#include // raft::linalg::unary_op #include @@ -145,15 +145,17 @@ void search_impl(raft::resources const& handle, stream); RAFT_LOG_TRACE_VEC(distance_buffer_dev.data(), std::min(20, index.n_lists())); - raft::matrix::detail::select_k(handle, - distance_buffer_dev.data(), - nullptr, - n_queries, - index.n_lists(), - n_probes, - coarse_distances_dev.data(), - coarse_indices_dev.data(), - select_min); + + cuvs::selection::select_k( + handle, + raft::make_device_matrix_view( + distance_buffer_dev.data(), n_queries, index.n_lists()), + std::nullopt, + raft::make_device_matrix_view(coarse_distances_dev.data(), n_queries, n_probes), + raft::make_device_matrix_view( + coarse_indices_dev.data(), n_queries, n_probes), + select_min); + RAFT_LOG_TRACE_VEC(coarse_indices_dev.data(), n_probes); RAFT_LOG_TRACE_VEC(coarse_distances_dev.data(), n_probes); @@ -238,19 +240,25 @@ void search_impl(raft::resources const& handle, // Merge topk values from different blocks if (!manage_local_topk || grid_dim_x > 1) { - raft::matrix::detail::select_k( + std::optional> num_samples_vector; + if (!manage_local_topk) { + num_samples_vector = + raft::make_device_vector_view(num_samples.data(), n_queries); + } + + auto cols = manage_local_topk ? (k * grid_dim_x) : max_samples; + + cuvs::selection::select_k( handle, - distances_tmp_dev.data(), - indices_tmp_dev.data(), - n_queries, - manage_local_topk ? (k * grid_dim_x) : max_samples, - k, - distances, - neighbors_uint32, + raft::make_device_matrix_view(distances_tmp_dev.data(), n_queries, cols), + raft::make_device_matrix_view( + indices_tmp_dev.data(), n_queries, cols), + raft::make_device_matrix_view(distances, n_queries, k), + raft::make_device_matrix_view(neighbors_uint32, n_queries, k), select_min, false, - raft::matrix::SelectAlgo::kAuto, - manage_local_topk ? nullptr : num_samples.data()); + cuvs::selection::SelectAlgo::kAuto, + num_samples_vector); } if (!manage_local_topk) { // post process distances && neighbor IDs diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 16cf7e7437..e131b8fb30 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -37,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -158,15 +158,13 @@ void select_clusters(raft::resources const& handle, // Select neighbor clusters for each query. rmm::device_uvector cluster_dists(n_queries * n_probes, stream, mr); - raft::matrix::detail::select_k(handle, - qc_distances.data(), - nullptr, - n_queries, - n_lists, - n_probes, - cluster_dists.data(), - clusters_to_probe, - true); + cuvs::selection::select_k( + handle, + raft::make_device_matrix_view(qc_distances.data(), n_queries, n_lists), + std::nullopt, + raft::make_device_matrix_view(cluster_dists.data(), n_queries, n_probes), + raft::make_device_matrix_view(clusters_to_probe, n_queries, n_probes), + true); } /** @@ -440,19 +438,23 @@ void ivfpq_search_worker(raft::resources const& handle, // Select topk vectors for each query rmm::device_uvector topk_dists(n_queries * topK, stream, mr); - raft::matrix::detail::select_k( + + std::optional> num_samples_vector; + if (!manage_local_topk) { + num_samples_vector = + raft::make_device_vector_view(num_samples.data(), n_queries); + } + + cuvs::selection::select_k( handle, - distances_buf.data(), - neighbors_ptr, - n_queries, - topk_len, - topK, - topk_dists.data(), - neighbors_uint32, + raft::make_device_matrix_view(distances_buf.data(), n_queries, topk_len), + raft::make_device_matrix_view(neighbors_ptr, n_queries, topk_len), + raft::make_device_matrix_view(topk_dists.data(), n_queries, topK), + raft::make_device_matrix_view(neighbors_uint32, n_queries, topK), true, false, - raft::matrix::SelectAlgo::kAuto, - manage_local_topk ? nullptr : num_samples.data()); + cuvs::selection::SelectAlgo::kAuto, + num_samples_vector); // Postprocessing ivf::detail::postprocess_distances( diff --git a/cpp/src/selection/select_k.cuh b/cpp/src/selection/select_k.cuh new file mode 100644 index 0000000000..78c30f98a5 --- /dev/null +++ b/cpp/src/selection/select_k.cuh @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cuvs::selection::detail { + +template +void select_k(raft::resources const& handle, + raft::device_matrix_view in_val, + std::optional> in_idx, + raft::device_matrix_view out_val, + raft::device_matrix_view out_idx, + bool select_min, + bool sorted, + SelectAlgo algo, + std::optional> len_i) +{ + RAFT_EXPECTS(out_val.extent(1) <= int64_t(std::numeric_limits::max()), + "output k must fit the int type."); + auto batch_size = in_val.extent(0); + auto len = in_val.extent(1); + auto k = int(out_val.extent(1)); + RAFT_EXPECTS(batch_size == out_val.extent(0), "batch sizes must be equal"); + RAFT_EXPECTS(batch_size == out_idx.extent(0), "batch sizes must be equal"); + if (in_idx.has_value()) { + RAFT_EXPECTS(batch_size == in_idx->extent(0), "batch sizes must be equal"); + RAFT_EXPECTS(len == in_idx->extent(1), "value and index input lengths must be equal"); + } + RAFT_EXPECTS(int64_t(k) == out_idx.extent(1), "value and index output lengths must be equal"); + + // just delegate implementation to raft - the primary benefit here is to have + // instantiations only compiled once in cuvs + return raft::matrix::detail::select_k( + handle, + in_val.data_handle(), + in_idx.has_value() ? in_idx->data_handle() : nullptr, + batch_size, + len, + k, + out_val.data_handle(), + out_idx.data_handle(), + select_min, + sorted, + algo, + len_i.has_value() ? len_i->data_handle() : nullptr); +} +} // namespace cuvs::selection::detail + +#define instantiate_cuvs_selection_select_k(T, IdxT) \ + void cuvs::selection::select_k( \ + raft::resources const& handle, \ + raft::device_matrix_view in_val, \ + std::optional> in_idx, \ + raft::device_matrix_view out_val, \ + raft::device_matrix_view out_idx, \ + bool select_min, \ + bool sorted, \ + SelectAlgo algo, \ + std::optional> len_i) \ + { \ + detail::select_k( \ + handle, in_val, in_idx, out_val, out_idx, select_min, sorted, algo, len_i); \ + } diff --git a/cpp/src/selection/select_k_float_int64_t.cu b/cpp/src/selection/select_k_float_int64_t.cu new file mode 100644 index 0000000000..707d6add2d --- /dev/null +++ b/cpp/src/selection/select_k_float_int64_t.cu @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./select_k.cuh" + +instantiate_cuvs_selection_select_k(float, int64_t); diff --git a/cpp/src/selection/select_k_float_uint32_t.cu b/cpp/src/selection/select_k_float_uint32_t.cu new file mode 100644 index 0000000000..05202f4812 --- /dev/null +++ b/cpp/src/selection/select_k_float_uint32_t.cu @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./select_k.cuh" + +instantiate_cuvs_selection_select_k(float, uint32_t); diff --git a/cpp/src/selection/select_k_half_uint32_t.cu b/cpp/src/selection/select_k_half_uint32_t.cu new file mode 100644 index 0000000000..b05cb2299a --- /dev/null +++ b/cpp/src/selection/select_k_half_uint32_t.cu @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./select_k.cuh" + +instantiate_cuvs_selection_select_k(half, uint32_t); diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index e04fff0b87..270d5c06ed 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -9,3 +9,4 @@ C++ API Documentation cpp_api/distance.rst cpp_api/neighbors.rst + cpp_api/selection.rst diff --git a/docs/source/cpp_api/selection.rst b/docs/source/cpp_api/selection.rst new file mode 100644 index 0000000000..5abe81662f --- /dev/null +++ b/docs/source/cpp_api/selection.rst @@ -0,0 +1,19 @@ +Selection +========= + +This page provides C++ class references for the publicly-exposed elements of the `cuvs/selection` +package. + +.. role:: py(code) + :language: c++ + :class: highlight + +Select-K +-------- + +``#include `` + +namespace *cuvs::selection* + +.. doxygengroup:: select_k + :project: cuvs From e0d169e3415dc34b893b4b8cad05b4392911a0fc Mon Sep 17 00:00:00 2001 From: Shunya Ueta Date: Thu, 30 May 2024 21:30:40 +0900 Subject: [PATCH 07/11] update: replace to cuvs from RAFT in PULL_REQUEST_TEMPLATE (#92) ## What SSIA Authors: - Shunya Ueta (https://github.com/hurutoriya) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cuvs/pull/92 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9c42cda720..23d3a11feb 100755 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@