From 9b4adb1e3f5e3d76db9cb3b1dc971c63617b7da1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 23 Sep 2021 18:46:08 -0400 Subject: [PATCH] Random Ball Cover Algorithm for 2D Haversine/Euclidean (#213) This PR is a proof of concept to use the triangle inequality to prune the tree of exhaustive distance computations into something smaller, such as on the order of where c is called an expansion constant, based on the dimensionality. This should (hopefully) be able to benefit both sparse and dense k-nearest neighbors and all algorithms that use them, hopefully providing a significant speedup for our sparse semirings primitive when only the k-nearest neighbors are desired. The goal here is to construct a tree out of the random ball cover algorithm such that we can utilize it in algorithms which would otherwise be able to make efficient use of a ball tree. However, there are additional challenges to this algorithm on the GPU, such as being able to batch the tree lookups. Authors: - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - William Hicks (https://github.com/wphicks) - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/raft/pull/213 --- cpp/include/raft/cache/cache_util.cuh | 6 +- cpp/include/raft/sparse/linalg/degree.cuh | 10 +- cpp/include/raft/sparse/selection/knn.cuh | 7 +- .../raft/sparse/selection/knn_graph.cuh | 3 +- cpp/include/raft/spatial/knn/ball_cover.hpp | 162 ++++ .../raft/spatial/knn/ball_cover_common.h | 99 +++ .../knn/detail/ann_quantized_faiss.cuh | 2 + .../raft/spatial/knn/detail/ball_cover.cuh | 351 +++++++++ .../spatial/knn/detail/ball_cover/common.cuh | 91 +++ .../knn/detail/ball_cover/registers.cuh | 537 +++++++++++++ .../spatial/knn/detail/block_select_faiss.cuh | 224 ++++++ .../knn/detail/knn_brute_force_faiss.cuh | 37 +- .../knn/detail/selection_faiss.cuh} | 19 +- .../spatial/knn/detail/warp_select_faiss.cuh | 739 ++++++++++++++++++ cpp/include/raft/spatial/knn/knn.hpp | 56 +- cpp/test/CMakeLists.txt | 3 +- cpp/test/spatial/ball_cover.cu | 271 +++++++ cpp/test/spatial/knn.cu | 9 - cpp/test/{sparse => spatial}/selection.cu | 11 +- cpp/test/spatial/spatial_data.h | 27 + cpp/test/test_utils.h | 42 + 21 files changed, 2645 insertions(+), 61 deletions(-) create mode 100644 cpp/include/raft/spatial/knn/ball_cover.hpp create mode 100644 cpp/include/raft/spatial/knn/ball_cover_common.h create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover.cuh create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover/common.cuh create mode 100644 cpp/include/raft/spatial/knn/detail/ball_cover/registers.cuh create mode 100644 cpp/include/raft/spatial/knn/detail/block_select_faiss.cuh rename cpp/include/raft/{sparse/selection/selection.cuh => spatial/knn/detail/selection_faiss.cuh} (94%) create mode 100644 cpp/include/raft/spatial/knn/detail/warp_select_faiss.cuh create mode 100644 cpp/test/spatial/ball_cover.cu rename cpp/test/{sparse => spatial}/selection.cu (93%) create mode 100644 cpp/test/spatial/spatial_data.h diff --git a/cpp/include/raft/cache/cache_util.cuh b/cpp/include/raft/cache/cache_util.cuh index ce8ef9a095..a65227c402 100644 --- a/cpp/include/raft/cache/cache_util.cuh +++ b/cpp/include/raft/cache/cache_util.cuh @@ -41,9 +41,9 @@ namespace cache { * @param [in] n the number of elements that need to be collected * @param [out] out vectors collected from the cache, size [n_vec * n] */ -template -__global__ void get_vecs(const math_t *cache, int n_vec, const int *cache_idx, - int n, math_t *out) { +template +__global__ void get_vecs(const math_t *cache, int_t n_vec, + const idx_t *cache_idx, int_t n, math_t *out) { int tid = threadIdx.x + blockIdx.x * blockDim.x; int row = tid % n_vec; // row idx if (tid < n_vec * n) { diff --git a/cpp/include/raft/sparse/linalg/degree.cuh b/cpp/include/raft/sparse/linalg/degree.cuh index 9bd322c90a..ef6a067c39 100644 --- a/cpp/include/raft/sparse/linalg/degree.cuh +++ b/cpp/include/raft/sparse/linalg/degree.cuh @@ -43,11 +43,11 @@ namespace linalg { * @param nnz the size of the rows array * @param results array to place results */ -template -__global__ void coo_degree_kernel(const int *rows, int nnz, int *results) { +template +__global__ void coo_degree_kernel(const T *rows, int nnz, T *results) { int row = (blockIdx.x * TPB_X) + threadIdx.x; if (row < nnz) { - raft::myAtomicAdd(results + rows[row], 1); + atomicAdd(results + rows[row], (T)1); } } @@ -59,8 +59,8 @@ __global__ void coo_degree_kernel(const int *rows, int nnz, int *results) { * @param results: output result array * @param stream: cuda stream to use */ -template -void coo_degree(const int *rows, int nnz, int *results, cudaStream_t stream) { +template +void coo_degree(const T *rows, int nnz, T *results, cudaStream_t stream) { dim3 grid_rc(raft::ceildiv(nnz, TPB_X), 1, 1); dim3 blk_rc(TPB_X, 1, 1); diff --git a/cpp/include/raft/sparse/selection/knn.cuh b/cpp/include/raft/sparse/selection/knn.cuh index 3566939bc4..49573a679d 100644 --- a/cpp/include/raft/sparse/selection/knn.cuh +++ b/cpp/include/raft/sparse/selection/knn.cuh @@ -31,8 +31,6 @@ #include #include #include -#include - #include #include @@ -339,8 +337,9 @@ class sparse_knn_t { if (metric == raft::distance::DistanceType::InnerProduct) ascending = false; // kernel to slice first (min) k cols and copy into batched merge buffer - select_k(batch_dists, batch_indices, batch_rows, batch_cols, out_dists, - out_indices, ascending, n_neighbors, handle.get_stream()); + raft::spatial::knn::select_k(batch_dists, batch_indices, batch_rows, + batch_cols, out_dists, out_indices, ascending, + n_neighbors, handle.get_stream()); } void compute_distances(csr_batcher_t &idx_batcher, diff --git a/cpp/include/raft/sparse/selection/knn_graph.cuh b/cpp/include/raft/sparse/selection/knn_graph.cuh index 1cdd66f516..3df1c77081 100644 --- a/cpp/include/raft/sparse/selection/knn_graph.cuh +++ b/cpp/include/raft/sparse/selection/knn_graph.cuh @@ -88,11 +88,12 @@ void conv_indices(in_t *inds, out_t *out, size_t size, cudaStream_t stream) { * @param[in] n number of observations (columns) in X * @param[in] metric distance metric to use when constructing neighborhoods * @param[out] out output edge list + * @param[out] out output edge list * @param c */ template void knn_graph(const handle_t &handle, const value_t *X, size_t m, size_t n, - distance::DistanceType metric, + raft::distance::DistanceType metric, raft::sparse::COO &out, int c = 15) { int k = build_k(m, c); diff --git a/cpp/include/raft/spatial/knn/ball_cover.hpp b/cpp/include/raft/spatial/knn/ball_cover.hpp new file mode 100644 index 0000000000..e4b50c77e3 --- /dev/null +++ b/cpp/include/raft/spatial/knn/ball_cover.hpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include "ball_cover_common.h" +#include "detail/ball_cover.cuh" +#include "detail/ball_cover/common.cuh" + +namespace raft { +namespace spatial { +namespace knn { + +template +void rbc_build_index(const raft::handle_t &handle, + BallCoverIndex &index) { + ASSERT(index.n == 2, + "Random ball cover currently only works in 2-dimensions"); + if (index.metric == raft::distance::DistanceType::Haversine) { + detail::rbc_build_index(handle, index, detail::HaversineFunc()); + } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) { + detail::rbc_build_index(handle, index, detail::EuclideanFunc()); + } else { + RAFT_FAIL("Metric not support"); + } + + index.set_index_trained(); +} + +/** + * Performs a faster exact knn in metric spaces using the triangle + * inequality with a number of landmark points to reduce the + * number of distance computations from O(n^2) to O(sqrt(n)). This + * performs an all neighbors knn, which can reuse memory when + * the index and query are the same array. This function will + * build the index and assumes rbc_build_index() has not already + * been called. + * @tparam value_idx knn index type + * @tparam value_t knn distance type + * @tparam value_int type for integers, such as number of rows/cols + * @param handle raft handle for resource management + * @param index ball cover index which has not yet been built + * @param k number of nearest neighbors to find + * @param perform_post_filtering if this is false, only the closest k landmarks + * are considered (which will return approximate + * results). + * @param[out] inds output knn indices + * @param[out] dists output knn distances + * @param weight a weight for overlap between the closest landmark and + * the radius of other landmarks when pruning distances. + * Setting this value below 1 can effectively turn off + * computing distances against many other balls, enabling + * approximate nearest neighbors. Recall can be adjusted + * based on how many relevant balls are ignored. Note that + * many datasets can still have great recall even by only + * looking in the closest landmark. + */ +template +void rbc_all_knn_query(const raft::handle_t &handle, + BallCoverIndex &index, + value_int k, value_idx *inds, value_t *dists, + bool perform_post_filtering = true, float weight = 1.0) { + ASSERT(index.n == 2, + "Random ball cover currently only works in 2-dimensions"); + if (index.metric == raft::distance::DistanceType::Haversine) { + detail::rbc_all_knn_query(handle, index, k, inds, dists, + detail::HaversineFunc(), perform_post_filtering, + weight); + } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) { + detail::rbc_all_knn_query(handle, index, k, inds, dists, + detail::EuclideanFunc(), perform_post_filtering, + weight); + } else { + RAFT_FAIL("Metric not supported"); + } + + index.set_index_trained(); +} + +/** + * Performs a faster exact knn in metric spaces using the triangle + * inequality with a number of landmark points to reduce the + * number of distance computations from O(n^2) to O(sqrt(n)). This + * function does not build the index and assumes rbc_build_index() has + * already been called. Use this function when the index and + * query arrays are different, otherwise use rbc_all_knn_query(). + * @tparam value_idx index type + * @tparam value_t distances type + * @tparam value_int integer type for size info + * @param handle raft handle for resource management + * @param index ball cover index which has not yet been built + * @param k number of nearest neighbors to find + * @param query the + * @param perform_post_filtering if this is false, only the closest k landmarks + * are considered (which will return approximate + * results). + * @param[out] inds output knn indices + * @param[out] dists output knn distances + * @param weight a weight for overlap between the closest landmark and + * the radius of other landmarks when pruning distances. + * Setting this value below 1 can effectively turn off + * computing distances against many other balls, enabling + * approximate nearest neighbors. Recall can be adjusted + * based on how many relevant balls are ignored. Note that + * many datasets can still have great recall even by only + * looking in the closest landmark. + * @param k + * @param inds + * @param dists + * @param n_samples + */ +template +void rbc_knn_query(const raft::handle_t &handle, + BallCoverIndex &index, + value_int k, const value_t *query, value_int n_query_pts, + value_idx *inds, value_t *dists, + bool perform_post_filtering = true, float weight = 1.0) { + ASSERT(index.n == 2, + "Random ball cover currently only works in 2-dimensions"); + if (index.metric == raft::distance::DistanceType::Haversine) { + detail::rbc_knn_query(handle, index, k, query, n_query_pts, inds, dists, + detail::HaversineFunc(), perform_post_filtering, + weight); + } else if (index.metric == raft::distance::DistanceType::L2SqrtExpanded || + index.metric == raft::distance::DistanceType::L2SqrtUnexpanded) { + detail::rbc_knn_query(handle, index, k, query, n_query_pts, inds, dists, + detail::EuclideanFunc(), perform_post_filtering, + weight); + } else { + RAFT_FAIL("Metric not supported"); + } +} + +// TODO: implement functions for: +// 4. rbc_eps_neigh() - given a populated index, perform query against different query array +// 5. rbc_all_eps_neigh() - populate a BallCoverIndex and query against training data + +} // namespace knn +} // namespace spatial +} // namespace raft diff --git a/cpp/include/raft/spatial/knn/ball_cover_common.h b/cpp/include/raft/spatial/knn/ball_cover_common.h new file mode 100644 index 0000000000..ca614bb0cb --- /dev/null +++ b/cpp/include/raft/spatial/knn/ball_cover_common.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +namespace raft { +namespace spatial { +namespace knn { + +/** + * Stores raw index data points, sampled landmarks, the 1-nns of index points + * to their closest landmarks, and the ball radii of each landmark. This + * class is intended to be constructed once and reused across subsequent + * queries. + * @tparam value_idx + * @tparam value_t + * @tparam value_int + */ +template +class BallCoverIndex { + public: + explicit BallCoverIndex(const raft::handle_t &handle_, const value_t *X_, + value_int m_, value_int n_, + raft::distance::DistanceType metric_) + : handle(handle_), + X(X_), + m(m_), + n(n_), + metric(metric_), + /** + * the sqrt() here makes the sqrt(m)^2 a linear-time lower bound + * + * Total memory footprint of index: (2 * sqrt(m)) + (n * sqrt(m)) + (2 * m) + */ + n_landmarks(sqrt(m_)), + R_indptr(sqrt(m_) + 1, handle.get_stream()), + R_1nn_cols(m_, handle.get_stream()), + R_1nn_dists(m_, handle.get_stream()), + R(sqrt(m_) * n_, handle.get_stream()), + R_radius(sqrt(m_), handle.get_stream()), + index_trained(false) {} + + value_idx *get_R_indptr() { return R_indptr.data(); } + value_idx *get_R_1nn_cols() { return R_1nn_cols.data(); } + value_t *get_R_1nn_dists() { return R_1nn_dists.data(); } + value_t *get_R_radius() { return R_radius.data(); } + value_t *get_R() { return R.data(); } + const value_t *get_X() { return X; } + + bool is_index_trained() const { return index_trained; }; + + // This should only be set by internal functions + void set_index_trained() { index_trained = true; } + + const raft::handle_t &handle; + + const value_int m; + const value_int n; + const value_int n_landmarks; + + const value_t *X; + + raft::distance::DistanceType metric; + + private: + // CSR storing the neighborhoods for each data point + rmm::device_uvector R_indptr; + rmm::device_uvector R_1nn_cols; + rmm::device_uvector R_1nn_dists; + + rmm::device_uvector R_radius; + + rmm::device_uvector R; + + protected: + bool index_trained; +}; +} // namespace knn +} // namespace spatial +} // namespace raft diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh index 77ad4afe96..0e91b5225d 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh @@ -17,12 +17,14 @@ #pragma once #include "../ann_common.h" +#include "knn_brute_force_faiss.cuh" #include "common_faiss.h" #include "processing.hpp" #include #include +#include "processing.hpp" #include