Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.04 to branch-23.06 #1400

Merged
merged 2 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
build_type: pull-request
package-name: raft_dask
# Always want to test against latest dask/distributed.
test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-before-amd64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/[email protected]"
test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibraft_cu11 rapids-download-wheels-from-s3 ./local-pylibraft-dep && pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl && pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/[email protected]"
test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
test-smoketest: "python ./ci/wheel_smoke_test_raft_dask.py"
4 changes: 2 additions & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
package-name: raft_dask
test-before-amd64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-before-arm64: "pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]"
test-before-amd64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/[email protected]"
test-before-arm64: "pip install git+https://github.com/dask/dask.git@2023.3.2 git+https://github.com/dask/distributed.git@2023.3.2.1 git+https://github.com/rapidsai/[email protected]"
test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
5 changes: 3 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ dependencies:
- cupy
- cxx-compiler
- cython>=0.29,<0.30
- dask-core==2023.3.2
- dask-cuda==23.4.*
- dask>=2023.1.1
- distributed>=2023.1.1
- dask==2023.3.2
- distributed==2023.3.2.1
- doxygen>=1.8.20
- gcc_linux-64=11.*
- graphviz
Expand Down
5 changes: 3 additions & 2 deletions conda/recipes/raft-dask/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ requirements:
run:
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- cuda-python >=11.7.1,<12.0
- dask >=2023.1.1
- dask ==2023.3.2
- dask-core ==2023.3.2
- dask-cuda ={{ minor_version }}
- distributed >=2023.1.1
- distributed ==2023.3.2.1
- joblib >=0.11
- nccl >=2.9.9
- pylibraft {{ version }}
Expand Down
226 changes: 226 additions & 0 deletions cpp/include/raft/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "detail/cagra/cagra_build.cuh"
#include "detail/cagra/cagra_search.cuh"
#include "detail/cagra/graph_core.cuh"

#include <raft/core/device_mdspan.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_device_accessor.hpp>
#include <raft/core/mdspan.hpp>
#include <raft/neighbors/cagra_types.hpp>
#include <rmm/cuda_stream_view.hpp>

namespace raft::neighbors::experimental::cagra {

/**
* @defgroup cagra CUDA ANN Graph-based nearest neighbor search
* @{
*/

/**
* @brief Build a kNN graph.
*
* The kNN graph is the first building block for CAGRA index.
* This function uses the IVF-PQ method to build a kNN graph.
*
* The output is a dense matrix that stores the neighbor indices for each pont in the dataset.
* Each point has the same number of neighbors.
*
* See [cagra::build](#cagra::build) for an alternative method.
*
* The following distance metrics are supported:
* - L2Expanded
*
* Usage example:
* @code{.cpp}
* using namespace raft::neighbors;
* // use default index parameters
* ivf_pq::index_params build_params;
* ivf_pq::search_params search_params
* auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128);
* // create knn graph
* cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params);
* auto pruned_gaph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 64);
* cagra::prune(res, dataset, knn_graph.view(), pruned_graph.view());
* // Construct an index from dataset and pruned knn_graph
* auto index = cagra::index<T, IdxT>(res, build_params.metric(), dataset, pruned_graph.view());
* @endcode
*
* @tparam T data element type
* @tparam IdxT type of the indices in the source dataset
*
* @param[in] res raft resources
* @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
* @param[out] knn_graph a host matrix view to store the output knn graph [n_rows, graph_degree]
* @param[in] refine_rate refinement rate for ivf-pq search
* @param[in] build_params (optional) ivf_pq index building parameters for knn graph
* @param[in] search_params (optional) ivf_pq search parameters
*/
template <typename DataT, typename IdxT, typename accessor>
void build_knn_graph(raft::device_resources const& res,
mdspan<const DataT, matrix_extent<IdxT>, row_major, accessor> dataset,
raft::host_matrix_view<IdxT, IdxT, row_major> knn_graph,
std::optional<float> refine_rate = std::nullopt,
std::optional<ivf_pq::index_params> build_params = std::nullopt,
std::optional<ivf_pq::search_params> search_params = std::nullopt)
{
detail::build_knn_graph(res, dataset, knn_graph, refine_rate, build_params, search_params);
}

/**
* @brief Prune a KNN graph.
*
* Decrease the number of neighbors for each node.
*
* See [cagra::build_knn_graph](#cagra::build_knn_graph) for usage example
*
* @tparam T data element type
* @tparam IdxT type of the indices in the source dataset
*
* @param[in] res raft resources
* @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
* @param[in] knn_graph a matrix view (host or device) of the input knn graph [n_rows,
* knn_graph_degree]
* @param[out] new_graph a host matrix view of the pruned knn graph [n_rows, graph_degree]
*/
template <class DATA_T,
typename IdxT = uint32_t,
typename d_accessor =
host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::device>,
typename g_accessor =
host_device_accessor<std::experimental::default_accessor<DATA_T>, memory_type::host>>
void prune(raft::device_resources const& res,
mdspan<const DATA_T, matrix_extent<IdxT>, row_major, d_accessor> dataset,
mdspan<IdxT, matrix_extent<IdxT>, row_major, g_accessor> knn_graph,
raft::host_matrix_view<IdxT, IdxT, row_major> new_graph)
{
detail::graph::prune(res, dataset, knn_graph, new_graph);
}

/**
* @brief Build the index from the dataset for efficient search.
*
* The build consist of two steps: build an intermediate knn-graph, and prune it to
* create the final graph. The index_params struct controls the node degree of these
* graphs.
*
* It is required that dataset and the pruned graph fit the GPU memory.
*
* To customize the parameters for knn-graph building and pruning, and to reuse the
* intermediate results, you could build the index in two steps using
* [cagra::build_knn_graph](#cagra::build_knn_graph) and [cagra::prune](#cagra::prune).
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace raft::neighbors;
* // use default index parameters
* cagra::index_params index_params;
* // create and fill the index from a [N, D] dataset
* auto index = cagra::build(res, index_params, dataset);
* // use default search parameters
* ivf_pq::search_params search_params;
* // search K nearest neighbours
* auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
* auto distances = raft::make_device_matrix<float>(res, n_queries, k);
* ivf_pq::search(res, search_params, index, queries, neighbors, distances);
* @endcode
*
* @tparam T data element type
* @tparam IdxT type of the indices in the source dataset
*
* @param[in] res
* @param[in] params parameters for building the index
* @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
*
* @return the constructed cagra index
*/
template <typename T,
typename IdxT = uint32_t,
typename Accessor =
host_device_accessor<std::experimental::default_accessor<T>, memory_type::host>>
index<T, IdxT> build(raft::device_resources const& res,
const index_params& params,
mdspan<const T, matrix_extent<IdxT>, row_major, Accessor> dataset)
{
size_t degree = params.intermediate_graph_degree;
if (degree >= dataset.extent(0)) {
RAFT_LOG_WARN(
"Intermediate graph degree cannot be larger than dataset size, reducing it to %lu",
dataset.extent(0));
degree = dataset.extent(0) - 1;
}
RAFT_EXPECTS(degree >= params.graph_degree,
"Intermediate graph degree cannot be smaller than final graph degree");

auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), degree);

build_knn_graph(res, dataset, knn_graph.view());

auto cagra_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), params.graph_degree);

prune<T, IdxT>(res, dataset, knn_graph.view(), cagra_graph.view());

// Construct an index from dataset and pruned knn graph.
return index<T, IdxT>(res, params.metric, dataset, cagra_graph.view());
}

/**
* @brief Search ANN using the constructed index.
*
* See the [cagra::build](#cagra::build) documentation for a usage example.
*
* @tparam T data element type
* @tparam IdxT type of the indices
*
* @param[in] res raft resources
* @param[in] params configure the search
* @param[in] idx cagra index
* @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()]
* @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset
* [n_queries, k]
* @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries,
* k]
*/
template <typename T, typename IdxT>
void search(raft::device_resources const& res,
const search_params& params,
const index<T, IdxT>& idx,
raft::device_matrix_view<const T, IdxT, row_major> queries,
raft::device_matrix_view<IdxT, IdxT, row_major> neighbors,
raft::device_matrix_view<float, IdxT, row_major> distances)
{
RAFT_EXPECTS(
queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0),
"Number of rows in output neighbors and distances matrices must equal the number of queries.");

RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1),
"Number of columns in output neighbors and distances matrices must equal k");

RAFT_EXPECTS(queries.extent(1) == idx.dim(),
"Number of query dimensions should equal number of dimensions in the index.");

detail::search_main(res, params, idx, queries, neighbors, distances);
}
/** @} */ // end group cagra

} // namespace raft::neighbors::experimental::cagra
Loading