Skip to content

Commit

Permalink
Cagra index construction without copying device mdarrays (#1494)
Browse files Browse the repository at this point in the history
This PR aims to improve the workflow when dealing with large datasets. When experimenting with different versions of the knn-graph, we might want to construct indices with the same dataset (see #1435 for further discussion).

If the dataset is already in device memory (and rows are properly aligned / padded), then we only store a reference to the dataset, so multiple indices can refer to the same dataset. Similarly, when `knn_graph` is a device array, only a reference to it is stored.

Additionally, this PR adds `update_dataset` and `update_graph` methods to the index.
Closes #1479

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1494
  • Loading branch information
tfeher authored Jul 20, 2023
1 parent 0abedc6 commit db4797f
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 33 deletions.
17 changes: 10 additions & 7 deletions cpp/bench/prims/neighbors/cagra_bench.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,34 +47,35 @@ struct CagraBench : public fixture {
explicit CagraBench(const params& ps)
: fixture(true),
params_(ps),
queries_(make_device_matrix<T, IdxT>(handle, ps.n_queries, ps.n_dims))
queries_(make_device_matrix<T, IdxT>(handle, ps.n_queries, ps.n_dims)),
dataset_(make_device_matrix<T, IdxT>(handle, ps.n_samples, ps.n_dims)),
knn_graph_(make_device_matrix<IdxT, IdxT>(handle, ps.n_samples, ps.degree))
{
// Generate random dataset and queries
auto dataset = make_device_matrix<T, IdxT>(handle, ps.n_samples, ps.n_dims);
raft::random::RngState state{42};
constexpr T kRangeMax = std::is_integral_v<T> ? std::numeric_limits<T>::max() : T(1);
constexpr T kRangeMin = std::is_integral_v<T> ? std::numeric_limits<T>::min() : T(-1);
if constexpr (std::is_integral_v<T>) {
raft::random::uniformInt(
state, dataset.data_handle(), dataset.size(), kRangeMin, kRangeMax, stream);
state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax, stream);
raft::random::uniformInt(
state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax, stream);
} else {
raft::random::uniform(
state, dataset.data_handle(), dataset.size(), kRangeMin, kRangeMax, stream);
state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax, stream);
raft::random::uniform(
state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax, stream);
}

// Generate random knn graph
auto knn_graph = make_device_matrix<IdxT, IdxT>(handle, ps.n_samples, ps.degree);

raft::random::uniformInt<IdxT>(
state, knn_graph.data_handle(), knn_graph.size(), 0, ps.n_samples - 1, stream);
state, knn_graph_.data_handle(), knn_graph_.size(), 0, ps.n_samples - 1, stream);

auto metric = raft::distance::DistanceType::L2Expanded;

index_.emplace(raft::neighbors::experimental::cagra::index<T, IdxT>(
handle, metric, make_const_mdspan(dataset.view()), knn_graph.view()));
handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view())));
}

void run_benchmark(::benchmark::State& state) override
Expand Down Expand Up @@ -125,6 +126,8 @@ struct CagraBench : public fixture {
const params params_;
std::optional<const raft::neighbors::experimental::cagra::index<T, IdxT>> index_;
raft::device_matrix<T, IdxT, row_major> queries_;
raft::device_matrix<T, IdxT, row_major> dataset_;
raft::device_matrix<IdxT, IdxT, row_major> knn_graph_;
};

inline const std::vector<params> generate_inputs()
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ index<T, IdxT> build(raft::resources const& res,
optimize<IdxT>(res, knn_graph.view(), cagra_graph.view());

// Construct an index from dataset and optimized knn graph.
return index<T, IdxT>(res, params.metric, dataset, cagra_graph.view());
return index<T, IdxT>(res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view()));
}

/**
Expand Down
156 changes: 137 additions & 19 deletions cpp/include/raft/neighbors/cagra_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <thrust/fill.h>
#include <type_traits>

#include <raft/core/logger.hpp>
namespace raft::neighbors::experimental::cagra {
/**
* @ingroup cagra
Expand Down Expand Up @@ -137,7 +138,7 @@ struct index : ann::index {
/** Graph degree */
[[nodiscard]] constexpr inline auto graph_degree() const noexcept -> uint32_t
{
return graph_.extent(1);
return graph_view_.extent(1);
}

/** Dataset [size, dim] */
Expand All @@ -148,15 +149,12 @@ struct index : ann::index {
}

/** neighborhood graph [size, graph-degree] */
inline auto graph() noexcept -> device_matrix_view<IdxT, IdxT, row_major>
{
return graph_.view();
}
inline auto graph() noexcept -> device_matrix_view<IdxT, IdxT, row_major> { return graph_view_; }

[[nodiscard]] inline auto graph() const noexcept
-> device_matrix_view<const IdxT, IdxT, row_major>
{
return graph_.view();
return graph_view_;
}

// Don't allow copying the index for performance reasons (try avoiding copying data)
Expand All @@ -175,20 +173,145 @@ struct index : ann::index {
{
}

/** Construct an index from dataset and knn_graph arrays */
/** Construct an index from dataset and knn_graph arrays
*
* If the dataset and graph are already in GPU memory, then the index is just a thin wrapper around
* these that stores a non-owning reference to the arrays.
*
* The constructor also accepts host arrays. In that case they are copied to the device, and the
* device arrays will be owned by the index.
*
* In case the dataset rows are not 16 bytes aligned, then we create a padded copy in device
* memory to ensure alignment for vectorized load.
*
* Usage examples:
*
* - Cagra index is normally created by the cagra::build
* @code{.cpp}
* using namespace raft::neighbors::experimental;
* auto dataset = raft::make_host_matrix<float>(n_rows, n_cols);
* load_dataset(dataset.view());
* // use default index parameters
* cagra::index_params index_params;
* // create and fill the index from a [N, D] dataset
* auto index = cagra::build(res, index_params, dataset);
* // use default search parameters
* cagra::search_params search_params;
* // search K nearest neighbours
* auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k);
* auto distances = raft::make_device_matrix<float>(res, n_queries, k);
* cagra::search(res, search_params, index, queries, neighbors, distances);
* @endcode
* In the above example, we have passed a host dataset to build. The returned index will own a
* device copy of the dataset and the knn_graph. In contrast, if we pass the dataset as a
* device_mdspan to build, then it will only store a reference to it.
*
* - Constructing index using existing knn-graph
* @code{.cpp}
* using namespace raft::neighbors::experimental;
*
* auto dataset = raft::make_device_matrix<float>(res, n_rows, n_cols);
* auto knn_graph = raft::make_device_matrix<uint32_t>(res, n_rows, graph_degree);
*
* // custom loading and graph creation
* // load_dataset(dataset.view());
* // create_knn_graph(knn_graph.view());
*
* // Wrap the existing device arrays into an index structure
* cagra::index<T, IdxT> index(res, metric, raft::make_const_mdspan(dataset.view()),
* raft::make_const_mdspan(knn_graph.view()));
*
* // Both knn_graph and dataset objects have to be in scope while the index is used because
* // the index only stores a reference to these.
* cagra::search(res, search_params, index, queries, neighbors, distances);
* @endcode
*
*/
template <typename data_accessor, typename graph_accessor>
index(raft::resources const& res,
raft::distance::DistanceType metric,
mdspan<const T, matrix_extent<IdxT>, row_major, data_accessor> dataset,
mdspan<IdxT, matrix_extent<IdxT>, row_major, graph_accessor> knn_graph)
mdspan<const IdxT, matrix_extent<IdxT>, row_major, graph_accessor> knn_graph)
: ann::index(),
metric_(metric),
dataset_(
make_device_matrix<T, IdxT>(res, dataset.extent(0), AlignDim::roundUp(dataset.extent(1)))),
graph_(make_device_matrix<IdxT, IdxT>(res, knn_graph.extent(0), knn_graph.extent(1)))
dataset_(make_device_matrix<T, IdxT>(res, 0, 0)),
graph_(make_device_matrix<IdxT, IdxT>(res, 0, 0))
{
RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0),
"Dataset and knn_graph must have equal number of rows");
update_dataset(res, dataset);
update_graph(res, knn_graph);
resource::sync_stream(res);
}

/**
* Replace the dataset with a new dataset.
*
* If the new dataset rows are aligned on 16 bytes, then only a reference is stored to the
* dataset. It is the caller's responsibility to ensure that dataset stays alive as long as the
* index.
*/
void update_dataset(raft::resources const& res,
                    raft::device_matrix_view<const T, IdxT, row_major> dataset)
{
  // A row length that is a multiple of AlignDim::Value can be referenced in place;
  // anything else needs a padded, index-owned copy.
  const bool rows_are_aligned = (dataset.extent(1) % AlignDim::Value == 0);
  if (rows_are_aligned) {
    const auto n_rows = dataset.extent(0);
    const auto n_cols = dataset.extent(1);
    // Non-owning view over the caller's memory; the row stride equals the row length.
    dataset_view_ =
      make_device_strided_matrix_view<const T, IdxT>(dataset.data_handle(), n_rows, n_cols, n_cols);
    return;
  }

  RAFT_LOG_DEBUG("Creating a padded copy of CAGRA dataset in device memory");
  copy_padded(res, dataset);
}

/**
* Replace the dataset with a new dataset.
*
* We create a copy of the dataset on the device. The index manages the lifetime of this copy.
*/
void update_dataset(raft::resources const& res,
raft::host_matrix_view<const T, IdxT, row_major> dataset)
{
// Host input can never be referenced in place, so this overload always goes through
// copy_padded(), which allocates the index-owned dataset_ and points dataset_view_ at it.
RAFT_LOG_DEBUG("Copying CAGRA dataset from host to device");
copy_padded(res, dataset);
}

/**
* Replace the graph with a new graph.
*
* Since the new graph is a device array, we store a reference to that, and it is
* the caller's responsibility to ensure that knn_graph stays alive as long as the index.
*/
void update_graph(raft::resources const& res,
raft::device_matrix_view<const IdxT, IdxT, row_major> knn_graph)
{
// Non-owning: only the view is stored. Nothing is copied and the owned graph_ member is
// left as it is. `res` is not used by this overload.
graph_view_ = knn_graph;
}

/**
* Replace the graph with a new graph.
*
* We create a copy of the graph on the device. The index manages the lifetime of this copy.
*/
void update_graph(raft::resources const& res,
                  raft::host_matrix_view<const IdxT, IdxT, row_major> knn_graph)
{
  RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device");

  // Replace the owned device storage with a freshly allocated matrix of matching shape.
  const auto n_rows = knn_graph.extent(0);
  const auto degree = knn_graph.extent(1);
  graph_            = make_device_matrix<IdxT, IdxT>(res, n_rows, degree);

  // Copy host -> device on the stream associated with `res`.
  // NOTE(review): no synchronization happens in this method; presumably the caller must
  // sync the stream before the host buffer is modified or released -- confirm.
  auto stream = resource::get_cuda_stream(res);
  raft::copy(graph_.data_handle(), knn_graph.data_handle(), knn_graph.size(), stream);

  // Expose the owned copy through the (const) graph view used by the accessors.
  graph_view_ = graph_.view();
}

private:
/** Create a device copy of the dataset, and pad it if necessary. */
template <typename data_accessor>
void copy_padded(raft::resources const& res,
mdspan<const T, matrix_extent<IdxT>, row_major, data_accessor> dataset)
{
dataset_ =
make_device_matrix<T, IdxT>(res, dataset.extent(0), AlignDim::roundUp(dataset.extent(1)));
if (dataset_.extent(1) == dataset.extent(1)) {
raft::copy(dataset_.data_handle(),
dataset.data_handle(),
Expand All @@ -207,24 +330,19 @@ struct index : ann::index {
cudaMemcpyDefault,
resource::get_cuda_stream(res)));
}
dataset_view_ = make_device_strided_matrix_view<T, IdxT>(
dataset_view_ = make_device_strided_matrix_view<const T, IdxT>(
dataset_.data_handle(), dataset_.extent(0), dataset.extent(1), dataset_.extent(1));
RAFT_LOG_DEBUG("CAGRA dataset strided matrix view %zux%zu, stride %zu",
static_cast<size_t>(dataset_view_.extent(0)),
static_cast<size_t>(dataset_view_.extent(1)),
static_cast<size_t>(dataset_view_.stride(0)));
raft::copy(graph_.data_handle(),
knn_graph.data_handle(),
knn_graph.size(),
resource::get_cuda_stream(res));
resource::sync_stream(res);
}

private:
raft::distance::DistanceType metric_;
raft::device_matrix<T, IdxT, row_major> dataset_;
raft::device_matrix<IdxT, IdxT, row_major> graph_;
raft::device_matrix_view<T, IdxT, layout_stride> dataset_view_;
raft::device_matrix_view<const T, IdxT, layout_stride> dataset_view_;
raft::device_matrix_view<const IdxT, IdxT, row_major> graph_view_;
};

/** @} */
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct check_index_layout {
"paste in the new size and consider updating the serialization logic");
};

constexpr size_t expected_size = 176;
constexpr size_t expected_size = 200;
template struct check_index_layout<sizeof(index<double, std::uint64_t>), expected_size>;

/**
Expand Down Expand Up @@ -116,7 +116,8 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index<T, IdxT>
deserialize_mdspan(res, is, dataset.view());
deserialize_mdspan(res, is, graph.view());

return index<T, IdxT>(res, metric, raft::make_const_mdspan(dataset.view()), graph.view());
return index<T, IdxT>(
res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view()));
}

template <typename T, typename IdxT>
Expand Down
1 change: 0 additions & 1 deletion cpp/include/raft/neighbors/detail/cagra/search_plan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ struct search_plan_impl_base : public search_params {
max_dim = 128;
while (max_dim < dim && max_dim <= 1024)
max_dim *= 2;
if (team_size != 0) { RAFT_LOG_WARN("Overriding team size parameter."); }
// To keep binary size in check we limit only one team size specialization for each max_dim.
// TODO(tfeher): revise this decision.
switch (max_dim) {
Expand Down
3 changes: 0 additions & 3 deletions cpp/test/neighbors/ann_cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,8 @@ class AnnCagraTest : public ::testing::TestWithParam<AnnCagraInputs> {

void SetUp() override
{
std::cout << "Resizing database: " << ps.n_rows * ps.dim << std::endl;
database.resize(((size_t)ps.n_rows) * ps.dim, stream_);
std::cout << "Done.\nResizing queries" << std::endl;
search_queries.resize(ps.n_queries * ps.dim, stream_);
std::cout << "Done.\nRuning rng" << std::endl;
raft::random::Rng r(1234ULL);
if constexpr (std::is_same<DataT, float>{}) {
r.normal(database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0), stream_);
Expand Down

0 comments on commit db4797f

Please sign in to comment.