From f40f3bad82e91760abbaac58675201cdfc9ab660 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Nov 2023 12:23:39 -0700 Subject: [PATCH 01/13] initial draft of serialize method --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 2 +- .../raft/neighbors/cagra_serialize.cuh | 17 ++++ .../detail/cagra/cagra_serialize.cuh | 94 +++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index f1c8154b7c..1f551f02e4 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -129,7 +129,7 @@ void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) template void RaftCagra::save(const std::string& file) const { - raft::neighbors::cagra::serialize(handle_, file, *index_, false); + raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); } template diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index 0a806402d2..85016ee2a4 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -93,6 +93,23 @@ void serialize(raft::resources const& handle, detail::serialize(handle, filename, index, include_dataset); } +template +void serialize_to_hnswlib(raft::resources const& handle, + std::ostream& os, + const index& index) +{ + detail::serialize_to_hnswlib(handle, os, index); +} + +template +void serialize_to_hnswlib(raft::resources const& handle, + const std::string& filename, + const index& index, + bool include_dataset = true) +{ + detail::serialize_to_hnswlib(handle, filename, index); +} + /** * Load index from input stream * diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 8261f637e1..5a0122e8e3 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -16,12 +16,19 @@ #pragma once +#include "raft/core/host_mdarray.hpp" +#include "raft/core/mdspan_types.hpp" +#include "raft/core/resource/cuda_stream.hpp" +#include +#include +#include #include #include #include #include #include +#include namespace raft::neighbors::cagra::detail { @@ -104,6 +111,93 @@ void serialize(raft::resources const& res, if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } } +template +void serialize_to_hnswlib(raft::resources const& res, + std::ostream& os, + const index& index_) +{ + common::nvtx::range fun_scope("cagra::serialize_to_hnswlib"); + RAFT_LOG_DEBUG( + "Saving CAGRA index to hnswlib format, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + + serialize_scalar(res, os, std::size_t{0}); + serialize_scalar(res, os, static_cast(index_.size())); + serialize_scalar(res, os, static_cast(index_.size())); + // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, labeltype: size_t + // size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + dim * sizeof(data_t) + sizeof(labeltype) + auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.size() * 4 + 8); + serialize_scalar(res, os, size_data_per_element); + serialize_scalar(res, os, size_data_per_element - 8); + serialize_scalar(res, os, static_cast(index_.graph_degree() * 4 + 4)); + serialize_scalar(res, os, std::int32_t{1}); + serialize_scalar(res, os, static_cast(index_.size() / 2)); + serialize_scalar(res, os, static_cast(index_.graph_degree() / 2)); + serialize_scalar(res, os, static_cast(index_.graph_degree())); + serialize_scalar(res, os, static_cast(index_.graph_degree() / 2)); + serialize_scalar(res, os, static_cast(0.42424242)); + serialize_scalar(res, os, std::size_t{500}); + + auto dataset = index_.dataset(); + // Remove padding before saving the dataset + auto host_dataset = make_host_matrix(dataset.extent(0), dataset.extent(1)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), + sizeof(T) * host_dataset.extent(1), + dataset.data_handle(), + sizeof(T) * dataset.stride(0), + sizeof(T) * host_dataset.extent(1), + dataset.extent(0), + cudaMemcpyDefault, + resource::get_cuda_stream(res))); + resource::sync_stream(res); + + auto graph = index_.graph(); + // auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); + // std::vector host_graph_t(graph.size()); + IdxT* host_graph = new IdxT[graph.size()]; + // thrust::copy(raft::resource::get_thrust_policy(res), graph.data_handle(), graph.data_handle() + graph.size(), host_graph.data_handle()); + raft::copy(host_graph, graph.data_handle(), graph.size(), raft::resource::get_cuda_stream(res)); + + // Write one dataset and graph row at a time + for (std::size_t i = 0; i < index_.size(); i++) { + serialize_scalar(res, os, static_cast(index_.graph_degree())); + + auto graph_row = host_graph + (index_.graph_degree() * i); + auto graph_row_mds = raft::make_host_vector_view(graph_row, index_.graph_degree()); + serialize_mdspan(res, os, graph_row_mds); + + auto data_row = host_dataset.data_handle() + (index_.dim() * i); + if constexpr (std::is_same_v) { + auto data_row_mds = raft::make_host_vector_view(data_row, index_.dim()); + serialize_mdspan(res, os, data_row_mds); + } + else if constexpr (std::is_same_v or std::is_same_v) { + auto data_row_int = raft::make_host_vector(index_.dim()); + std::copy(data_row, data_row + index_.size(), data_row_int.data_handle()); + serialize_mdspan(res, os, data_row_int.view()); + } + + serialize_scalar(res, os, i); + } + + for (std::size_t i = 0; i < index_.size(); i++) { + serialize_scalar(res, os, std::int32_t{0}); + } + delete [] host_graph; +} + +template +void serialize_to_hnswlib(raft::resources const& res, + const std::string& filename, + const index& index_) { + std::ofstream of(filename, std::ios::out | std::ios::binary); + if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } + + detail::serialize_to_hnswlib(res, of, index_); + + of.close(); + if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } +} + /** Load an index from file. * * Experimental, both the API and the serialization format are subject to change. From 5afd2b913bfec1660b30ebf2bdd6c26e72a4ad9d Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Nov 2023 13:48:26 -0700 Subject: [PATCH 02/13] add new benchmark for cagra+hnsw --- cpp/bench/ann/CMakeLists.txt | 20 +- cpp/bench/ann/src/raft/raft_benchmark.cu | 51 ++++ cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 22 ++ .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 218 ++++++++++++++++++ cpp/cmake/thirdparty/get_hnswlib.cmake | 5 + .../src/raft-ann-bench/run/algos.yaml | 3 + .../run/conf/algos/raft_cagra.yaml | 5 - .../run/conf/algos/raft_cagra_hnswlib.yaml | 11 + 8 files changed, 329 insertions(+), 6 deletions(-) create mode 100644 cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu create mode 100644 cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index d6a5fddb98..889c885f9b 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -30,6 +30,7 @@ option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) +option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_SINGLE_EXE @@ -54,6 +55,7 @@ if(BUILD_CPU_ONLY) set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) + set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF) set(RAFT_ANN_BENCH_USE_GGNN OFF) else() # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. @@ -88,6 +90,7 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OR RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OR RAFT_ANN_BENCH_USE_RAFT_CAGRA + OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB ) set(RAFT_ANN_BENCH_USE_RAFT ON) endif() @@ -95,7 +98,7 @@ endif() # ################################################################################################## # * Fetch requirements ------------------------------------------------------------- -if(RAFT_ANN_BENCH_USE_HNSWLIB) +if(RAFT_ANN_BENCH_USE_HNSWLIB OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() @@ -250,6 +253,21 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) ) endif() +if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) + ConfigureAnnBench( + NAME + RAFT_CAGRA_HNSWLIB + PATH + bench/ann/src/raft/raft_benchmark.cu + $<$:bench/ann/src/raft/raft_cagra_hnswlib.cu> + INCLUDES + ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib + LINKS + raft::compiled + CXXFLAGS "${HNSW_CXX_FLAGS}" + ) +endif() + set(RAFT_FAISS_TARGETS faiss::faiss) if(TARGET faiss::faiss_avx2) set(RAFT_FAISS_TARGETS faiss::faiss_avx2) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 6888340b4d..694cd745ba 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -47,6 +47,12 @@ extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; #endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB +#include "raft_cagra_hnswlib_wrapper.h" +extern template class raft::bench::ann::RaftCagraHnswlib; +extern template class raft::bench::ann::RaftCagraHnswlib; +extern template class raft::bench::ann::RaftCagraHnswlib; +#endif #define JSON_DIAGNOSTICS 1 #include @@ -182,6 +188,37 @@ void parse_search_param(const nlohmann::json& conf, } #endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftCagraHnswlib::BuildParam& param) +{ + if (conf.contains("graph_degree")) { + param.graph_degree = conf.at("graph_degree"); + param.intermediate_graph_degree = param.graph_degree * 2; + } + if (conf.contains("intermediate_graph_degree")) { + param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); + } + if (conf.contains("graph_build_algo")) { + if (conf.at("graph_build_algo") == "IVF_PQ") { + param.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; + } else if (conf.at("graph_build_algo") == "NN_DESCENT") { + param.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; + } + } + if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) +{ + param.ef = conf.at("ef"); + if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } +} +#endif + template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, @@ -223,6 +260,13 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB + if (algo == "raft_cagra_hnswlib") { + typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; + parse_build_param(conf, param); + ann = std::make_unique>(metric, dim, param); + } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } @@ -260,6 +304,13 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB + if (algo == "raft_cagra_hnswlib") { + auto param = std::make_unique::SearchParam>(); + parse_search_param(conf, *param); + return param; + } #endif // else throw std::runtime_error("invalid algo: '" + algo + "'"); diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu new file mode 100644 index 0000000000..27e7d22c47 --- /dev/null +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "raft_cagra_hnswlib_wrapper.h" + +namespace raft::bench::ann { +template class RaftCagraHnswlib; +template class RaftCagraHnswlib; +template class RaftCagraHnswlib; +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h new file mode 100644 index 0000000000..98cb5b7142 --- /dev/null +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../common/ann_types.hpp" +#include "../common/thread_pool.hpp" +#include "raft_ann_bench_utils.h" +#include + +#include + +namespace raft::bench::ann { + +template +struct hnsw_dist_t { + using type = void; +}; + +template <> +struct hnsw_dist_t { + using type = float; +}; + +template <> +struct hnsw_dist_t { + using type = int; +}; + +template <> +struct hnsw_dist_t { + using type = int; +}; + +template +class RaftCagraHnswlib : public ANN { + public: + using typename ANN::AnnSearchParam; + + struct SearchParam : public AnnSearchParam { + int ef; + int num_threads = 1; + }; + + using BuildParam = raft::neighbors::cagra::index_params; + + RaftCagraHnswlib(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) + : ANN(metric, dim), index_params_(param), dimension_(dim), handle_(cudaStreamPerThread) + { + index_params_.metric = parse_metric_type(metric); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftCagraHnswlib() noexcept {} + + void build(const T* dataset, size_t nrow, cudaStream_t stream) final; + + void set_search_param(const AnnSearchParam& param) override; + + void set_search_dataset(const T* dataset, size_t nrow) override; + + // TODO: if the number of results is less than k, the remaining elements of 'neighbors' + // will be filled with (size_t)-1 + void search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream = 0) const override; + + // to enable dataset access from GPU memory + AlgoProperty get_preference() const override + { + AlgoProperty property; + property.dataset_memory_type = MemoryType::HostMmap; + property.query_memory_type = MemoryType::Host; + return property; + } + void save(const std::string& file) const override; + void load(const std::string&) override; + + private: + void get_search_knn_results_(const T* query, int k, size_t* indices, float* distances) const; + + raft::device_resources handle_; + BuildParam index_params_; + std::optional> index_; + int device_; + int dimension_; + + std::unique_ptr::type>> appr_alg_; + std::unique_ptr::type>> space_; + int num_threads_; + std::unique_ptr thread_pool_; + + Objective metric_objective_; +}; + +template +void RaftCagraHnswlib::build(const T* dataset, size_t nrow, cudaStream_t) +{ + if (raft::get_device_for_address(dataset) == -1) { + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } else { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } +} + +template +void RaftCagraHnswlib::set_search_param(const AnnSearchParam& param_) +{ + auto param = dynamic_cast(param_); + appr_alg_->ef_ = param.ef; + metric_objective_ = param.metric_objective; + + bool use_pool = (metric_objective_ == Objective::LATENCY && param.num_threads > 1) && + (!thread_pool_ || num_threads_ != param.num_threads); + if (use_pool) { + num_threads_ = param.num_threads; + thread_pool_ = std::make_unique(num_threads_); + } +} + +template +void RaftCagraHnswlib::save(const std::string& file) const +{ + raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); +} + +template +void RaftCagraHnswlib::load(const std::string& file) +{ + if constexpr (std::is_same_v) { + if (static_cast(index_params_.metric) == Metric::kInnerProduct) { + space_ = std::make_unique(dimension_); + } else { + space_ = std::make_unique(dimension_); + } + } else if constexpr (std::is_same_v) { + space_ = std::make_unique(dimension_); + } + + appr_alg_ = std::make_unique::type>>( + space_.get(), file); + appr_alg_->base_layer_only = true; +} + +template +void RaftCagraHnswlib::search( + const T* queries, int batch_size, int k, size_t* neighbors, float* distances, cudaStream_t) const +{ + auto f = [&](int i) { + // hnsw can only handle a single vector at a time. + get_search_knn_results_(queries + i * dimension_, k, neighbors + i * k, distances + i * k); + }; + if (metric_objective_ == Objective::LATENCY) { + thread_pool_->submit(f, batch_size); + } else { + for (int i = 0; i < batch_size; i++) { + f(i); + } + } +} + +template +void RaftCagraHnswlib::get_search_knn_results_(const T* query, + int k, + size_t* indices, + float* distances) const +{ + auto result = appr_alg_->searchKnn(query, k); + assert(result.size() >= static_cast(k)); + + for (int i = k - 1; i >= 0; --i) { + indices[i] = result.top().second; + distances[i] = result.top().first; + result.pop(); + } +} + +} // namespace raft::bench::ann diff --git a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake index 94033e8333..a4ceacae38 100644 --- a/cpp/cmake/thirdparty/get_hnswlib.cmake +++ b/cpp/cmake/thirdparty/get_hnswlib.cmake @@ -26,6 +26,11 @@ function(find_and_configure_hnswlib) COMMAND git clone --branch=v0.6.2 https://github.com/nmslib/hnswlib.git hnswlib-src WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/_deps ) + message("SOURCE ${CMAKE_CURRENT_SOURCE_DIR}") + execute_process ( + COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/hnswlib.patch + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src + ) endif () include(cmake/modules/FindAVX.cmake) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index 7ea360e0c9..e382bdcba6 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -37,3 +37,6 @@ ggnn: hnswlib: executable: HNSWLIB_ANN_BENCH requires_gpu: false +raft_cagra_hnswlib: + executable: RAFT_CAGRA_HNSWLIB_ANN_BENCH + requires_gpu: true diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra.yaml index 0f80608eef..d8015da5c6 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra.yaml @@ -10,8 +10,3 @@ groups: search: itopk: [32, 64, 128, 256, 512] search_width: [1, 2, 4, 8, 16, 32, 64] - - - - - diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml new file mode 100644 index 0000000000..5d1ee2fb16 --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml @@ -0,0 +1,11 @@ +name: raft_cagra_hnswlib +constraints: + search: raft-ann-bench.constraints.hnswlib_search_constraints +groups: + base: + build: + graph_degree: [32] + intermediate_graph_degree: [64] + graph_build_algo: ["NN_DESCENT"] + search: + ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] \ No newline at end of file From 45356489e1fa51cf8b8237e640774bae621462c8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Nov 2023 19:04:49 -0700 Subject: [PATCH 03/13] fix serializer --- cpp/bench/ann/CMakeLists.txt | 4 +- cpp/bench/ann/src/raft/raft_benchmark.cu | 10 +-- cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 42 ++++----- .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 4 +- cpp/cmake/modules/ConfigureCUDA.cmake | 6 +- cpp/cmake/patches/hnswlib.patch | 74 ++++++++++++++++ .../detail/cagra/cagra_serialize.cuh | 87 +++++++++++++------ 7 files changed, 166 insertions(+), 61 deletions(-) create mode 100644 cpp/cmake/patches/hnswlib.patch diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 889c885f9b..75af81ae83 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -31,7 +31,7 @@ option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in be option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" OFF) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF @@ -259,7 +259,7 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) RAFT_CAGRA_HNSWLIB PATH bench/ann/src/raft/raft_benchmark.cu - $<$:bench/ann/src/raft/raft_cagra_hnswlib.cu> + $<$:bench/ann/src/raft/raft_cagra_hnswlib.cu> INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib LINKS diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 694cd745ba..27a3ddf4c0 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -49,9 +49,9 @@ extern template class raft::bench::ann::RaftCagra; #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB #include "raft_cagra_hnswlib_wrapper.h" -extern template class raft::bench::ann::RaftCagraHnswlib; -extern template class raft::bench::ann::RaftCagraHnswlib; -extern template class raft::bench::ann::RaftCagraHnswlib; +// extern template class raft::bench::ann::RaftCagraHnswlib; +// extern template class raft::bench::ann::RaftCagraHnswlib; +// extern template class raft::bench::ann::RaftCagraHnswlib; #endif #define JSON_DIAGNOSTICS 1 #include @@ -210,9 +210,9 @@ void parse_build_param(const nlohmann::json& conf, if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } } -template +template void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) + typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) { param.ef = conf.at("ef"); if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu index 27e7d22c47..ca3ed7a737 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -1,22 +1,22 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_cagra_hnswlib_wrapper.h" +// /* +// * Copyright (c) 2023, NVIDIA CORPORATION. +// * +// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +// #include "raft_cagra_hnswlib_wrapper.h" -namespace raft::bench::ann { -template class RaftCagraHnswlib; -template class RaftCagraHnswlib; -template class RaftCagraHnswlib; -} // namespace raft::bench::ann +// namespace raft::bench::ann { +// template class RaftCagraHnswlib; +// template class RaftCagraHnswlib; +// template class RaftCagraHnswlib; +// } // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h index 98cb5b7142..2ad5aa6ddc 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -88,8 +88,6 @@ class RaftCagraHnswlib : public ANN { void set_search_param(const AnnSearchParam& param) override; - void set_search_dataset(const T* dataset, size_t nrow) override; - // TODO: if the number of results is less than k, the remaining elements of 'neighbors' // will be filled with (size_t)-1 void search(const T* queries, @@ -177,8 +175,10 @@ void RaftCagraHnswlib::load(const std::string& file) space_ = std::make_unique(dimension_); } + std::cout << "about to create index" << std::endl; appr_alg_ = std::make_unique::type>>( space_.get(), file); + std::cout << "about to failed" << std::endl; appr_alg_->base_layer_only = true; } diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index ea8a077b0c..8e77ec697a 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -20,12 +20,12 @@ endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) - list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) - list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) + # list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) + # list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) # set warnings as errors if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) - list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) + # list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) endif() endif() diff --git a/cpp/cmake/patches/hnswlib.patch b/cpp/cmake/patches/hnswlib.patch new file mode 100644 index 0000000000..468c1d8af9 --- /dev/null +++ b/cpp/cmake/patches/hnswlib.patch @@ -0,0 +1,74 @@ +diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h +index e95e0b5..ebacfdf 100644 +--- a/hnswlib/hnswalg.h ++++ b/hnswlib/hnswalg.h +@@ -16,6 +16,8 @@ namespace hnswlib { + template + class HierarchicalNSW : public AlgorithmInterface { + public: ++ bool base_layer_only{false}; ++ int num_seeds=32; + static const tableint max_update_element_locks = 65536; + HierarchicalNSW(SpaceInterface *s) { + } +@@ -1119,28 +1121,41 @@ namespace hnswlib { + tableint currObj = enterpoint_node_; + dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + +- for (int level = maxlevel_; level > 0; level--) { +- bool changed = true; +- while (changed) { +- changed = false; +- unsigned int *data; ++ if (base_layer_only) { ++ // You can increase the number of seeds when testing large-scale dataset, num_seeds = 48 for 100M-scale ++ for (int i = 0; i < num_seeds; i++) { ++ tableint obj = i * (max_elements_ / num_seeds); ++ dist_t dist = fstdistfunc_(query_data, getDataByInternalId(obj), dist_func_param_); ++ if (dist < curdist) { ++ curdist = dist; ++ currObj = obj; ++ } ++ } ++ } ++ else{ ++ for (int level = maxlevel_; level > 0; level--) { ++ bool changed = true; ++ while (changed) { ++ changed = false; ++ unsigned int *data; + +- data = (unsigned int *) get_linklist(currObj, level); +- int size = getListCount(data); +- metric_hops++; +- metric_distance_computations+=size; ++ data = (unsigned int *) get_linklist(currObj, level); ++ int size = getListCount(data); ++ metric_hops++; ++ metric_distance_computations+=size; + +- tableint *datal = (tableint *) (data + 1); +- for (int i = 0; i < size; i++) { +- tableint cand = datal[i]; +- if (cand < 0 || cand > max_elements_) +- throw std::runtime_error("cand error"); +- dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); ++ tableint *datal = (tableint *) (data + 1); ++ for (int i = 0; i < size; i++) { ++ tableint cand = datal[i]; ++ if (cand < 0 || cand > max_elements_) ++ throw std::runtime_error("cand error"); ++ dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + +- if (d < curdist) { +- curdist = d; +- currObj = cand; +- changed = true; ++ if (d < curdist) { ++ curdist = d; ++ currObj = cand; ++ changed = true; ++ } + } + } + } diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 5a0122e8e3..9287d2f780 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -120,22 +120,46 @@ void serialize_to_hnswlib(raft::resources const& res, RAFT_LOG_DEBUG( "Saving CAGRA index to hnswlib format, size %zu, dim %u", static_cast(index_.size()), index_.dim()); - serialize_scalar(res, os, std::size_t{0}); - serialize_scalar(res, os, static_cast(index_.size())); - serialize_scalar(res, os, static_cast(index_.size())); + // offset_level_0 + std::size_t offset_level_0 = 0; + os.write(reinterpret_cast(&offset_level_0), sizeof(std::size_t)); + // max_element + std::size_t max_element = index_.size(); + os.write(reinterpret_cast(&max_element), sizeof(std::size_t)); + // curr_element_count + std::size_t curr_element_count = index_.size(); + os.write(reinterpret_cast(&curr_element_count), sizeof(std::size_t)); // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, labeltype: size_t // size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + dim * sizeof(data_t) + sizeof(labeltype) auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.size() * 4 + 8); - serialize_scalar(res, os, size_data_per_element); - serialize_scalar(res, os, size_data_per_element - 8); - serialize_scalar(res, os, static_cast(index_.graph_degree() * 4 + 4)); - serialize_scalar(res, os, std::int32_t{1}); - serialize_scalar(res, os, static_cast(index_.size() / 2)); - serialize_scalar(res, os, static_cast(index_.graph_degree() / 2)); - serialize_scalar(res, os, static_cast(index_.graph_degree())); - serialize_scalar(res, os, static_cast(index_.graph_degree() / 2)); - serialize_scalar(res, os, static_cast(0.42424242)); - serialize_scalar(res, os, std::size_t{500}); + os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); + // label_offset + std::size_t label_offset = size_data_per_element - 8; + os.write(reinterpret_cast(&label_offset), sizeof(std::size_t)); + // offset_data + auto offset_data = static_cast(index_.graph_degree() * 4 + 4); + os.write(reinterpret_cast(&offset_data), sizeof(std::size_t)); + // max_level + int max_level = 1; + os.write(reinterpret_cast(&max_level), sizeof(int)); + // entrypoint_node + auto entrypoint_node = static_cast(index_.size() / 2); + os.write(reinterpret_cast(&entrypoint_node), sizeof(int)); + // max_M + auto max_M = static_cast(index_.graph_degree() / 2); + os.write(reinterpret_cast(&max_M), sizeof(std::size_t)); + // max_M0 + std::size_t max_M0 = index_.graph_degree(); + os.write(reinterpret_cast(&max_M0), sizeof(std::size_t)); + // M + auto M = static_cast(index_.graph_degree() / 2); + os.write(reinterpret_cast(&M), sizeof(std::size_t)); + // mult, can be anything + double mult = 0.42424242; + os.write(reinterpret_cast(&mult), sizeof(double)); + // efConstruction, can be anything + std::size_t efConstruction = 500; + os.write(reinterpret_cast(&efConstruction), sizeof(std::size_t)); auto dataset = index_.dataset(); // Remove padding before saving the dataset @@ -151,38 +175,45 @@ void serialize_to_hnswlib(raft::resources const& res, resource::sync_stream(res); auto graph = index_.graph(); - // auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); + auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); // std::vector host_graph_t(graph.size()); - IdxT* host_graph = new IdxT[graph.size()]; + // IdxT* host_graph = new IdxT[graph.extent(0), graph.extent(1)]; // thrust::copy(raft::resource::get_thrust_policy(res), graph.data_handle(), graph.data_handle() + graph.size(), host_graph.data_handle()); - raft::copy(host_graph, graph.data_handle(), graph.size(), raft::resource::get_cuda_stream(res)); + raft::copy(host_graph.data_handle(), graph.data_handle(), graph.size(), raft::resource::get_cuda_stream(res)); // Write one dataset and graph row at a time for (std::size_t i = 0; i < index_.size(); i++) { - serialize_scalar(res, os, static_cast(index_.graph_degree())); + std::size_t graph_degree = index_.graph_degree(); + os.write(reinterpret_cast(&graph_degree), sizeof(std::size_t)); - auto graph_row = host_graph + (index_.graph_degree() * i); - auto graph_row_mds = raft::make_host_vector_view(graph_row, index_.graph_degree()); - serialize_mdspan(res, os, graph_row_mds); + for (std::size_t j = 0; j < index_.graph_degree(); ++j) { + auto graph_elem = host_graph(i, j); + os.write(reinterpret_cast(&graph_elem), sizeof(IdxT)); + } auto data_row = host_dataset.data_handle() + (index_.dim() * i); if constexpr (std::is_same_v) { - auto data_row_mds = raft::make_host_vector_view(data_row, index_.dim()); - serialize_mdspan(res, os, data_row_mds); + for (std::size_t j = 0; j < index_.dim(); ++j) { + auto data_elem = host_dataset(i, j); + os.write(reinterpret_cast(&data_elem), sizeof(T)); + } } else if constexpr (std::is_same_v or std::is_same_v) { - auto data_row_int = raft::make_host_vector(index_.dim()); - std::copy(data_row, data_row + index_.size(), data_row_int.data_handle()); - serialize_mdspan(res, os, data_row_int.view()); + for (std::size_t j = 0; j < index_.dim(); ++j) { + auto data_elem = static_cast(host_dataset(i, j)); + os.write(reinterpret_cast(&data_elem), sizeof(int)); + } } - serialize_scalar(res, os, i); + os.write(reinterpret_cast(&i), sizeof(std::size_t)); } for (std::size_t i = 0; i < index_.size(); i++) { - serialize_scalar(res, os, std::int32_t{0}); + // zeroes + auto zero = 0; + os.write(reinterpret_cast(&zero), sizeof(int)); } - delete [] host_graph; + // delete [] host_graph; } template From 9b97901e847c53a4a05995dec37af338b4a2b77b Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Nov 2023 19:05:45 -0700 Subject: [PATCH 04/13] add stream sync --- cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 9287d2f780..2f44afc81a 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -176,10 +176,8 @@ void serialize_to_hnswlib(raft::resources const& res, auto graph = index_.graph(); auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); - // std::vector host_graph_t(graph.size()); - // IdxT* host_graph = new IdxT[graph.extent(0), graph.extent(1)]; - // thrust::copy(raft::resource::get_thrust_policy(res), graph.data_handle(), graph.data_handle() + graph.size(), host_graph.data_handle()); raft::copy(host_graph.data_handle(), graph.data_handle(), graph.size(), raft::resource::get_cuda_stream(res)); + resource::sync_stream(res); // Write one dataset and graph row at a time for (std::size_t i = 0; i < index_.size(); i++) { From 46401e02c3bd1e27bce665fff8b805646ead0b4e Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 2 Nov 2023 20:26:54 -0700 Subject: [PATCH 05/13] working benchmark --- cpp/bench/ann/CMakeLists.txt | 2 +- cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h | 2 -- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 2 +- cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh | 6 +++--- .../raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml | 4 ++-- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 75af81ae83..08eb01f21b 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -31,7 +31,7 @@ option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in be option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" OFF) +option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h index 2ad5aa6ddc..188996ebb9 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -175,10 +175,8 @@ void RaftCagraHnswlib::load(const std::string& file) space_ = std::make_unique(dimension_); } - std::cout << "about to create index" << std::endl; appr_alg_ = std::make_unique::type>>( space_.get(), file); - std::cout << "about to failed" << std::endl; appr_alg_->base_layer_only = true; } diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 1f551f02e4..2c5a884ab7 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -129,7 +129,7 @@ void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) template void RaftCagra::save(const std::string& file) const { - raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); + raft::neighbors::cagra::serialize(handle_, file, *index_); } template diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 2f44afc81a..7f40406286 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -131,7 +131,7 @@ void serialize_to_hnswlib(raft::resources const& res, os.write(reinterpret_cast(&curr_element_count), sizeof(std::size_t)); // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, labeltype: size_t // size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + dim * sizeof(data_t) + sizeof(labeltype) - auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.size() * 4 + 8); + auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.dim() * 4 + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset std::size_t label_offset = size_data_per_element - 8; @@ -181,8 +181,8 @@ void serialize_to_hnswlib(raft::resources const& res, // Write one dataset and graph row at a time for (std::size_t i = 0; i < index_.size(); i++) { - std::size_t graph_degree = index_.graph_degree(); - os.write(reinterpret_cast(&graph_degree), sizeof(std::size_t)); + auto graph_degree = static_cast(index_.graph_degree()); + os.write(reinterpret_cast(&graph_degree), sizeof(int)); for (std::size_t j = 0; j < index_.graph_degree(); ++j) { auto graph_elem = host_graph(i, j); diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml index 5d1ee2fb16..a3ecae9c86 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml @@ -4,8 +4,8 @@ constraints: groups: base: build: - graph_degree: [32] - intermediate_graph_degree: [64] + graph_degree: [32, 64, 128, 256] + intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] search: ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] \ No newline at end of file From 13afe894a98ad4f66853e9725262a3fbcfee63dc Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 18:23:49 -0800 Subject: [PATCH 06/13] rework to maximize reuse --- cpp/bench/ann/CMakeLists.txt | 3 +- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 4 + .../src/raft/raft_ann_bench_param_parser.h | 233 ++++++++++++++++ cpp/bench/ann/src/raft/raft_benchmark.cu | 263 +----------------- cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 116 ++++++-- .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 171 ++++-------- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 7 + 7 files changed, 392 insertions(+), 405 deletions(-) create mode 100644 cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 08eb01f21b..eb44e58cb5 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -258,8 +258,7 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) NAME RAFT_CAGRA_HNSWLIB PATH - bench/ann/src/raft/raft_benchmark.cu - $<$:bench/ann/src/raft/raft_cagra_hnswlib.cu> + bench/ann/src/raft/raft_cagra_hnswlib.cu INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib LINKS diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 364da81f77..2f8b6e7490 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -91,6 +91,10 @@ class HnswLib : public ANN { return property; } + void set_base_layer_only() { + appr_alg_->base_layer_only = true; + } + private: void get_search_knn_results_(const T* query, int k, size_t* indices, float* distances) const; diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h new file mode 100644 index 0000000000..ae8d562420 --- /dev/null +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#define JSON_DIAGNOSTICS 1 +#include + +#include + +#undef WARP_SIZE +#ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN +#include "raft_wrapper.h" +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +#include "raft_ivf_flat_wrapper.h" +extern template class raft::bench::ann::RaftIvfFlatGpu; +extern template class raft::bench::ann::RaftIvfFlatGpu; +extern template class raft::bench::ann::RaftIvfFlatGpu; +#endif +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#include "raft_ivf_pq_wrapper.h" +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +extern template class raft::bench::ann::RaftIvfPQ; +extern template class raft::bench::ann::RaftIvfPQ; +extern template class raft::bench::ann::RaftIvfPQ; +#endif +#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#include "raft_cagra_wrapper.h" +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA +extern template class raft::bench::ann::RaftCagra; +extern template class raft::bench::ann::RaftCagra; +extern template class raft::bench::ann::RaftCagra; +#endif + +#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftIvfFlatGpu::BuildParam& param) +{ + param.n_lists = conf.at("nlist"); + if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } + if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftIvfFlatGpu::SearchParam& param) +{ + param.ivf_flat_params.n_probes = conf.at("nprobe"); +} +#endif + +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftIvfPQ::BuildParam& param) +{ + if (conf.contains("nlist")) { param.n_lists = conf.at("nlist"); } + if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } + if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } + if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } + if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); } + if (conf.contains("codebook_kind")) { + std::string kind = conf.at("codebook_kind"); + if (kind == "cluster") { + param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER; + } else if (kind == "subspace") { + param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; + } else { + throw std::runtime_error("codebook_kind: '" + kind + + "', should be either 'cluster' or 'subspace'"); + } + } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftIvfPQ::SearchParam& param) +{ + if (conf.contains("nprobe")) { param.pq_param.n_probes = conf.at("nprobe"); } + if (conf.contains("internalDistanceDtype")) { + std::string type = conf.at("internalDistanceDtype"); + if (type == "float") { + param.pq_param.internal_distance_dtype = CUDA_R_32F; + } else if (type == "half") { + param.pq_param.internal_distance_dtype = CUDA_R_16F; + } else { + throw std::runtime_error("internalDistanceDtype: '" + type + + "', should be either 'float' or 'half'"); + } + } else { + // set half as default type + param.pq_param.internal_distance_dtype = CUDA_R_16F; + } + + if (conf.contains("smemLutDtype")) { + std::string type = conf.at("smemLutDtype"); + if (type == "float") { + param.pq_param.lut_dtype = CUDA_R_32F; + } else if (type == "half") { + param.pq_param.lut_dtype = CUDA_R_16F; + } else if (type == "fp8") { + param.pq_param.lut_dtype = CUDA_R_8U; + } else { + throw std::runtime_error("smemLutDtype: '" + type + + "', should be either 'float', 'half' or 'fp8'"); + } + } else { + // set half as default + param.pq_param.lut_dtype = CUDA_R_16F; + } + if (conf.contains("refine_ratio")) { + param.refine_ratio = conf.at("refine_ratio"); + if (param.refine_ratio < 1.0f) { throw std::runtime_error("refine_ratio should be >= 1.0"); } + } +} +#endif + +#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +template +void parse_build_param(const nlohmann::json& conf, + raft::neighbors::experimental::nn_descent::index_params& param) +{ + if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } + if (conf.contains("intermediate_graph_degree")) { + param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); + } + // we allow niter shorthand for max_iterations + if (conf.contains("niter")) { param.max_iterations = conf.at("niter"); } + if (conf.contains("max_iterations")) { param.max_iterations = conf.at("max_iterations"); } + if (conf.contains("termination_threshold")) { + param.termination_threshold = conf.at("termination_threshold"); + } +} + +nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, + const std::string& prefix, + bool remove_prefix = true) +{ + nlohmann::json out; + for (auto& i : conf.items()) { + if (i.key().compare(0, prefix.size(), prefix) == 0) { + auto new_key = remove_prefix ? i.key().substr(prefix.size()) : i.key(); + out[new_key] = i.value(); + } + } + return out; +} + +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftCagra::BuildParam& param) +{ + if (conf.contains("graph_degree")) { + param.cagra_params.graph_degree = conf.at("graph_degree"); + param.cagra_params.intermediate_graph_degree = param.cagra_params.graph_degree * 2; + } + if (conf.contains("intermediate_graph_degree")) { + param.cagra_params.intermediate_graph_degree = conf.at("intermediate_graph_degree"); + } + if (conf.contains("graph_build_algo")) { + if (conf.at("graph_build_algo") == "IVF_PQ") { + param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; + } else if (conf.at("graph_build_algo") == "NN_DESCENT") { + param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; + } + } + nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_"); + if (!ivf_pq_build_conf.empty()) { + raft::neighbors::ivf_pq::index_params bparam; + parse_build_param(ivf_pq_build_conf, bparam); + param.ivf_pq_build_params = bparam; + } + nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_"); + if (!ivf_pq_search_conf.empty()) { + typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; + parse_search_param(ivf_pq_search_conf, sparam); + param.ivf_pq_search_params = sparam.pq_param; + param.ivf_pq_refine_rate = sparam.refine_ratio; + } + nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_"); + if (!nn_descent_conf.empty()) { + raft::neighbors::experimental::nn_descent::index_params nn_param; + nn_param.intermediate_graph_degree = 1.5 * param.cagra_params.intermediate_graph_degree; + parse_build_param(nn_descent_conf, nn_param); + if (nn_param.graph_degree != param.cagra_params.intermediate_graph_degree) { + // RAFT_LOG_WARN( + // "nn_descent_graph_degree has to be equal to CAGRA intermediate_grpah_degree, overriding"); + nn_param.graph_degree = param.cagra_params.intermediate_graph_degree; + } + param.nn_descent_params = nn_param; + } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftCagra::SearchParam& param) +{ + if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } + if (conf.contains("algo")) { + if (conf.at("algo") == "single_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; + } else if (conf.at("algo") == "multi_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; + } else if (conf.at("algo") == "multi_kernel") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; + } else if (conf.at("algo") == "auto") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; + } else { + std::string tmp = conf.at("algo"); + THROW("Invalid value for algo: %s", tmp.c_str()); + } + } +} +#endif diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index aa9d3c90d1..f8c65a2d6e 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -16,6 +16,8 @@ #include "../common/ann_types.hpp" +#include "raft_ann_bench_param_parser.h" + #include #include #include @@ -26,256 +28,11 @@ #include #include -#undef WARP_SIZE -#ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN -#include "raft_wrapper.h" -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT -#include "raft_ivf_flat_wrapper.h" -extern template class raft::bench::ann::RaftIvfFlatGpu; -extern template class raft::bench::ann::RaftIvfFlatGpu; -extern template class raft::bench::ann::RaftIvfFlatGpu; -#endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) -#include "raft_ivf_pq_wrapper.h" -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ -extern template class raft::bench::ann::RaftIvfPQ; -extern template class raft::bench::ann::RaftIvfPQ; -extern template class raft::bench::ann::RaftIvfPQ; -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA -#include "raft_cagra_wrapper.h" -extern template class raft::bench::ann::RaftCagra; -extern template class raft::bench::ann::RaftCagra; -extern template class raft::bench::ann::RaftCagra; -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB -#include "raft_cagra_hnswlib_wrapper.h" -// extern template class raft::bench::ann::RaftCagraHnswlib; -// extern template class raft::bench::ann::RaftCagraHnswlib; -// extern template class raft::bench::ann::RaftCagraHnswlib; -#endif #define JSON_DIAGNOSTICS 1 #include namespace raft::bench::ann { -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfFlatGpu::BuildParam& param) -{ - param.n_lists = conf.at("nlist"); - if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } - if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfFlatGpu::SearchParam& param) -{ - param.ivf_flat_params.n_probes = conf.at("nprobe"); -} -#endif - -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfPQ::BuildParam& param) -{ - if (conf.contains("nlist")) { param.n_lists = conf.at("nlist"); } - if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } - if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } - if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } - if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); } - if (conf.contains("codebook_kind")) { - std::string kind = conf.at("codebook_kind"); - if (kind == "cluster") { - param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER; - } else if (kind == "subspace") { - param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; - } else { - throw std::runtime_error("codebook_kind: '" + kind + - "', should be either 'cluster' or 'subspace'"); - } - } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfPQ::SearchParam& param) -{ - if (conf.contains("nprobe")) { param.pq_param.n_probes = conf.at("nprobe"); } - if (conf.contains("internalDistanceDtype")) { - std::string type = conf.at("internalDistanceDtype"); - if (type == "float") { - param.pq_param.internal_distance_dtype = CUDA_R_32F; - } else if (type == "half") { - param.pq_param.internal_distance_dtype = CUDA_R_16F; - } else { - throw std::runtime_error("internalDistanceDtype: '" + type + - "', should be either 'float' or 'half'"); - } - } else { - // set half as default type - param.pq_param.internal_distance_dtype = CUDA_R_16F; - } - - if (conf.contains("smemLutDtype")) { - std::string type = conf.at("smemLutDtype"); - if (type == "float") { - param.pq_param.lut_dtype = CUDA_R_32F; - } else if (type == "half") { - param.pq_param.lut_dtype = CUDA_R_16F; - } else if (type == "fp8") { - param.pq_param.lut_dtype = CUDA_R_8U; - } else { - throw std::runtime_error("smemLutDtype: '" + type + - "', should be either 'float', 'half' or 'fp8'"); - } - } else { - // set half as default - param.pq_param.lut_dtype = CUDA_R_16F; - } - if (conf.contains("refine_ratio")) { - param.refine_ratio = conf.at("refine_ratio"); - if (param.refine_ratio < 1.0f) { throw std::runtime_error("refine_ratio should be >= 1.0"); } - } -} -#endif - -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA -template -void parse_build_param(const nlohmann::json& conf, - raft::neighbors::experimental::nn_descent::index_params& param) -{ - if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } - if (conf.contains("intermediate_graph_degree")) { - param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); - } - // we allow niter shorthand for max_iterations - if (conf.contains("niter")) { param.max_iterations = conf.at("niter"); } - if (conf.contains("max_iterations")) { param.max_iterations = conf.at("max_iterations"); } - if (conf.contains("termination_threshold")) { - param.termination_threshold = conf.at("termination_threshold"); - } -} - -nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, - const std::string& prefix, - bool remove_prefix = true) -{ - nlohmann::json out; - for (auto& i : conf.items()) { - if (i.key().compare(0, prefix.size(), prefix) == 0) { - auto new_key = remove_prefix ? i.key().substr(prefix.size()) : i.key(); - out[new_key] = i.value(); - } - } - return out; -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagra::BuildParam& param) -{ - if (conf.contains("graph_degree")) { - param.cagra_params.graph_degree = conf.at("graph_degree"); - param.cagra_params.intermediate_graph_degree = param.cagra_params.graph_degree * 2; - } - if (conf.contains("intermediate_graph_degree")) { - param.cagra_params.intermediate_graph_degree = conf.at("intermediate_graph_degree"); - } - if (conf.contains("graph_build_algo")) { - if (conf.at("graph_build_algo") == "IVF_PQ") { - param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; - } else if (conf.at("graph_build_algo") == "NN_DESCENT") { - param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; - } - } - nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_"); - if (!ivf_pq_build_conf.empty()) { - raft::neighbors::ivf_pq::index_params bparam; - parse_build_param(ivf_pq_build_conf, bparam); - param.ivf_pq_build_params = bparam; - } - nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_"); - if (!ivf_pq_search_conf.empty()) { - typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; - parse_search_param(ivf_pq_search_conf, sparam); - param.ivf_pq_search_params = sparam.pq_param; - param.ivf_pq_refine_rate = sparam.refine_ratio; - } - nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_"); - if (!nn_descent_conf.empty()) { - raft::neighbors::experimental::nn_descent::index_params nn_param; - nn_param.intermediate_graph_degree = 1.5 * param.cagra_params.intermediate_graph_degree; - parse_build_param(nn_descent_conf, nn_param); - if (nn_param.graph_degree != param.cagra_params.intermediate_graph_degree) { - RAFT_LOG_WARN( - "nn_descent_graph_degree has to be equal to CAGRA intermediate_grpah_degree, overriding"); - nn_param.graph_degree = param.cagra_params.intermediate_graph_degree; - } - param.nn_descent_params = nn_param; - } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagra::SearchParam& param) -{ - if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } - if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } - if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } - if (conf.contains("algo")) { - if (conf.at("algo") == "single_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; - } else if (conf.at("algo") == "multi_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; - } else if (conf.at("algo") == "multi_kernel") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; - } else if (conf.at("algo") == "auto") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; - } else { - std::string tmp = conf.at("algo"); - THROW("Invalid value for algo: %s", tmp.c_str()); - } - } -} -#endif - -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagraHnswlib::BuildParam& param) -{ - if (conf.contains("graph_degree")) { - param.graph_degree = conf.at("graph_degree"); - param.intermediate_graph_degree = param.graph_degree * 2; - } - if (conf.contains("intermediate_graph_degree")) { - param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); - } - if (conf.contains("graph_build_algo")) { - if (conf.at("graph_build_algo") == "IVF_PQ") { - param.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; - } else if (conf.at("graph_build_algo") == "NN_DESCENT") { - param.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; - } - } - if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) -{ - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } -} -#endif - template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, @@ -318,13 +75,7 @@ std::unique_ptr> create_algo(const std::string& algo, ann = std::make_unique>(metric, dim, param); } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB - if (algo == "raft_cagra_hnswlib") { - typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } -#endif + if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } return ann; @@ -362,13 +113,7 @@ std::unique_ptr::AnnSearchParam> create_search return param; } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB - if (algo == "raft_cagra_hnswlib") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } -#endif + // else throw std::runtime_error("invalid algo: '" + algo + "'"); } diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu index ca3ed7a737..dd22fa61ff 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -1,22 +1,94 @@ -// /* -// * Copyright (c) 2023, NVIDIA CORPORATION. -// * -// * Licensed under the Apache License, Version 2.0 (the "License"); -// * you may not use this file except in compliance with the License. -// * You may obtain a copy of the License at -// * -// * http://www.apache.org/licenses/LICENSE-2.0 -// * -// * Unless required by applicable law or agreed to in writing, software -// * distributed under the License is distributed on an "AS IS" BASIS, -// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// * See the License for the specific language governing permissions and -// * limitations under the License. -// */ -// #include "raft_cagra_hnswlib_wrapper.h" - -// namespace raft::bench::ann { -// template class RaftCagraHnswlib; -// template class RaftCagraHnswlib; -// template class RaftCagraHnswlib; -// } // namespace raft::bench::ann +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../common/ann_types.hpp" +#include "raft_ann_bench_param_parser.h" +#include "raft_cagra_hnswlib_wrapper.h" + +#include + +#define JSON_DIAGNOSTICS 1 +#include + +namespace raft::bench::ann { + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) +{ + param.ef = conf.at("ef"); + if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } +} + +template +std::unique_ptr> create_algo(const std::string& algo, + const std::string& distance, + int dim, + const nlohmann::json& conf, + const std::vector& dev_list) +{ + // stop compiler warning; not all algorithms support multi-GPU so it may not be used + (void)dev_list; + + raft::bench::ann::Metric metric = parse_metric(distance); + std::unique_ptr> ann; + + if constexpr (std::is_same_v or std::is_same_v) { + if (algo == "raft_cagra_hnswlib") { + typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; + parse_build_param(conf, param); + ann = std::make_unique>(metric, dim, param); + } + } + + if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } + + return ann; +} + + +template +std::unique_ptr::AnnSearchParam> create_search_param( + const std::string& algo, const nlohmann::json& conf) { + if (algo == "raft_cagra_hnswlib") { + auto param = std::make_unique::SearchParam>(); + parse_search_param(conf, *param); + return param; + } + + throw std::runtime_error("invalid algo: '" + algo + "'"); +} + +} // namespace raft::bench::ann + +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) +{ + rmm::mr::cuda_memory_resource cuda_mr; + // Construct a resource that uses a coalescing best-fit pool allocator + rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; + rmm::mr::set_current_device_resource( + &pool_mr); // Updates the current device resource pointer to `pool_mr` + rmm::mr::device_memory_resource* mr = + rmm::mr::get_current_device_resource(); // Points to `pool_mr` + return raft::bench::ann::run_main(argc, argv); +} +#endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h index 188996ebb9..e48ada2e2b 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -15,71 +15,48 @@ */ #pragma once -#include -#include -#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +// #include "../common/ann_types.hpp" +// #include "../common/thread_pool.hpp" +// #include "raft_ann_bench_utils.h" +// #include + +// #include + +#include "raft_cagra_wrapper.h" +#include "../hnswlib/hnswlib_wrapper.h" #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../common/ann_types.hpp" -#include "../common/thread_pool.hpp" -#include "raft_ann_bench_utils.h" -#include - -#include namespace raft::bench::ann { -template -struct hnsw_dist_t { - using type = void; -}; - -template <> -struct hnsw_dist_t { - using type = float; -}; - -template <> -struct hnsw_dist_t { - using type = int; -}; - -template <> -struct hnsw_dist_t { - using type = int; -}; - template class RaftCagraHnswlib : public ANN { public: using typename ANN::AnnSearchParam; - - struct SearchParam : public AnnSearchParam { - int ef; - int num_threads = 1; - }; - - using BuildParam = raft::neighbors::cagra::index_params; + using BuildParam = typename RaftCagra::BuildParam; + using SearchParam = typename HnswLib::SearchParam; RaftCagraHnswlib(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) - : ANN(metric, dim), index_params_(param), dimension_(dim), handle_(cudaStreamPerThread) + : ANN(metric, dim), metric_(metric), index_params_(param), dimension_(dim), handle_(cudaStreamPerThread) { - index_params_.metric = parse_metric_type(metric); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); } ~RaftCagraHnswlib() noexcept {} @@ -109,108 +86,58 @@ class RaftCagraHnswlib : public ANN { void load(const std::string&) override; private: - void get_search_knn_results_(const T* query, int k, size_t* indices, float* distances) const; raft::device_resources handle_; + Metric metric_; BuildParam index_params_; - std::optional> index_; - int device_; int dimension_; - std::unique_ptr::type>> appr_alg_; - std::unique_ptr::type>> space_; - int num_threads_; - std::unique_ptr thread_pool_; + std::unique_ptr> cagra_build_; + std::unique_ptr> hnswlib_search_; Objective metric_objective_; }; template -void RaftCagraHnswlib::build(const T* dataset, size_t nrow, cudaStream_t) +void RaftCagraHnswlib::build(const T* dataset, size_t nrow, cudaStream_t stream) { - if (raft::get_device_for_address(dataset) == -1) { - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; - } else { - auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; + if (not cagra_build_) { + cagra_build_ = std::make_unique>(metric_, dimension_, index_params_); } + cagra_build_->build(dataset, nrow, stream); } template void RaftCagraHnswlib::set_search_param(const AnnSearchParam& param_) { - auto param = dynamic_cast(param_); - appr_alg_->ef_ = param.ef; - metric_objective_ = param.metric_objective; - - bool use_pool = (metric_objective_ == Objective::LATENCY && param.num_threads > 1) && - (!thread_pool_ || num_threads_ != param.num_threads); - if (use_pool) { - num_threads_ = param.num_threads; - thread_pool_ = std::make_unique(num_threads_); - } + hnswlib_search_->set_search_param(param_); } template void RaftCagraHnswlib::save(const std::string& file) const { - raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); + cagra_build_->save_to_hnswlib(file); } template void RaftCagraHnswlib::load(const std::string& file) { - if constexpr (std::is_same_v) { - if (static_cast(index_params_.metric) == Metric::kInnerProduct) { - space_ = std::make_unique(dimension_); - } else { - space_ = std::make_unique(dimension_); - } - } else if constexpr (std::is_same_v) { - space_ = std::make_unique(dimension_); + typename HnswLib::BuildParam param; + // these values don't matter since we don't build with HnswLib + param.M = 50; + param.ef_construction = 100; + if (not hnswlib_search_) { + hnswlib_search_ = std::make_unique>(metric_, dimension_, param); } - - appr_alg_ = std::make_unique::type>>( - space_.get(), file); - appr_alg_->base_layer_only = true; + hnswlib_search_->load(file); + hnswlib_search_->set_base_layer_only(); } template void RaftCagraHnswlib::search( const T* queries, int batch_size, int k, size_t* neighbors, float* distances, cudaStream_t) const { - auto f = [&](int i) { - // hnsw can only handle a single vector at a time. - get_search_knn_results_(queries + i * dimension_, k, neighbors + i * k, distances + i * k); - }; - if (metric_objective_ == Objective::LATENCY) { - thread_pool_->submit(f, batch_size); - } else { - for (int i = 0; i < batch_size; i++) { - f(i); - } - } -} - -template -void RaftCagraHnswlib::get_search_knn_results_(const T* query, - int k, - size_t* indices, - float* distances) const -{ - auto result = appr_alg_->searchKnn(query, k); - assert(result.size() >= static_cast(k)); - - for (int i = k - 1; i >= 0; --i) { - indices[i] = result.top().second; - distances[i] = result.top().first; - result.pop(); - } + hnswlib_search_->search(queries, batch_size, k, neighbors, distances); } } // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 86f7eb036e..bf526101be 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -98,6 +98,7 @@ class RaftCagra : public ANN { } void save(const std::string& file) const override; void load(const std::string&) override; + void save_to_hnswlib(const std::string& file) const; private: raft::device_resources handle_; @@ -146,6 +147,12 @@ void RaftCagra::save(const std::string& file) const raft::neighbors::cagra::serialize(handle_, file, *index_); } +template +void RaftCagra::save_to_hnswlib(const std::string& file) const +{ + raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); +} + template void RaftCagra::load(const std::string& file) { From 5fb92f763e77738cf3d6eb748e73bd72458719d5 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 18:27:58 -0800 Subject: [PATCH 07/13] add docs --- .../raft/neighbors/cagra_serialize.cuh | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index 85016ee2a4..ec012f8ef0 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -93,6 +93,31 @@ void serialize(raft::resources const& handle, detail::serialize(handle, filename, index, include_dataset); } +/** + * Write the CAGRA built index as a base layer HNSW index to an output stream + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.cpp} + * #include + * + * raft::resources handle; + * + * // create an output stream + * std::ostream os(std::cout.rdbuf()); + * // create an index with `auto index = cagra::build(...);` + * raft::serialize_to_hnswlib(handle, os, index); + * @endcode + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] handle the raft handle + * @param[in] os output stream + * @param[in] index CAGRA index + * @param[in] include_dataset Whether or not to write out the dataset to the file. + * + */ template void serialize_to_hnswlib(raft::resources const& handle, std::ostream& os, @@ -101,11 +126,35 @@ void serialize_to_hnswlib(raft::resources const& handle, detail::serialize_to_hnswlib(handle, os, index); } +/** + * Write the CAGRA built index as a base layer HNSW index to file + * + * Experimental, both the API and the serialization format are subject to change. + * + * @code{.cpp} + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string filename("/path/to/index"); + * // create an index with `auto index = cagra::build(...);` + * raft::serialize_to_hnswlib(handle, filename, index); + * @endcode + * + * @tparam T data element type + * @tparam IdxT type of the indices + * + * @param[in] handle the raft handle + * @param[in] filename the file name for saving the index + * @param[in] index CAGRA index + * @param[in] include_dataset Whether or not to write out the dataset to the file. + * + */ template void serialize_to_hnswlib(raft::resources const& handle, const std::string& filename, - const index& index, - bool include_dataset = true) + const index& index) { detail::serialize_to_hnswlib(handle, filename, index); } From 2c360ed37ab834f5acb406633911a6428d031d8e Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 18:38:45 -0800 Subject: [PATCH 08/13] style fix --- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 4 +- .../src/raft/raft_ann_bench_param_parser.h | 12 +++--- cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 14 ++++--- .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 37 ++++--------------- .../raft/neighbors/cagra_serialize.cuh | 8 ++-- .../detail/cagra/cagra_serialize.cuh | 32 +++++++++------- 6 files changed, 44 insertions(+), 63 deletions(-) diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 2f8b6e7490..921d72decc 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -91,9 +91,7 @@ class HnswLib : public ANN { return property; } - void set_base_layer_only() { - appr_alg_->base_layer_only = true; - } + void set_base_layer_only() { appr_alg_->base_layer_only = true; } private: void get_search_knn_results_(const T* query, int k, size_t* indices, float* distances) const; diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index ae8d562420..479a90e3b5 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -19,8 +19,6 @@ #define JSON_DIAGNOSTICS 1 #include -#include - #undef WARP_SIZE #ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN #include "raft_wrapper.h" @@ -31,7 +29,8 @@ extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ + defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) #include "raft_ivf_pq_wrapper.h" #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ @@ -39,7 +38,7 @@ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) #include "raft_cagra_wrapper.h" #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA @@ -66,7 +65,8 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ + defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::BuildParam& param) @@ -200,8 +200,6 @@ void parse_build_param(const nlohmann::json& conf, nn_param.intermediate_graph_degree = 1.5 * param.cagra_params.intermediate_graph_degree; parse_build_param(nn_descent_conf, nn_param); if (nn_param.graph_degree != param.cagra_params.intermediate_graph_degree) { - // RAFT_LOG_WARN( - // "nn_descent_graph_degree has to be equal to CAGRA intermediate_grpah_degree, overriding"); nn_param.graph_degree = param.cagra_params.intermediate_graph_degree; } param.nn_descent_params = nn_param; diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu index dd22fa61ff..f4fda27d1f 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -48,9 +48,9 @@ std::unique_ptr> create_algo(const std::string& algo, if constexpr (std::is_same_v or std::is_same_v) { if (algo == "raft_cagra_hnswlib") { - typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); + typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; + parse_build_param(conf, param); + ann = std::make_unique>(metric, dim, param); } } @@ -62,14 +62,16 @@ std::unique_ptr> create_algo(const std::string& algo, template std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) { + const std::string& algo, const nlohmann::json& conf) +{ if (algo == "raft_cagra_hnswlib") { - auto param = std::make_unique::SearchParam>(); + auto param = + std::make_unique::SearchParam>(); parse_search_param(conf, *param); return param; } - throw std::runtime_error("invalid algo: '" + algo + "'"); + throw std::runtime_error("invalid algo: '" + algo + "'"); } } // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h index e48ada2e2b..e7ff231a2d 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -15,32 +15,6 @@ */ #pragma once -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include - -// #include "../common/ann_types.hpp" -// #include "../common/thread_pool.hpp" -// #include "raft_ann_bench_utils.h" -// #include - -// #include - #include "raft_cagra_wrapper.h" #include "../hnswlib/hnswlib_wrapper.h" #include @@ -51,11 +25,15 @@ template class RaftCagraHnswlib : public ANN { public: using typename ANN::AnnSearchParam; - using BuildParam = typename RaftCagra::BuildParam; + using BuildParam = typename RaftCagra::BuildParam; using SearchParam = typename HnswLib::SearchParam; RaftCagraHnswlib(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) - : ANN(metric, dim), metric_(metric), index_params_(param), dimension_(dim), handle_(cudaStreamPerThread) + : ANN(metric, dim), + metric_(metric), + index_params_(param), + dimension_(dim), + handle_(cudaStreamPerThread) { } @@ -86,7 +64,6 @@ class RaftCagraHnswlib : public ANN { void load(const std::string&) override; private: - raft::device_resources handle_; Metric metric_; BuildParam index_params_; @@ -124,7 +101,7 @@ void RaftCagraHnswlib::load(const std::string& file) { typename HnswLib::BuildParam param; // these values don't matter since we don't build with HnswLib - param.M = 50; + param.M = 50; param.ef_construction = 100; if (not hnswlib_search_) { hnswlib_search_ = std::make_unique>(metric_, dimension_, param); diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index ec012f8ef0..4930d2a494 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -120,8 +120,8 @@ void serialize(raft::resources const& handle, */ template void serialize_to_hnswlib(raft::resources const& handle, - std::ostream& os, - const index& index) + std::ostream& os, + const index& index) { detail::serialize_to_hnswlib(handle, os, index); } @@ -153,8 +153,8 @@ void serialize_to_hnswlib(raft::resources const& handle, */ template void serialize_to_hnswlib(raft::resources const& handle, - const std::string& filename, - const index& index) + const std::string& filename, + const index& index) { detail::serialize_to_hnswlib(handle, filename, index); } diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 7f40406286..7ac0530281 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -19,7 +19,6 @@ #include "raft/core/host_mdarray.hpp" #include "raft/core/mdspan_types.hpp" #include "raft/core/resource/cuda_stream.hpp" -#include #include #include #include @@ -113,12 +112,14 @@ void serialize(raft::resources const& res, template void serialize_to_hnswlib(raft::resources const& res, - std::ostream& os, - const index& index_) + std::ostream& os, + const index& index_) { common::nvtx::range fun_scope("cagra::serialize_to_hnswlib"); RAFT_LOG_DEBUG( - "Saving CAGRA index to hnswlib format, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + "Saving CAGRA index to hnswlib format, size %zu, dim %u", + static_cast(index_.size()), + index_.dim()); // offset_level_0 std::size_t offset_level_0 = 0; @@ -129,9 +130,11 @@ void serialize_to_hnswlib(raft::resources const& res, // curr_element_count std::size_t curr_element_count = index_.size(); os.write(reinterpret_cast(&curr_element_count), sizeof(std::size_t)); - // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, labeltype: size_t - // size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + dim * sizeof(data_t) + sizeof(labeltype) - auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.dim() * 4 + 8); + // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, + // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + + // dim * sizeof(data_t) + sizeof(labeltype) + auto size_data_per_element = + static_cast(index_.graph_degree() * 4 + 4 + index_.dim() * 4 + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset std::size_t label_offset = size_data_per_element - 8; @@ -175,8 +178,12 @@ void serialize_to_hnswlib(raft::resources const& res, resource::sync_stream(res); auto graph = index_.graph(); - auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); - raft::copy(host_graph.data_handle(), graph.data_handle(), graph.size(), raft::resource::get_cuda_stream(res)); + auto host_graph = + raft::make_host_matrix(graph.extent(0), graph.extent(1)); + raft::copy(host_graph.data_handle(), + graph.data_handle(), + graph.size(), + raft::resource::get_cuda_stream(res)); resource::sync_stream(res); // Write one dataset and graph row at a time @@ -195,8 +202,7 @@ void serialize_to_hnswlib(raft::resources const& res, auto data_elem = host_dataset(i, j); os.write(reinterpret_cast(&data_elem), sizeof(T)); } - } - else if constexpr (std::is_same_v or std::is_same_v) { + } else if constexpr (std::is_same_v or std::is_same_v) { for (std::size_t j = 0; j < index_.dim(); ++j) { auto data_elem = static_cast(host_dataset(i, j)); os.write(reinterpret_cast(&data_elem), sizeof(int)); @@ -216,8 +222,8 @@ void serialize_to_hnswlib(raft::resources const& res, template void serialize_to_hnswlib(raft::resources const& res, - const std::string& filename, - const index& index_) { + const std::string& filename, + const index& index_) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } From f06e4311a35ca2cbc042b242464fc5a44faf41ce Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 18:46:06 -0800 Subject: [PATCH 09/13] more style fixes --- cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 1 - .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 2 +- cpp/cmake/modules/ConfigureCUDA.cmake | 6 +++--- .../detail/cagra/cagra_serialize.cuh | 20 +++++++++---------- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu index f4fda27d1f..ce6fa255b2 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu @@ -59,7 +59,6 @@ std::unique_ptr> create_algo(const std::string& algo, return ann; } - template std::unique_ptr::AnnSearchParam> create_search_param( const std::string& algo, const nlohmann::json& conf) diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h index e7ff231a2d..432caecfcc 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h @@ -15,8 +15,8 @@ */ #pragma once -#include "raft_cagra_wrapper.h" #include "../hnswlib/hnswlib_wrapper.h" +#include "raft_cagra_wrapper.h" #include namespace raft::bench::ann { diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake index 8e77ec697a..ea8a077b0c 100644 --- a/cpp/cmake/modules/ConfigureCUDA.cmake +++ b/cpp/cmake/modules/ConfigureCUDA.cmake @@ -20,12 +20,12 @@ endif() # Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with # clang) if(CMAKE_COMPILER_IS_GNUCXX) - # list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) - # list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) + list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations) + list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations) # set warnings as errors if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0) - # list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) + list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings) endif() endif() diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 7ac0530281..dd909429b9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -16,9 +16,9 @@ #pragma once -#include "raft/core/host_mdarray.hpp" -#include "raft/core/mdspan_types.hpp" -#include "raft/core/resource/cuda_stream.hpp" +#include +#include +#include #include #include #include @@ -116,10 +116,9 @@ void serialize_to_hnswlib(raft::resources const& res, const index& index_) { common::nvtx::range fun_scope("cagra::serialize_to_hnswlib"); - RAFT_LOG_DEBUG( - "Saving CAGRA index to hnswlib format, size %zu, dim %u", - static_cast(index_.size()), - index_.dim()); + RAFT_LOG_DEBUG("Saving CAGRA index to hnswlib format, size %zu, dim %u", + static_cast(index_.size()), + index_.dim()); // offset_level_0 std::size_t offset_level_0 = 0; @@ -133,7 +132,7 @@ void serialize_to_hnswlib(raft::resources const& res, // Example:M: 16, dim = 128, data_t = float, index_t = uint32_t, list_size_type = uint32_t, // labeltype: size_t size_data_per_element_ = M * 2 * sizeof(index_t) + sizeof(list_size_type) + // dim * sizeof(data_t) + sizeof(labeltype) - auto size_data_per_element = + auto size_data_per_element = static_cast(index_.graph_degree() * 4 + 4 + index_.dim() * 4 + 8); os.write(reinterpret_cast(&size_data_per_element), sizeof(std::size_t)); // label_offset @@ -178,7 +177,7 @@ void serialize_to_hnswlib(raft::resources const& res, resource::sync_stream(res); auto graph = index_.graph(); - auto host_graph = + auto host_graph = raft::make_host_matrix(graph.extent(0), graph.extent(1)); raft::copy(host_graph.data_handle(), graph.data_handle(), @@ -223,7 +222,8 @@ void serialize_to_hnswlib(raft::resources const& res, template void serialize_to_hnswlib(raft::resources const& res, const std::string& filename, - const index& index_) { + const index& index_) +{ std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } From f9b306e4b7c543dc0b483a2e0c1286ff71dc6ee6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 19:09:23 -0800 Subject: [PATCH 10/13] add patch to fix hnswlib warnings --- cpp/cmake/patches/hnswlib.patch | 64 +++++++++++++++++-- .../run/conf/algos/raft_cagra_hnswlib.yaml | 2 +- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/patches/hnswlib.patch b/cpp/cmake/patches/hnswlib.patch index 468c1d8af9..32c1537c58 100644 --- a/cpp/cmake/patches/hnswlib.patch +++ b/cpp/cmake/patches/hnswlib.patch @@ -1,8 +1,16 @@ diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h -index e95e0b5..ebacfdf 100644 +index e95e0b5..f0fe50a 100644 --- a/hnswlib/hnswalg.h +++ b/hnswlib/hnswalg.h -@@ -16,6 +16,8 @@ namespace hnswlib { +@@ -3,6 +3,7 @@ + #include "visited_list_pool.h" + #include "hnswlib.h" + #include ++#include + #include + #include + #include +@@ -16,6 +17,8 @@ namespace hnswlib { template class HierarchicalNSW : public AlgorithmInterface { public: @@ -11,7 +19,34 @@ index e95e0b5..ebacfdf 100644 static const tableint max_update_element_locks = 65536; HierarchicalNSW(SpaceInterface *s) { } -@@ -1119,28 +1121,41 @@ namespace hnswlib { +@@ -56,7 +59,7 @@ namespace hnswlib { + visited_list_pool_ = new VisitedListPool(1, max_elements); + + //initializations for special treatment of the first node +- enterpoint_node_ = -1; ++ enterpoint_node_ = std::numeric_limits::max(); + maxlevel_ = -1; + + linkLists_ = (char **) malloc(sizeof(void *) * max_elements_); +@@ -527,7 +530,7 @@ namespace hnswlib { + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; +- if (cand < 0 || cand > max_elements_) ++ if (cand > max_elements_) + throw std::runtime_error("cand error"); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + +@@ -1067,7 +1070,7 @@ namespace hnswlib { + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; +- if (cand < 0 || cand > max_elements_) ++ if (cand > max_elements_) + throw std::runtime_error("cand error"); + dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); + if (d < curdist) { +@@ -1119,28 +1122,41 @@ namespace hnswlib { tableint currObj = enterpoint_node_; dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); @@ -56,7 +91,7 @@ index e95e0b5..ebacfdf 100644 + tableint *datal = (tableint *) (data + 1); + for (int i = 0; i < size; i++) { + tableint cand = datal[i]; -+ if (cand < 0 || cand > max_elements_) ++ if (cand > max_elements_) + throw std::runtime_error("cand error"); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); @@ -72,3 +107,24 @@ index e95e0b5..ebacfdf 100644 } } } +diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h +index 5e1a4a5..4195ebd 100644 +--- a/hnswlib/visited_list_pool.h ++++ b/hnswlib/visited_list_pool.h +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + + namespace hnswlib { + typedef unsigned short int vl_type; +@@ -14,7 +15,7 @@ namespace hnswlib { + unsigned int numelements; + + VisitedList(int numelements1) { +- curV = -1; ++ curV = std::numeric_limits::max(); + numelements = numelements1; + mass = new vl_type[numelements]; + } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml index a3ecae9c86..787675d65d 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/raft_cagra_hnswlib.yaml @@ -8,4 +8,4 @@ groups: intermediate_graph_degree: [32, 64, 96, 128] graph_build_algo: ["NN_DESCENT"] search: - ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] \ No newline at end of file + ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] From f15f6b45a51902b4ff7aeaebbad3074aba009f2a Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Nov 2023 19:10:38 -0800 Subject: [PATCH 11/13] include order --- cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index dd909429b9..eb21b75d3a 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -16,13 +16,13 @@ #pragma once -#include -#include -#include #include #include +#include #include +#include #include +#include #include #include From 26cd90ca0ade2818ebcbf90d02ed70a7c45efb63 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 9 Nov 2023 15:21:12 -0500 Subject: [PATCH 12/13] FIxing docs --- cpp/include/raft/neighbors/cagra_serialize.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index 4930d2a494..40a364df69 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -148,7 +148,6 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] filename the file name for saving the index * @param[in] index CAGRA index - * @param[in] include_dataset Whether or not to write out the dataset to the file. * */ template From aa227829e1f08db00664536892697af064d1151a Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Nov 2023 15:48:49 -0800 Subject: [PATCH 13/13] fix doxygen and add docs --- cpp/include/raft/neighbors/cagra_serialize.cuh | 2 -- docs/source/ann_benchmarks_param_tuning.md | 11 +++++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index 4930d2a494..c801bc9eda 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -115,7 +115,6 @@ void serialize(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] os output stream * @param[in] index CAGRA index - * @param[in] include_dataset Whether or not to write out the dataset to the file. * */ template @@ -148,7 +147,6 @@ void serialize_to_hnswlib(raft::resources const& handle, * @param[in] handle the raft handle * @param[in] filename the file name for saving the index * @param[in] index CAGRA index - * @param[in] include_dataset Whether or not to write out the dataset to the file. * */ template diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index cdc7958714..4c95b9e520 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -46,7 +46,7 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of ### `raft_cagra` -CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search. +CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search. | Parameter | Type | Required | Data Type | Default | Description | |-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -83,6 +83,13 @@ Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the | `nn_descent_max_iterations` | `build_param` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` | | `nn_descent_termination_threshold` | `build_param` | N | Positive float>0 | 0.0001 | Termination threshold for NN descent. | +### `raft_cagra_hnswlib` +This is a benchmark that enables interoperability between `CAGRA` built `HNSW` search. It uses the `CAGRA` built graph as the base layer of an `hnswlib` index to search queries only within the base layer (this is enabled with a simple patch to `hnswlib`). + +`build_param` : Same as `build_param` of [CAGRA](#raft-cagra) + +`search_param` : Same as `search_param` of [hnswlib](#hnswlib) + ## FAISS Indexes ### `faiss_gpu_flat` @@ -152,7 +159,7 @@ Use FAISS IVF-PQ index on CPU ## HNSW - + ### `hnswlib` | Parameter | Type | Required | Data Type | Default | Description |