Merge branch 'branch-24.04' into fix-cuco-conflicts
PointKernel authored Jan 30, 2024
2 parents 276a7d9 + d4ae271 commit 4e82910
Showing 87 changed files with 4,327 additions and 749 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
         types_or: [python, cython]
         additional_dependencies: ["flake8-force"]
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v0.971'
+    rev: 'v1.3.0'
     hooks:
       - id: mypy
         additional_dependencies: [types-cachetools]

11 changes: 11 additions & 0 deletions cpp/CMakeLists.txt
@@ -57,6 +57,7 @@ option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
 option(BUILD_TESTS "Build raft unit-tests" ON)
 option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF)
 option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF)
+option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
 option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
 option(CUDA_ENABLE_LINEINFO
   "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
@@ -195,6 +196,10 @@ if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH)
   rapids_cpm_gbench()
 endif()

+if(BUILD_CAGRA_HNSWLIB)
+  include(cmake/thirdparty/get_hnswlib.cmake)
+endif()
+
 # ##################################################################################################
 # * raft ---------------------------------------------------------------------
 add_library(raft INTERFACE)
@@ -203,6 +208,9 @@ add_library(raft::raft ALIAS raft)
 target_include_directories(
   raft INTERFACE "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:include>"
 )
+if(BUILD_CAGRA_HNSWLIB)
+  target_link_libraries(raft INTERFACE hnswlib::hnswlib)
+endif()

 if(NOT BUILD_CPU_ONLY)
   # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
@@ -424,6 +432,8 @@ if(RAFT_COMPILE_LIBRARY)
     src/raft_runtime/neighbors/cagra_build.cu
     src/raft_runtime/neighbors/cagra_search.cu
     src/raft_runtime/neighbors/cagra_serialize.cu
+    src/raft_runtime/neighbors/eps_neighborhood.cu
+    $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/raft_runtime/neighbors/hnsw.cpp>
     src/raft_runtime/neighbors/ivf_flat_build.cu
     src/raft_runtime/neighbors/ivf_flat_search.cu
     src/raft_runtime/neighbors/ivf_flat_serialize.cu
@@ -443,6 +453,7 @@ if(RAFT_COMPILE_LIBRARY)
     src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
     src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
     src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
+    src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
     src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
     src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
     src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu

16 changes: 3 additions & 13 deletions cpp/bench/ann/CMakeLists.txt
@@ -225,9 +225,7 @@ endfunction()

 if(RAFT_ANN_BENCH_USE_HNSWLIB)
   ConfigureAnnBench(
-    NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp
-    LINKS
-    hnswlib::hnswlib
+    NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib
   )

 endif()
@@ -276,12 +274,7 @@ endif()

 if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB)
   ConfigureAnnBench(
-    NAME
-    RAFT_CAGRA_HNSWLIB
-    PATH
-    bench/ann/src/raft/raft_cagra_hnswlib.cu
-    LINKS
-    raft::compiled
+    NAME RAFT_CAGRA_HNSWLIB PATH bench/ann/src/raft/raft_cagra_hnswlib.cu LINKS raft::compiled
     hnswlib::hnswlib
   )
 endif()
@@ -336,10 +329,7 @@ endif()

 if(RAFT_ANN_BENCH_USE_GGNN)
   include(cmake/thirdparty/get_glog.cmake)
-  ConfigureAnnBench(
-    NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu
-    LINKS glog::glog ggnn::ggnn
-  )
+  ConfigureAnnBench(NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn)
 endif()

 # ##################################################################################################

18 changes: 8 additions & 10 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -287,11 +287,11 @@ void bench_search(::benchmark::State& state,
     std::make_shared<buf<std::size_t>>(current_algo_props->query_memory_type, k * query_set_size);

   cuda_timer gpu_timer;
-  auto start = std::chrono::high_resolution_clock::now();
   {
     nvtx_case nvtx{state.name()};

-    auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
+    auto algo  = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
+    auto start = std::chrono::high_resolution_clock::now();
     for (auto _ : state) {
       [[maybe_unused]] auto ntx_lap = nvtx.lap();
       [[maybe_unused]] auto gpu_lap = gpu_timer.lap();
@@ -314,17 +314,15 @@ void bench_search(::benchmark::State& state,

       queries_processed += n_queries;
     }
+    auto end      = std::chrono::high_resolution_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
+    if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
+    state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
   }
-  auto end      = std::chrono::high_resolution_clock::now();
-  auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
-  if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
-  state.counters.insert(
-    {"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}});

   state.SetItemsProcessed(queries_processed);
   if (cudart.found()) {
-    double gpu_time_per_iteration = gpu_timer.total_time() / (double)state.iterations();
-    state.counters.insert({"GPU", {gpu_time_per_iteration, benchmark::Counter::kAvgThreads}});
+    state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
   }

   // This will be the total number of queries across all threads
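
Note on the timing change above: `start` now lives inside the scoped block next to `algo`, so the wall clock covers only the benchmark loop, and the manual divisions by `state.iterations()` are replaced by `benchmark::Counter::kAvgIterations`, which does that division at report time. A minimal, self-contained sketch of those counter semantics (plain google-benchmark; `BM_latency_demo` is a made-up fixture, not RAFT code):

#include <benchmark/benchmark.h>

#include <chrono>

static void BM_latency_demo(benchmark::State& state)
{
  auto start = std::chrono::high_resolution_clock::now();
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());  // stand-in for one search batch
  }
  auto end      = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
  // kAvgIterations: the reported value is duration / state.iterations(),
  // i.e. average seconds per iteration, with no manual division needed.
  state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
BENCHMARK(BM_latency_demo);
BENCHMARK_MAIN();
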
27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,13 +49,6 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return false; }

-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return true
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }
-
  private:
   /**
    * @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -112,21 +105,5 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
   {
     return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
   }
-
-  /**
-   * @brief Get free and available memory for memory resource
-   *
-   * @throws `rmm::cuda_error` if unable to retrieve memory info.
-   *
-   * @return std::pair containing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    rmm::cuda_stream_view) const override
-  {
-    std::size_t free_size{};
-    std::size_t total_size{};
-    RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
-    return std::make_pair(free_size, total_size);
-  }
 };
-}  // namespace raft::mr
\ No newline at end of file
+}  // namespace raft::mr

27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -53,13 +53,6 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
    */
   [[nodiscard]] bool supports_streams() const noexcept override { return false; }

-  /**
-   * @brief Query whether the resource supports the get_mem_info API.
-   *
-   * @return true
-   */
-  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }
-
  private:
   /**
    * @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -110,21 +103,5 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
   {
     return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
   }
-
-  /**
-   * @brief Get free and available memory for memory resource
-   *
-   * @throws `rmm::cuda_error` if unable to retrieve memory info.
-   *
-   * @return std::pair containing free_size and total_size of memory
-   */
-  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
-    rmm::cuda_stream_view) const override
-  {
-    std::size_t free_size{};
-    std::size_t total_size{};
-    RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
-    return std::make_pair(free_size, total_size);
-  }
 };
-}  // namespace raft::mr
\ No newline at end of file
+}  // namespace raft::mr
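
Both memory-resource wrappers drop `supports_get_mem_info()` / `do_get_mem_info()`, tracking rmm's removal of the mem-info hooks from `device_memory_resource`. A minimal sketch of the customization surface that remains (assuming rmm's pared-down base class; `demo_pinned_resource` is a hypothetical example, not the class above):

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <new>

class demo_pinned_resource final : public rmm::mr::device_memory_resource {
 private:
  // With the get_mem_info hooks gone, these three virtuals are the
  // whole contract a custom resource has to fulfill.
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
  {
    void* ptr{nullptr};
    if (cudaMallocHost(&ptr, bytes) != cudaSuccess) { throw std::bad_alloc{}; }
    return ptr;
  }

  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
  {
    cudaFreeHost(ptr);
  }

  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<demo_pinned_resource const*>(&other) != nullptr;
  }
};
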
9 changes: 7 additions & 2 deletions cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -52,6 +52,11 @@ struct hnsw_dist_t<uint8_t> {
   using type = int;
 };

+template <>
+struct hnsw_dist_t<int8_t> {
+  using type = int;
+};
+
 template <typename T>
 class HnswLib : public ANN<T> {
  public:
@@ -135,7 +140,7 @@ void HnswLib<T>::build(const T* dataset, size_t nrow, cudaStream_t)
       space_ = std::make_shared<hnswlib::L2Space>(dim_);
     }
   } else if constexpr (std::is_same_v<T, uint8_t>) {
-    space_ = std::make_shared<hnswlib::L2SpaceI>(dim_);
+    space_ = std::make_shared<hnswlib::L2SpaceI<T>>(dim_);
   }

   appr_alg_ = std::make_shared<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
@@ -205,7 +210,7 @@ void HnswLib<T>::load(const std::string& path_to_index)
       space_ = std::make_shared<hnswlib::L2Space>(dim_);
     }
   } else if constexpr (std::is_same_v<T, uint8_t>) {
-    space_ = std::make_shared<hnswlib::L2SpaceI>(dim_);
+    space_ = std::make_shared<hnswlib::L2SpaceI<T>>(dim_);
   }

   appr_alg_ = std::make_shared<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
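
The new `hnsw_dist_t<int8_t>` specialization, together with the templated `L2SpaceI<T>` from the hnswlib patch below, is what lets the wrapper instantiate an integer L2 space for signed 8-bit data. The trait pattern in isolation (stand-in names, not the RAFT header):

#include <cstdint>
#include <type_traits>

// Map element type -> distance accumulator type, as hnsw_dist_t does above.
template <typename T>
struct dist_t {
  using type = float;  // real-valued elements accumulate in float
};
template <>
struct dist_t<std::uint8_t> {
  using type = int;  // 8-bit elements accumulate in int
};
template <>
struct dist_t<std::int8_t> {
  using type = int;  // the specialization this commit adds
};

static_assert(std::is_same_v<dist_t<float>::type, float>);
static_assert(std::is_same_v<dist_t<std::int8_t>::type, int>);
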
5 changes: 4 additions & 1 deletion cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -87,6 +87,9 @@ void parse_build_param(const nlohmann::json& conf,
                  "', should be either 'cluster' or 'subspace'");
     }
   }
+  if (conf.contains("max_train_points_per_pq_code")) {
+    param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code");
+  }
 }

 template <typename T, typename IdxT>
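
The added hunk follows the parser's usual optional-field idiom: `contains()` guards the lookup and `at()` converts to the member's type on assignment. A self-contained sketch (only nlohmann::json assumed; `build_param` is a stand-in struct and 256 an arbitrary default, not RAFT's):

#include <nlohmann/json.hpp>

#include <cstdint>
#include <iostream>

struct build_param {
  std::uint32_t max_train_points_per_pq_code = 256;  // used when the key is absent
};

void parse(const nlohmann::json& conf, build_param& param)
{
  if (conf.contains("max_train_points_per_pq_code")) {
    // at() returns a json reference; assignment converts it to uint32_t.
    param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code");
  }
}

int main()
{
  build_param p;
  parse(nlohmann::json::parse(R"({"max_train_points_per_pq_code": 512})"), p);
  std::cout << p.max_train_points_per_pq_code << "\n";  // prints 512
}
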
57 changes: 57 additions & 0 deletions cpp/cmake/patches/hnswlib.diff
@@ -105,6 +105,63 @@
         }
     }
 }
+diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h
+index 4413537..c3240f3 100644
+--- a/hnswlib/space_l2.h
++++ b/hnswlib/space_l2.h
+@@ -252,13 +252,14 @@ namespace hnswlib {
+         ~L2Space() {}
+     };
+ 
++    template <typename T>
+     static int
+     L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {
+ 
+         size_t qty = *((size_t *) qty_ptr);
+         int res = 0;
+-        unsigned char *a = (unsigned char *) pVect1;
+-        unsigned char *b = (unsigned char *) pVect2;
++        T *a = (T *) pVect1;
++        T *b = (T *) pVect2;
+ 
+         qty = qty >> 2;
+         for (size_t i = 0; i < qty; i++) {
+@@ -279,11 +280,12 @@ namespace hnswlib {
+         return (res);
+     }
+ 
++    template <typename T>
+     static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
+         size_t qty = *((size_t*)qty_ptr);
+         int res = 0;
+-        unsigned char* a = (unsigned char*)pVect1;
+-        unsigned char* b = (unsigned char*)pVect2;
++        T* a = (T*)pVect1;
++        T* b = (T*)pVect2;
+ 
+         for(size_t i = 0; i < qty; i++)
+         {
+@@ -294,6 +296,7 @@ namespace hnswlib {
+         return (res);
+     }
+ 
++    template <typename T>
+     class L2SpaceI : public SpaceInterface<int> {
+ 
+         DISTFUNC<int> fstdistfunc_;
+@@ -302,10 +305,10 @@ namespace hnswlib {
+      public:
+         L2SpaceI(size_t dim) {
+             if(dim % 4 == 0) {
+-                fstdistfunc_ = L2SqrI4x;
++                fstdistfunc_ = L2SqrI4x<T>;
+             }
+             else {
+-                fstdistfunc_ = L2SqrI;
++                fstdistfunc_ = L2SqrI<T>;
+             }
+             dim_ = dim;
+             data_size_ = dim * sizeof(unsigned char);
 diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h
 index 5e1a4a5..4195ebd 100644
 --- a/hnswlib/visited_list_pool.h
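
Why the patch templates `L2SqrI`/`L2SqrI4x` on the element type: the hard-coded `unsigned char` casts would reinterpret `int8_t` values as 0..255 before differencing, so distances involving negative components came out wrong. A small sketch of the fixed arithmetic (simplified loop, not the patched hnswlib code itself):

#include <cstddef>
#include <cstdint>
#include <iostream>

template <typename T>
static int l2_sqr_i(const void* pv1, const void* pv2, const void* qty_ptr)
{
  std::size_t qty = *static_cast<const std::size_t*>(qty_ptr);
  const T* a = static_cast<const T*>(pv1);  // element type preserved, not forced to unsigned char
  const T* b = static_cast<const T*>(pv2);
  int res = 0;
  for (std::size_t i = 0; i < qty; i++) {
    int d = static_cast<int>(a[i]) - static_cast<int>(b[i]);
    res += d * d;
  }
  return res;
}

int main()
{
  std::int8_t x[] = {-3};
  std::int8_t y[] = {4};
  std::size_t n = 1;
  // Prints 49; the old unsigned char cast would have computed (253 - 4)^2 instead,
  // because -3 reinterpreted as unsigned char is 253.
  std::cout << l2_sqr_i<std::int8_t>(x, y, &n) << "\n";
}
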
6 changes: 5 additions & 1 deletion cpp/cmake/thirdparty/get_hnswlib.cmake
@@ -30,6 +30,8 @@ function(find_and_configure_hnswlib)
   rapids_cpm_find(
     hnswlib ${PKG_VERSION}
     GLOBAL_TARGETS hnswlib::hnswlib
+    BUILD_EXPORT_SET raft-exports
+    INSTALL_EXPORT_SET raft-exports
     CPM_ARGS
     GIT_REPOSITORY ${PKG_REPOSITORY}
     GIT_TAG ${PKG_PINNED_TAG}
@@ -51,11 +53,13 @@ function(find_and_configure_hnswlib)
   # write export rules
   rapids_export(
     BUILD hnswlib
+    VERSION ${PKG_VERSION}
     EXPORT_SET hnswlib-exports
     GLOBAL_TARGETS hnswlib
     NAMESPACE hnswlib::)
   rapids_export(
     INSTALL hnswlib
+    VERSION ${PKG_VERSION}
     EXPORT_SET hnswlib-exports
     GLOBAL_TARGETS hnswlib
     NAMESPACE hnswlib::)
@@ -74,5 +78,5 @@ endif()
 find_and_configure_hnswlib(VERSION 0.6.2
   REPOSITORY ${RAFT_HNSWLIB_GIT_REPOSITORY}
   PINNED_TAG ${RAFT_HNSWLIB_GIT_TAG}
-  EXCLUDE_FROM_ALL ON
+  EXCLUDE_FROM_ALL OFF
 )