Merge branch 'cagra_large_topk' of github.com:benfred/raft into cagra_large_topk
rapidsai · Jan 22, 2024 · 1d6c2e7 · 1d6c2e7
2 parents ca5478f + a7ef7f9
commit 1d6c2e7
Show file tree

Hide file tree

Showing 28 changed files with 383 additions and 317 deletions.
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -136,7 +136,7 @@ endif()
 function(ConfigureAnnBench)
 
   set(oneValueArgs NAME)
-  set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES)
+  set(multiValueArgs PATH LINKS CXXFLAGS)
 
   if(NOT BUILD_CPU_ONLY)
     set(GPU_BUILD ON)
@@ -225,9 +225,11 @@ endfunction()
 
 if(RAFT_ANN_BENCH_USE_HNSWLIB)
   ConfigureAnnBench(
-    NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES
-    ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib CXXFLAGS "${HNSW_CXX_FLAGS}"
+    NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp
+    LINKS
+    hnswlib::hnswlib
   )
+
 endif()
 
 if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ)
@@ -278,12 +280,9 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB)
     RAFT_CAGRA_HNSWLIB
     PATH
     bench/ann/src/raft/raft_cagra_hnswlib.cu
-    INCLUDES
-    ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib
     LINKS
     raft::compiled
-    CXXFLAGS
-    "${HNSW_CXX_FLAGS}"
+    hnswlib::hnswlib
   )
 endif()
 
@@ -338,8 +337,8 @@ endif()
 if(RAFT_ANN_BENCH_USE_GGNN)
   include(cmake/thirdparty/get_glog.cmake)
   ConfigureAnnBench(
-    NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu INCLUDES
-    ${CMAKE_CURRENT_BINARY_DIR}/_deps/ggnn-src/include LINKS glog::glog
+    NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu
+    LINKS glog::glog ggnn::ggnn
   )
 endif()
 

diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@
 
 #include "../common/ann_types.hpp"
 #include "../common/thread_pool.hpp"
-#include <hnswlib.h>
+#include <hnswlib/hnswlib.h>
 
 namespace raft::bench::ann {
 

diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_utils.h b/cpp/bench/ann/src/raft/raft_ann_bench_utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,20 @@
  * limitations under the License.
  */
 #pragma once
-#include <cassert>
-#include <fstream>
-#include <iostream>
-#include <memory>
+
 #include <raft/core/device_mdspan.hpp>
+#include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
 #include <raft/distance/distance_types.hpp>
 #include <raft/util/cudart_utils.hpp>
+
 #include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <memory>
 #include <sstream>
 #include <stdexcept>
 #include <string>

diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,23 +15,23 @@
  */
 
 #include "../common/ann_types.hpp"
-
 #include "raft_ann_bench_param_parser.h"
 
+#include <raft/core/logger.hpp>
+
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#define JSON_DIAGNOSTICS 1
+#include <nlohmann/json.hpp>
+
 #include <algorithm>
 #include <cmath>
 #include <memory>
-#include <raft/core/logger.hpp>
-#include <rmm/mr/device/per_device_resource.hpp>
-#include <rmm/mr/device/pool_memory_resource.hpp>
 #include <stdexcept>
 #include <string>
 #include <type_traits>
 #include <utility>
 
-#define JSON_DIAGNOSTICS 1
-#include <nlohmann/json.hpp>
-
 namespace raft::bench::ann {
 
 template <typename T>
@@ -44,7 +44,7 @@ std::unique_ptr<raft::bench::ann::ANN<T>> create_algo(const std::string& algo,
   // stop compiler warning; not all algorithms support multi-GPU so it may not be used
   (void)dev_list;
 
-  raft::bench::ann::Metric metric = parse_metric(distance);
+  [[maybe_unused]] raft::bench::ann::Metric metric = parse_metric(distance);
   std::unique_ptr<raft::bench::ann::ANN<T>> ann;
 
   if constexpr (std::is_same_v<T, float>) {

diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include "raft_ann_bench_param_parser.h"
 #include "raft_cagra_hnswlib_wrapper.h"
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
 
 #define JSON_DIAGNOSTICS 1
@@ -43,7 +44,7 @@ std::unique_ptr<raft::bench::ann::ANN<T>> create_algo(const std::string& algo,
   // stop compiler warning; not all algorithms support multi-GPU so it may not be used
   (void)dev_list;
 
-  raft::bench::ann::Metric metric = parse_metric(distance);
+  [[maybe_unused]] raft::bench::ann::Metric metric = parse_metric(distance);
   std::unique_ptr<raft::bench::ann::ANN<T>> ann;
 
   if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) {
@@ -85,7 +86,9 @@ int main(int argc, char** argv)
 {
   rmm::mr::cuda_memory_resource cuda_mr;
   // Construct a resource that uses a coalescing best-fit pool allocator
-  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};
+  // and is initially sized to half of free device memory.
+  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
+    &cuda_mr, rmm::percent_of_free_device_memory(50)};
   rmm::mr::set_current_device_resource(
     &pool_mr);  // Updates the current device resource pointer to `pool_mr`
   rmm::mr::device_memory_resource* mr =

diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,10 +15,9 @@
  */
 #pragma once
 
-#include <cassert>
-#include <fstream>
-#include <iostream>
-#include <memory>
+#include "../common/ann_types.hpp"
+#include "raft_ann_bench_utils.h"
+
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
 #include <raft/core/logger.hpp>
@@ -28,16 +27,15 @@
 #include <raft/neighbors/ivf_flat.cuh>
 #include <raft/neighbors/ivf_flat_types.hpp>
 #include <raft/util/cudart_utils.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/mr/device/pool_memory_resource.hpp>
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <memory>
 #include <stdexcept>
 #include <string>
 #include <type_traits>
 
-#include "../common/ann_types.hpp"
-#include "raft_ann_bench_utils.h"
-#include <raft/util/cudart_utils.hpp>
-
 namespace raft::bench::ann {
 
 template <typename T, typename IdxT>

diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,9 @@
  */
 #pragma once
 
+#include "../common/ann_types.hpp"
+#include "raft_ann_bench_utils.h"
+
 #include <raft/core/device_mdarray.hpp>
 #include <raft/core/device_mdspan.hpp>
 #include <raft/core/device_resources.hpp>
@@ -28,14 +31,11 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft_runtime/neighbors/ivf_pq.hpp>
 #include <raft_runtime/neighbors/refine.hpp>
+
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
-#include <rmm/mr/device/pool_memory_resource.hpp>
-#include <type_traits>
 
-#include "../common/ann_types.hpp"
-#include "raft_ann_bench_utils.h"
-#include <raft/util/cudart_utils.hpp>
+#include <type_traits>
 
 namespace raft::bench::ann {
 

diff --git a/cpp/bench/prims/common/benchmark.hpp b/cpp/bench/prims/common/benchmark.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@
 
 #include <benchmark/benchmark.h>
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_buffer.hpp>
@@ -43,7 +44,7 @@ namespace raft::bench {
 struct using_pool_memory_res {
  private:
   rmm::mr::device_memory_resource* orig_res_;
-  rmm::mr::cuda_memory_resource cuda_res_;
+  rmm::mr::cuda_memory_resource cuda_res_{};
   rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_res_;
 
  public:
@@ -54,7 +55,9 @@ struct using_pool_memory_res {
     rmm::mr::set_current_device_resource(&pool_res_);
   }
 
-  using_pool_memory_res() : orig_res_(rmm::mr::get_current_device_resource()), pool_res_(&cuda_res_)
+  using_pool_memory_res()
+    : orig_res_(rmm::mr::get_current_device_resource()),
+      pool_res_(&cuda_res_, rmm::percent_of_free_device_memory(50))
   {
     rmm::mr::set_current_device_resource(&pool_res_);
   }
@@ -114,7 +117,8 @@ class fixture {
   raft::device_resources handle;
   rmm::cuda_stream_view stream;
 
-  fixture(bool use_pool_memory_resource = false) : stream{resource::get_cuda_stream(handle)}
+  explicit fixture(bool use_pool_memory_resource = false)
+    : stream{resource::get_cuda_stream(handle)}
   {
     // Cache memory pool between test runs, since it is expensive to create.
     // This speeds up the time required to run the select_k bench by over 3x.

diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu
@@ -30,7 +30,6 @@
 
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
-#include <rmm/mr/device/pool_memory_resource.hpp>
 
 #include <cstdint>
 #include <cstring>

diff --git a/cpp/bench/prims/neighbors/refine.cuh b/cpp/bench/prims/neighbors/refine.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,7 @@
 #include <raft/neighbors/refine.cuh>
 #include <raft/random/rng.cuh>
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/mr/device/pool_memory_resource.hpp>
@@ -58,7 +59,8 @@ class RefineAnn : public fixture {
     state.SetLabel(label_stream.str());
 
     auto old_mr = rmm::mr::get_current_device_resource();
-    rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_mr(old_mr);
+    rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource> pool_mr(
+      old_mr, rmm::percent_of_free_device_memory(50));
     rmm::mr::set_current_device_resource(&pool_mr);
 
     if (data.p.host_data) {

diff --git a/cpp/cmake/patches/ggnn.patch b/cpp/cmake/patches/ggnn.diff
@@ -1,5 +1,3 @@
-diff --git a/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh b/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh
-index 890420e..d792903 100644
 --- a/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh
 +++ b/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh
 @@ -62,7 +62,7 @@ struct SimpleKNNSymCache {
@@ -227,3 +225,6 @@ index 49d76a1..eef69e6 100644
  #include <cub/cub.cuh>
 
  #include "ggnn/utils/cuda_knn_constants.cuh"
+-- 
+2.43.0
+
diff --git a/cpp/cmake/patches/hnswlib.patch b/cpp/cmake/patches/hnswlib.diff
@@ -1,5 +1,3 @@
-diff --git a/hnswlib/hnswalg.h b/hnswlib/hnswalg.h
-index e95e0b5..f0fe50a 100644
 --- a/hnswlib/hnswalg.h
 +++ b/hnswlib/hnswalg.h
 @@ -3,6 +3,7 @@
@@ -128,3 +126,6 @@ index 5e1a4a5..4195ebd 100644
              numelements = numelements1;
              mass = new vl_type[numelements];
          }
+-- 
+2.43.0
+