Merge branch 'branch-24.04' into test-cuda-12.2
jameslamb committed Jan 24, 2024
2 parents 8ea3858 + c73fd0d commit d6767d1
Showing 79 changed files with 4,071 additions and 1,265 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -424,6 +424,7 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/neighbors/cagra_build.cu
src/raft_runtime/neighbors/cagra_search.cu
src/raft_runtime/neighbors/cagra_serialize.cu
src/raft_runtime/neighbors/eps_neighborhood.cu
src/raft_runtime/neighbors/ivf_flat_build.cu
src/raft_runtime/neighbors/ivf_flat_search.cu
src/raft_runtime/neighbors/ivf_flat_serialize.cu
@@ -443,6 +444,7 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
18 changes: 8 additions & 10 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -287,11 +287,11 @@ void bench_search(::benchmark::State& state,
std::make_shared<buf<std::size_t>>(current_algo_props->query_memory_type, k * query_set_size);

cuda_timer gpu_timer;
auto start = std::chrono::high_resolution_clock::now();
{
nvtx_case nvtx{state.name()};

    auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
auto start = std::chrono::high_resolution_clock::now();
for (auto _ : state) {
[[maybe_unused]] auto ntx_lap = nvtx.lap();
[[maybe_unused]] auto gpu_lap = gpu_timer.lap();
@@ -314,17 +314,15 @@ void bench_search(::benchmark::State& state,

queries_processed += n_queries;
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert(
{"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}});

state.SetItemsProcessed(queries_processed);
if (cudart.found()) {
double gpu_time_per_iteration = gpu_timer.total_time() / (double)state.iterations();
state.counters.insert({"GPU", {gpu_time_per_iteration, benchmark::Counter::kAvgThreads}});
state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
}

// This will be the total number of queries across all threads
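For context on the counter change above: `benchmark::Counter::kAvgIterations` asks google-benchmark to divide the stored value by the iteration count when reporting, so the new code can record the raw `duration` total instead of dividing by `state.iterations()` by hand (the GPU timer total gets the same treatment). A minimal self-contained sketch of that counter behavior, independent of the RAFT harness — the benchmark name is illustrative:

#include <benchmark/benchmark.h>
#include <chrono>

static void BM_counter_demo(benchmark::State& state)
{
  auto start = std::chrono::high_resolution_clock::now();
  for (auto _ : state) {
    benchmark::DoNotOptimize(start);  // stand-in for the measured work
  }
  auto end      = std::chrono::high_resolution_clock::now();
  auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
  // kAvgIterations: the framework divides the stored total by the number of
  // iterations, so "Latency" is reported as seconds per iteration.
  state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
BENCHMARK(BM_counter_demo);
BENCHMARK_MAIN();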
27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,13 +49,6 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -112,21 +105,5 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
27 changes: 2 additions & 25 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -53,13 +53,6 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
@@ -110,21 +103,5 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
32 changes: 31 additions & 1 deletion cpp/include/raft/core/math.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,12 +49,42 @@ RAFT_INLINE_FUNCTION auto abs(T x)
template <typename T>
constexpr RAFT_INLINE_FUNCTION auto abs(T x)
-> std::enable_if_t<!std::is_same_v<float, T> && !std::is_same_v<double, T> &&
#if defined(_RAFT_HAS_CUDA)
!std::is_same_v<__half, T> && !std::is_same_v<nv_bfloat16, T> &&
#endif
!std::is_same_v<int, T> && !std::is_same_v<long int, T> &&
!std::is_same_v<long long int, T>,
T>
{
return x < T{0} ? -x : x;
}
#if defined(_RAFT_HAS_CUDA)
template <typename T>
RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t<std::is_same_v<T, __half>, __half> abs(T x)
{
#if (__CUDA_ARCH__ >= 530)
return ::__habs(x);
#else
// Fail during template instantiation if the compute capability doesn't support this operation
static_assert(sizeof(T) != sizeof(T), "__half is only supported on __CUDA_ARCH__ >= 530");
return T{};
#endif
}

template <typename T>
RAFT_DEVICE_INLINE_FUNCTION typename std::enable_if_t<std::is_same_v<T, nv_bfloat16>, nv_bfloat16>
abs(T x)
{
#if (__CUDA_ARCH__ >= 800)
return ::__habs(x);
#else
// Fail during template instantiation if the compute capability doesn't support this operation
static_assert(sizeof(T) != sizeof(T), "nv_bfloat16 is only supported on __CUDA_ARCH__ >= 800");
return T{};
#endif
}
#endif
/** @} */

/** Inverse cosine */
template <typename T>
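The `sizeof(T) != sizeof(T)` conditions in the new overloads are the usual dependent-false trick: because the expression depends on the template parameter, the `static_assert` is only evaluated when the overload is actually instantiated for an unsupported compute capability, rather than rejecting the header outright. A standalone sketch of the pattern — the function name below is illustrative, not from RAFT:

template <typename T>
T half_only_op(T x)
{
  // Dependent-false: sizeof(T) != sizeof(T) can never hold, but because it
  // depends on T the compiler defers evaluation until instantiation. Merely
  // declaring this template compiles; calling it triggers the error.
  static_assert(sizeof(T) != sizeof(T), "half_only_op is not supported for this T");
  return x;
}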
6 changes: 3 additions & 3 deletions cpp/include/raft/core/resource/cublas_handle.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -60,8 +60,8 @@ class cublas_resource_factory : public resource_factory {
*/

/**
* Load a cublasres_t from raft res if it exists, otherwise
* add it and return it.
* Load a `cublasHandle_t` from raft res if it exists, otherwise add it and return it.
*
* @param[in] res the raft resources object
* @return cublas handle
*/
68 changes: 68 additions & 0 deletions cpp/include/raft/core/resource/cublaslt_handle.hpp
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cublasLt.h>
#include <raft/core/cublas_macros.hpp>
#include <raft/core/resource/resource_types.hpp>
#include <raft/core/resources.hpp>

#include <memory>

namespace raft::resource {

class cublaslt_resource : public resource {
public:
cublaslt_resource() { RAFT_CUBLAS_TRY(cublasLtCreate(&handle_)); }
~cublaslt_resource() noexcept override { RAFT_CUBLAS_TRY_NO_THROW(cublasLtDestroy(handle_)); }
auto get_resource() -> void* override { return &handle_; }

private:
cublasLtHandle_t handle_;
};

/** Factory that knows how to construct a specific raft::resource to populate the res_t. */
class cublaslt_resource_factory : public resource_factory {
public:
auto get_resource_type() -> resource_type override { return resource_type::CUBLASLT_HANDLE; }
auto make_resource() -> resource* override { return new cublaslt_resource(); }
};

/**
* @defgroup resource_cublaslt cuBLASLt handle resource functions
* @{
*/

/**
* Load a `cublasLtHandle_t` from raft res if it exists, otherwise add it and return it.
*
* @param[in] res the raft resources object
* @return cublasLt handle
*/
inline auto get_cublaslt_handle(resources const& res) -> cublasLtHandle_t
{
if (!res.has_resource_factory(resource_type::CUBLASLT_HANDLE)) {
res.add_resource_factory(std::make_shared<cublaslt_resource_factory>());
}
auto ret = *res.get_resource<cublasLtHandle_t>(resource_type::CUBLASLT_HANDLE);
return ret;
};

/**
* @}
*/

} // namespace raft::resource
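A hypothetical usage sketch for the new accessor (the surrounding function is illustrative, not part of this commit): the cublasLt handle is created lazily on first access and then cached inside the `resources` object.

#include <raft/core/resource/cublaslt_handle.hpp>
#include <raft/core/resources.hpp>

void lt_example()
{
  raft::resources res;
  // First call registers the factory and creates the cublasLt handle;
  // subsequent calls on the same `res` return the cached handle.
  cublasLtHandle_t lt = raft::resource::get_cublaslt_handle(res);
  (void)lt;  // would be passed to cublasLt routines such as cublasLtMatmul
}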
93 changes: 93 additions & 0 deletions cpp/include/raft/core/resource/custom_resource.hpp
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <raft/core/resource/resource_types.hpp>
#include <raft/core/resources.hpp>

#include <algorithm>
#include <memory>
#include <typeindex>

namespace raft::resource {

class custom_resource : public resource {
public:
custom_resource() = default;
~custom_resource() noexcept override = default;
auto get_resource() -> void* override { return this; }

template <typename ResourceT>
auto load() -> ResourceT*
{
std::lock_guard<std::mutex> _(lock_);
auto key = std::type_index{typeid(ResourceT)};
auto pos = std::lower_bound(store_.begin(), store_.end(), kv{key, {nullptr}});
if ((pos != store_.end()) && std::get<0>(*pos) == key) {
return reinterpret_cast<ResourceT*>(std::get<1>(*pos).get());
}
auto store_ptr = new ResourceT{};
store_.insert(pos, kv{key, std::shared_ptr<void>(store_ptr, [](void* ptr) {
delete reinterpret_cast<ResourceT*>(ptr);
})});
return store_ptr;
}

private:
using kv = std::tuple<std::type_index, std::shared_ptr<void>>;
std::mutex lock_{};
std::vector<kv> store_{};
};

/** Factory that knows how to construct a specific raft::resource to populate the res_t. */
class custom_resource_factory : public resource_factory {
public:
auto get_resource_type() -> resource_type override { return resource_type::CUSTOM; }
auto make_resource() -> resource* override { return new custom_resource(); }
};

/**
* @defgroup resource_custom custom resource functions
* @{
*/

/**
* Get the custom default-constructible resource if it exists, create it otherwise.
*
* Note: in contrast to the other, hard-coded resources, there's no information about the custom
* resources at compile time. Hence, custom resources are kept in a type-indexed lookup table
* searched at runtime, which makes access slightly slower.
*
* @tparam ResourceT the type of the resource; it must be complete and default-constructible.
*
* @param[in] res the raft resources object
* @return a pointer to the custom resource.
*/
template <typename ResourceT>
auto get_custom_resource(resources const& res) -> ResourceT*
{
static_assert(std::is_default_constructible_v<ResourceT>);
if (!res.has_resource_factory(resource_type::CUSTOM)) {
res.add_resource_factory(std::make_shared<custom_resource_factory>());
}
return res.get_resource<custom_resource>(resource_type::CUSTOM)->load<ResourceT>();
};

/**
* @}
*/

} // namespace raft::resource
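A hypothetical usage sketch for `get_custom_resource` (the `my_counters` type and function below are illustrative): any complete, default-constructible type can be attached to a `resources` object this way, and the same instance is shared by everything that receives the same `res`.

#include <raft/core/resource/custom_resource.hpp>
#include <raft/core/resources.hpp>

struct my_counters {  // hypothetical user-defined resource
  int hits = 0;
};

void counting_example(raft::resources const& res)
{
  // Default-constructed on first use; every later call with the same `res`
  // returns a pointer to the same instance.
  my_counters* c = raft::resource::get_custom_resource<my_counters>(res);
  c->hits += 1;
}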
4 changes: 3 additions & 1 deletion cpp/include/raft/core/resource/resource_types.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -43,6 +43,8 @@ enum resource_type {
// CUDA-free builds
THRUST_POLICY, // thrust execution policy
WORKSPACE_RESOURCE, // rmm device memory resource
CUBLASLT_HANDLE, // cublasLt handle
CUSTOM, // runtime-shared default-constructible resource

LAST_KEY // reserved for the last key
};