rapidsai · rapids-bot · Sep 2, 2023 · Aug 31, 2023 · Aug 31, 2023 · Aug 31, 2023
@@ -254,7 +254,7 @@ if(RAFT_ANN_BENCH_SINGLE_EXE)
   target_compile_definitions(
     ANN_BENCH
     PRIVATE
-      $<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}">
+      $<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}">
       $<$<BOOL:${NVTX3_HEADERS_FOUND}>:ANN_BENCH_NVTX3_HEADERS_FOUND>
   )
 

@@ -18,14 +18,12 @@
 
 #pragma once
 
+#include "cuda_stub.hpp"  // cudaStream_t
+
 #include <stdexcept>
 #include <string>
 #include <vector>
 
-#ifndef CPU_ONLY
-#include <cuda_runtime_api.h>  // cudaStream_t
-#endif
-
 namespace raft::bench::ann {
 
 enum class MemoryType {

@@ -33,16 +33,6 @@
 #include <unistd.h>
 #include <vector>
 
-#ifdef ANN_BENCH_BUILD_MAIN
-#ifdef CPU_ONLY
-#define CUDART_FOUND false
-#else
-#define CUDART_FOUND true
-#endif
-#else
-#define CUDART_FOUND (cudart.found())
-#endif
-
 namespace raft::bench::ann {
 
 static inline std::unique_ptr<AnnBase> current_algo{nullptr};
@@ -255,7 +245,7 @@ void bench_search(::benchmark::State& state,
   }
   state.SetItemsProcessed(queries_processed);
   state.counters.insert({{"k", k}, {"n_queries", n_queries}});
-  if (CUDART_FOUND) {
+  if (cudart.found()) {
     state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()},
                            {"GPU QPS", queries_processed / gpu_timer.total_time()}});
   }
@@ -357,7 +347,7 @@ void dispatch_benchmark(const Configuration& conf,
                         std::string index_prefix,
                         kv_series override_kv)
 {
-  if (CUDART_FOUND) {
+  if (cudart.found()) {
     for (auto [key, value] : cuda_info()) {
       ::benchmark::AddCustomContext(key, value);
     }
@@ -506,7 +496,9 @@ inline auto run_main(int argc, char** argv) -> int
     return -1;
   }
 
-  if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); }
+  if (cudart.needed() && !cudart.found()) {
+    log_warn("cudart library is not found, GPU-based indices won't work.");
+  }
 
   Configuration conf(conf_stream);
   std::string dtype = conf.get_dataset_conf().dtype;

@@ -15,37 +15,110 @@
  */
 #pragma once
 
-#ifdef ANN_BENCH_LINK_CUDART
+/*
+The content of this header is governed by two preprocessor definitions:
+
+  - CPU_ONLY - defined when none of the CUDA functions are used (CPU-only build).
+  - ANN_BENCH_LINK_CUDART - if defined, the cudart library name to dlopen at runtime.
+
+______________________________________________________________________________
+|CPU_ONLY | ANN_BENCH_LINK_CUDART |         cudart      | cuda_runtime_api.h |
+|         |                       |  found    |  needed |      included      |
+|---------|-----------------------|-----------|---------|--------------------|
+|   ON    |    <not defined>      |  false    |  false  |       NO           |
+|   ON    |   "cudart.so.xx.xx"   |  false    |  false  |       NO           |
+|  OFF    |     <not defined>     |   true    |   true  |      YES           |
+|  OFF    |   "cudart.so.xx.xx"   | <runtime> |   true  |      YES           |
+------------------------------------------------------------------------------
+*/
+
+#ifndef CPU_ONLY
 #include <cuda_runtime_api.h>
+#ifdef ANN_BENCH_LINK_CUDART
+#include <cstring>
+#include <dlfcn.h>
+#endif
 #else
-#define CPU_ONLY
 typedef void* cudaStream_t;
 typedef void* cudaEvent_t;
 #endif
 
-#include <dlfcn.h>
-
 namespace raft::bench::ann {
 
 struct cuda_lib_handle {
   void* handle{nullptr};
   explicit cuda_lib_handle()
   {
 #ifdef ANN_BENCH_LINK_CUDART
-    handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE);
+    constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE;
+    // Mutable copy of the full library name (e.g. 'libcudart.so.MAJOR.MINOR.PATCH'), truncated below
+    char libname[] = ANN_BENCH_LINK_CUDART;  // NOLINT
+    handle         = dlopen(ANN_BENCH_LINK_CUDART, kFlags);
+    if (handle != nullptr) { return; }
+    // fallback 1: strip the PATCH suffix. NOTE(review): strrchr results are not null-checked
+    auto p = strrchr(libname, '.');
+    p[0]   = 0;
+    handle = dlopen(libname, kFlags);
+    if (handle != nullptr) { return; }
+    // fallback 2: set the MINOR version to 0 (p points at the last remaining dot)
+    p      = strrchr(libname, '.');
+    p[1]   = '0';
+    p[2]   = 0;
+    handle = dlopen(libname, kFlags);
+    if (handle != nullptr) { return; }
+    // fallback 3: strip the MINOR suffix (p still points at the dot found above)
+    p[0]   = 0;
+    handle = dlopen(libname, kFlags);
+    if (handle != nullptr) { return; }
+    // fallback 4: strip the MAJOR suffix; if this fails too, handle stays nullptr
+    p      = strrchr(libname, '.');
+    p[0]   = 0;
+    handle = dlopen(libname, kFlags);
 #endif
   }
   ~cuda_lib_handle() noexcept
   {
+#ifdef ANN_BENCH_LINK_CUDART
     if (handle != nullptr) { dlclose(handle); }
+#endif
+  }
+
+  template <typename Symbol>
+  auto sym(const char* name) -> Symbol
+  {
+#ifdef ANN_BENCH_LINK_CUDART
+    return reinterpret_cast<Symbol>(dlsym(handle, name));
+#else
+    return nullptr;
+#endif
   }
 
-  [[nodiscard]] inline auto found() const -> bool { return handle != nullptr; }
+  /** Whether cudart is needed, i.e. this is NOT a CPU-only build (CPU_ONLY is undefined). */
+  [[nodiscard]] constexpr inline auto needed() const -> bool
+  {
+#if defined(CPU_ONLY)
+    return false;
+#else
+    return true;
+#endif
+  }
+
+  /** cudart availability: false if CPU_ONLY; dlopen success if ANN_BENCH_LINK_CUDART; else true. */
+  [[nodiscard]] inline auto found() const -> bool
+  {
+#if defined(CPU_ONLY)
+    return false;
+#elif defined(ANN_BENCH_LINK_CUDART)
+    return handle != nullptr;
+#else
+    return true;
+#endif
+  }
 };
 
 static inline cuda_lib_handle cudart{};
 
-#ifndef CPU_ONLY
+#ifdef ANN_BENCH_LINK_CUDART
 namespace stub {
 
 [[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst,
@@ -130,10 +203,9 @@ namespace stub {
 
 }  // namespace stub
 
-#define RAFT_DECLARE_CUDART(fun)                                                        \
-  static inline decltype(&stub::fun) fun =                                              \
-    cudart.found() ? reinterpret_cast<decltype(&stub::fun)>(dlsym(cudart.handle, #fun)) \
-                   : &stub::fun
+#define RAFT_DECLARE_CUDART(fun)           \
+  static inline decltype(&stub::fun) fun = \
+    cudart.found() ? cudart.sym<decltype(&stub::fun)>(#fun) : &stub::fun
 
 RAFT_DECLARE_CUDART(cudaMemcpy);
 RAFT_DECLARE_CUDART(cudaMalloc);

@@ -16,6 +16,7 @@
 #pragma once
 
 #include "ann_types.hpp"
+#include "cuda_stub.hpp"  // cuda-related utils
 
 #ifdef ANN_BENCH_NVTX3_HEADERS_FOUND
 #include <nvtx3/nvToolsExt.h>