diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 119a5c0a73..54cddb678e 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -254,7 +254,7 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) target_compile_definitions( ANN_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}"> $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> ) diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index e0c22d1798..bdcfd95b2e 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -18,14 +18,12 @@ #pragma once +#include "cuda_stub.hpp" // cudaStream_t + #include #include #include -#ifndef CPU_ONLY -#include // cudaStream_t -#endif - namespace raft::bench::ann { enum class MemoryType { diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 5ce453a116..4e91ee0690 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -33,16 +33,6 @@ #include #include -#ifdef ANN_BENCH_BUILD_MAIN -#ifdef CPU_ONLY -#define CUDART_FOUND false -#else -#define CUDART_FOUND true -#endif -#else -#define CUDART_FOUND (cudart.found()) -#endif - namespace raft::bench::ann { static inline std::unique_ptr current_algo{nullptr}; @@ -255,7 +245,7 @@ void bench_search(::benchmark::State& state, } state.SetItemsProcessed(queries_processed); state.counters.insert({{"k", k}, {"n_queries", n_queries}}); - if (CUDART_FOUND) { + if (cudart.found()) { state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"GPU QPS", queries_processed / gpu_timer.total_time()}}); } @@ -357,7 +347,7 @@ void dispatch_benchmark(const Configuration& conf, std::string index_prefix, kv_series override_kv) { - if (CUDART_FOUND) { + if (cudart.found()) { for (auto [key, value] : cuda_info()) { ::benchmark::AddCustomContext(key, value); } @@ -506,7 +496,9 @@ inline auto run_main(int argc, char** argv) -> int return -1; } - if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); } + if (cudart.needed() && !cudart.found()) { + log_warn("cudart library is not found, GPU-based indices won't work."); + } Configuration conf(conf_stream); std::string dtype = conf.get_dataset_conf().dtype; diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index e3f9aa9e84..7c11194842 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -15,16 +15,34 @@ */ #pragma once -#ifdef ANN_BENCH_LINK_CUDART +/* +The content of this header is governed by two preprocessor definitions: + + - CPU_ONLY - whether none of the CUDA functions are used. + - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. + +______________________________________________________________________________ +|CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | +| | | found | needed | included | +|---------|-----------------------|-----------|---------|--------------------| +| ON | | false | false | NO | +| ON | "cudart.so.xx.xx" | false | false | NO | +| OFF | | true | true | YES | +| OFF | "cudart.so.xx.xx" | | true | YES | +------------------------------------------------------------------------------ +*/ + +#ifndef CPU_ONLY #include +#ifdef ANN_BENCH_LINK_CUDART +#include +#include +#endif #else -#define CPU_ONLY typedef void* cudaStream_t; typedef void* cudaEvent_t; #endif -#include - namespace raft::bench::ann { struct cuda_lib_handle { @@ -32,20 +50,75 @@ struct cuda_lib_handle { explicit cuda_lib_handle() { #ifdef ANN_BENCH_LINK_CUDART - handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); + constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE; + // The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH' + char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT + handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags); + if (handle != nullptr) { return; } + // try strip the PATCH + auto p = strrchr(libname, '.'); + p[0] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try set the MINOR version to 0 + p = strrchr(libname, '.'); + p[1] = '0'; + p[2] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try strip the MINOR + p[0] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try strip the MAJOR + p = strrchr(libname, '.'); + p[0] = 0; + handle = dlopen(libname, kFlags); #endif } ~cuda_lib_handle() noexcept { +#ifdef ANN_BENCH_LINK_CUDART if (handle != nullptr) { dlclose(handle); } +#endif + } + + template + auto sym(const char* name) -> Symbol + { +#ifdef ANN_BENCH_LINK_CUDART + return reinterpret_cast(dlsym(handle, name)); +#else + return nullptr; +#endif } - [[nodiscard]] inline auto found() const -> bool { return handle != nullptr; } + /** Whether this is NOT a cpu-only package. */ + [[nodiscard]] constexpr inline auto needed() const -> bool + { +#if defined(CPU_ONLY) + return false; +#else + return true; +#endif + } + + /** CUDA found, either at compile time or at runtime. */ + [[nodiscard]] inline auto found() const -> bool + { +#if defined(CPU_ONLY) + return false; +#elif defined(ANN_BENCH_LINK_CUDART) + return handle != nullptr; +#else + return true; +#endif + } }; static inline cuda_lib_handle cudart{}; -#ifndef CPU_ONLY +#ifdef ANN_BENCH_LINK_CUDART namespace stub { [[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, @@ -130,10 +203,9 @@ namespace stub { } // namespace stub -#define RAFT_DECLARE_CUDART(fun) \ - static inline decltype(&stub::fun) fun = \ - cudart.found() ? reinterpret_cast(dlsym(cudart.handle, #fun)) \ - : &stub::fun +#define RAFT_DECLARE_CUDART(fun) \ + static inline decltype(&stub::fun) fun = \ + cudart.found() ? cudart.sym(#fun) : &stub::fun RAFT_DECLARE_CUDART(cudaMemcpy); RAFT_DECLARE_CUDART(cudaMalloc); diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index faf440071d..49c4de1b11 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -16,6 +16,7 @@ #pragma once #include "ann_types.hpp" +#include "cuda_stub.hpp" // cuda-related utils #ifdef ANN_BENCH_NVTX3_HEADERS_FOUND #include