Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ANN-bench: more flexible cuda_stub.hpp #1792

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ if(RAFT_ANN_BENCH_SINGLE_EXE)
target_compile_definitions(
ANN_BENCH
PRIVATE
$<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}">
$<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}">
$<$<BOOL:${NVTX3_HEADERS_FOUND}>:ANN_BENCH_NVTX3_HEADERS_FOUND>
)

Expand Down
6 changes: 2 additions & 4 deletions cpp/bench/ann/src/common/ann_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@

#pragma once

#include "cuda_stub.hpp" // cudaStream_t

#include <stdexcept>
#include <string>
#include <vector>

#ifndef CPU_ONLY
#include <cuda_runtime_api.h> // cudaStream_t
#endif

namespace raft::bench::ann {

enum class MemoryType {
Expand Down
18 changes: 5 additions & 13 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@
#include <unistd.h>
#include <vector>

#ifdef ANN_BENCH_BUILD_MAIN
#ifdef CPU_ONLY
#define CUDART_FOUND false
#else
#define CUDART_FOUND true
#endif
#else
#define CUDART_FOUND (cudart.found())
#endif

namespace raft::bench::ann {

static inline std::unique_ptr<AnnBase> current_algo{nullptr};
Expand Down Expand Up @@ -255,7 +245,7 @@ void bench_search(::benchmark::State& state,
}
state.SetItemsProcessed(queries_processed);
state.counters.insert({{"k", k}, {"n_queries", n_queries}});
if (CUDART_FOUND) {
if (cudart.found()) {
state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()},
{"GPU QPS", queries_processed / gpu_timer.total_time()}});
}
Expand Down Expand Up @@ -357,7 +347,7 @@ void dispatch_benchmark(const Configuration& conf,
std::string index_prefix,
kv_series override_kv)
{
if (CUDART_FOUND) {
if (cudart.found()) {
for (auto [key, value] : cuda_info()) {
::benchmark::AddCustomContext(key, value);
}
Expand Down Expand Up @@ -506,7 +496,9 @@ inline auto run_main(int argc, char** argv) -> int
return -1;
}

if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); }
if (cudart.needed() && !cudart.found()) {
log_warn("cudart library is not found, GPU-based indices won't work.");
}

Configuration conf(conf_stream);
std::string dtype = conf.get_dataset_conf().dtype;
Expand Down
94 changes: 83 additions & 11 deletions cpp/bench/ann/src/common/cuda_stub.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,110 @@
*/
#pragma once

#ifdef ANN_BENCH_LINK_CUDART
/*
The content of this header is governed by two preprocessor definitions:

- CPU_ONLY - whether none of the CUDA functions are used.
- ANN_BENCH_LINK_CUDART - dynamically link against this string if defined.

______________________________________________________________________________
|CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h |
| | | found | needed | included |
|---------|-----------------------|-----------|---------|--------------------|
| ON | <not defined> | false | false | NO |
| ON | "cudart.so.xx.xx" | false | false | NO |
| OFF | <nod defined> | true | true | YES |
| OFF | "cudart.so.xx.xx" | <runtime> | true | YES |
------------------------------------------------------------------------------
*/

#ifndef CPU_ONLY
#include <cuda_runtime_api.h>
#ifdef ANN_BENCH_LINK_CUDART
#include <cstring>
#include <dlfcn.h>
#endif
#else
#define CPU_ONLY
typedef void* cudaStream_t;
typedef void* cudaEvent_t;
#endif

#include <dlfcn.h>

namespace raft::bench::ann {

struct cuda_lib_handle {
void* handle{nullptr};
explicit cuda_lib_handle()
{
#ifdef ANN_BENCH_LINK_CUDART
handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE);
constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE;
// The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH'
char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT
handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags);
if (handle != nullptr) { return; }
// try strip the PATCH
auto p = strrchr(libname, '.');
p[0] = 0;
handle = dlopen(libname, kFlags);
if (handle != nullptr) { return; }
// try set the MINOR version to 0
p = strrchr(libname, '.');
p[1] = '0';
p[2] = 0;
handle = dlopen(libname, kFlags);
if (handle != nullptr) { return; }
// try strip the MINOR
p[0] = 0;
handle = dlopen(libname, kFlags);
if (handle != nullptr) { return; }
// try strip the MAJOR
p = strrchr(libname, '.');
p[0] = 0;
handle = dlopen(libname, kFlags);
#endif
}
~cuda_lib_handle() noexcept
{
#ifdef ANN_BENCH_LINK_CUDART
if (handle != nullptr) { dlclose(handle); }
#endif
}

template <typename Symbol>
auto sym(const char* name) -> Symbol
{
#ifdef ANN_BENCH_LINK_CUDART
return reinterpret_cast<Symbol>(dlsym(handle, name));
#else
return nullptr;
#endif
}

[[nodiscard]] inline auto found() const -> bool { return handle != nullptr; }
/** Whether this is NOT a cpu-only package. */
[[nodiscard]] constexpr inline auto needed() const -> bool
{
#if defined(CPU_ONLY)
return false;
#else
return true;
#endif
}

/** CUDA found, either at compile time or at runtime. */
[[nodiscard]] inline auto found() const -> bool
{
#if defined(CPU_ONLY)
return false;
#elif defined(ANN_BENCH_LINK_CUDART)
return handle != nullptr;
#else
return true;
#endif
}
};

static inline cuda_lib_handle cudart{};

#ifndef CPU_ONLY
#ifdef ANN_BENCH_LINK_CUDART
namespace stub {

[[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst,
Expand Down Expand Up @@ -130,10 +203,9 @@ namespace stub {

} // namespace stub

#define RAFT_DECLARE_CUDART(fun) \
static inline decltype(&stub::fun) fun = \
cudart.found() ? reinterpret_cast<decltype(&stub::fun)>(dlsym(cudart.handle, #fun)) \
: &stub::fun
#define RAFT_DECLARE_CUDART(fun) \
static inline decltype(&stub::fun) fun = \
cudart.found() ? cudart.sym<decltype(&stub::fun)>(#fun) : &stub::fun

RAFT_DECLARE_CUDART(cudaMemcpy);
RAFT_DECLARE_CUDART(cudaMalloc);
Expand Down
1 change: 1 addition & 0 deletions cpp/bench/ann/src/common/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include "ann_types.hpp"
#include "cuda_stub.hpp" // cuda-related utils

#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND
#include <nvtx3/nvToolsExt.h>
Expand Down