Skip to content

Commit

Permalink
Add nvtx annotations to benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
achirkin committed Aug 9, 2023
1 parent 44f0aca commit 9ba9854
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 23 deletions.
2 changes: 1 addition & 1 deletion cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECT

target_link_libraries(
ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc
-static-libstdc++
-static-libstdc++ CUDA::nvtx3
)
set_target_properties(
ANN_BENCH
Expand Down
52 changes: 30 additions & 22 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,16 @@ void bench_build(::benchmark::State& state,
std::size_t index_size = dataset->base_set_size();

cuda_timer gpu_timer;
for (auto _ : state) {
auto gpu_lap = gpu_timer.lap();
try {
algo->build(base_set, index_size, gpu_timer.stream());
} catch (const std::exception& e) {
state.SkipWithError(std::string(e.what()));
{
nvtx_case nvtx{state.name()};
for (auto _ : state) {
auto ntx_lap = nvtx.lap();
auto gpu_lap = gpu_timer.lap();
try {
algo->build(base_set, index_size, gpu_timer.stream());
} catch (const std::exception& e) {
state.SkipWithError(std::string(e.what()));
}
}
}
state.counters.insert(
Expand Down Expand Up @@ -206,23 +210,27 @@ void bench_search(::benchmark::State& state,
std::ptrdiff_t batch_offset = 0;
std::size_t queries_processed = 0;
cuda_timer gpu_timer;
for (auto _ : state) {
// measure the GPU time using the RAII helper
auto gpu_lap = gpu_timer.lap();
// run the search
try {
algo->search(query_set + batch_offset * dataset->dim(),
n_queries,
k,
neighbors.data + batch_offset * k,
distances.data + batch_offset * k,
gpu_timer.stream());
} catch (const std::exception& e) {
state.SkipWithError(std::string(e.what()));
{
nvtx_case nvtx{state.name()};
for (auto _ : state) {
// measure the GPU time using the RAII helper
auto ntx_lap = nvtx.lap();
auto gpu_lap = gpu_timer.lap();
// run the search
try {
algo->search(query_set + batch_offset * dataset->dim(),
n_queries,
k,
neighbors.data + batch_offset * k,
distances.data + batch_offset * k,
gpu_timer.stream());
} catch (const std::exception& e) {
state.SkipWithError(std::string(e.what()));
}
// advance to the next batch
batch_offset = (batch_offset + n_queries) % query_set_size;
queries_processed += n_queries;
}
// advance to the next batch
batch_offset = (batch_offset + n_queries) % query_set_size;
queries_processed += n_queries;
}
state.SetItemsProcessed(queries_processed);
state.counters.insert({{"k", k}, {"n_queries", n_queries}});
Expand Down
62 changes: 62 additions & 0 deletions cpp/bench/ann/src/common/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "ann_types.hpp"

#include "cuda_stub.hpp"
#include <nvtx3/nvToolsExt.h>

#include <sys/stat.h>
#include <sys/types.h>
Expand All @@ -31,6 +32,7 @@
#include <vector>

#include <filesystem>
#include <functional>

namespace raft::bench::ann {

Expand Down Expand Up @@ -161,6 +163,66 @@ inline auto cuda_info()
return props;
}

/**
 * Scoped NVTX annotation of a single benchmark case.
 *
 * On construction, creates the "ANN benchmark" NVTX domain and pushes a range
 * labelled with the case name onto it; on destruction, pops that range and
 * destroys the domain. Individual iterations are annotated with nested ranges
 * obtained via `lap()`.
 *
 * The object hands out raw pointers to its own members (the case name, the
 * iteration label buffer and the attribute structs) to NVTX, and its destructor
 * releases the domain; it is therefore neither copyable nor movable.
 */
struct nvtx_case {
 private:
  std::string case_name_;
  // Storage for the "Lap N" label; iter_attrib_.message.ascii points into it.
  std::array<char, 32> iter_name_{0};
  nvtxDomainHandle_t domain_;
  int64_t iteration_ = 0;
  nvtxEventAttributes_t case_attrib_{0};
  nvtxEventAttributes_t iter_attrib_{0};

 public:
  /**
   * Scoped NVTX range of one iteration: pushes a range on construction and
   * pops it on destruction.
   *
   * Non-copyable and non-movable, so that every push is matched by exactly one
   * pop. Returning it by value from `lap()` relies on C++17 guaranteed copy
   * elision.
   */
  struct nvtx_lap {
   private:
    nvtxDomainHandle_t domain_;

   public:
    nvtx_lap(nvtxDomainHandle_t domain, nvtxEventAttributes_t* attr) : domain_(domain)
    {
      nvtxDomainRangePushEx(domain_, attr);
    }
    nvtx_lap()                           = delete;
    nvtx_lap(const nvtx_lap&)            = delete;
    nvtx_lap(nvtx_lap&&)                 = delete;
    nvtx_lap& operator=(const nvtx_lap&) = delete;
    nvtx_lap& operator=(nvtx_lap&&)      = delete;
    ~nvtx_lap() noexcept { nvtxDomainRangePop(domain_); }
  };

  /**
   * Open the NVTX range of the whole benchmark case.
   *
   * @param case_name label of the case-level range; its hash also seeds the range color.
   */
  explicit nvtx_case(std::string case_name)
    : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark"))
  {
    case_attrib_.version       = NVTX_VERSION;
    iter_attrib_.version       = NVTX_VERSION;
    case_attrib_.size          = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    iter_attrib_.size          = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    case_attrib_.colorType     = NVTX_COLOR_ARGB;
    iter_attrib_.colorType     = NVTX_COLOR_ARGB;
    case_attrib_.messageType   = NVTX_MESSAGE_TYPE_ASCII;
    iter_attrib_.messageType   = NVTX_MESSAGE_TYPE_ASCII;
    case_attrib_.message.ascii = case_name_.c_str();
    // Derive a per-case color from the name; only the low 32 bits of the hash are
    // used, and OR-ing with 0xA0A0A0 keeps every RGB channel at 0xA0 or brighter.
    const auto name_hash = std::hash<std::string>{}(case_name_);
    case_attrib_.color   = static_cast<uint32_t>(name_hash) | 0xA0A0A0u;
    nvtxDomainRangePushEx(domain_, &case_attrib_);
  }

  nvtx_case(const nvtx_case&)            = delete;
  nvtx_case(nvtx_case&&)                 = delete;
  nvtx_case& operator=(const nvtx_case&) = delete;
  nvtx_case& operator=(nvtx_case&&)      = delete;

  /** Close the case-level range and release the NVTX domain. */
  ~nvtx_case()
  {
    nvtxDomainRangePop(domain_);
    nvtxDomainDestroy(domain_);
  }

  /**
   * Start the NVTX range of the next iteration.
   *
   * The range is labelled "Lap N", where N counts the calls to this function
   * starting from zero, and its color cycles through five shades.
   *
   * @return a scoped object; the range ends when it is destroyed. Because the
   *   label buffer and attributes are shared, the returned object should be
   *   destroyed before `lap()` is called again.
   */
  [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap
  {
    const int64_t lap_index = iteration_++;
    const auto shade        = static_cast<uint32_t>(lap_index % 5);
    const uint32_t r        = 150 + shade * 20;
    const uint32_t g        = 200 + shade * 10;
    const uint32_t b        = 220 + shade * 5;
    // int64_t has no portable printf length modifier without <cinttypes>;
    // widen to long long so the argument matches "%lld" on every platform.
    std::snprintf(
      iter_name_.data(), iter_name_.size(), "Lap %lld", static_cast<long long>(lap_index));
    iter_attrib_.message.ascii = iter_name_.data();
    iter_attrib_.color         = (r << 16) + (g << 8) + b;
    return nvtx_lap{domain_, &iter_attrib_};
  }
};

inline std::vector<std::string> split(const std::string& s, char delimiter)
{
std::vector<std::string> tokens;
Expand Down

0 comments on commit 9ba9854

Please sign in to comment.