From b9736b2ef65b399c484de658a7325da3660866e9 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 9 Jan 2024 22:09:10 +0100 Subject: [PATCH] Decrease latency measurement overhead --- cpp/bench/ann/src/common/benchmark.hpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index e61de6745e..28c91035e2 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -287,15 +287,14 @@ void bench_search(::benchmark::State& state, std::make_shared>(current_algo_props->query_memory_type, k * query_set_size); cuda_timer gpu_timer; - auto start = std::chrono::high_resolution_clock::now(); { nvtx_case nvtx{state.name()}; + [[maybe_unused]] auto ntx_lap = nvtx.lap(); + [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); + auto start = std::chrono::high_resolution_clock::now(); auto algo = dynamic_cast*>(current_algo.get())->copy(); for (auto _ : state) { - [[maybe_unused]] auto ntx_lap = nvtx.lap(); - [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); - // run the search try { algo->search(query_set + batch_offset * dataset->dim(), @@ -314,12 +313,12 @@ void bench_search(::benchmark::State& state, queries_processed += n_queries; } + auto end = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast>(end - start).count(); + if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); } + state.counters.insert( + {"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}}); } - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast>(end - start).count(); - if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); } - state.counters.insert( - {"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}}); state.SetItemsProcessed(queries_processed); if (cudart.found()) {