From a15550ca25c5d07404b80d3a85bea0be9fd155c6 Mon Sep 17 00:00:00 2001
From: achirkin
Date: Tue, 14 May 2024 13:58:39 +0200
Subject: [PATCH] Introduces AnnGPU::uses_stream() to determine whether stream sync is needed for a GPU algo between iterations

---
 cpp/bench/ann/src/common/ann_types.hpp | 11 ++++++++++-
 cpp/bench/ann/src/common/util.hpp      |  4 +++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp
index c6213059dc..776d29a906 100644
--- a/cpp/bench/ann/src/common/ann_types.hpp
+++ b/cpp/bench/ann/src/common/ann_types.hpp
@@ -98,7 +98,16 @@ class AnnGPU {
    * end.
    */
   [[nodiscard]] virtual auto get_sync_stream() const noexcept -> cudaStream_t = 0;
-  virtual ~AnnGPU() noexcept = default;
+  /**
+   * By default a GPU algorithm uses a fixed stream to order GPU operations.
+   * However, an algorithm may need to synchronize with the host at the end of its execution.
+   * In that case, also synchronizing with a benchmark event would put it at a disadvantage.
+   *
+   * We can disable event sync by returning `false` here
+   *   - ONLY IF THE ALGORITHM HAS PRODUCED ITS OUTPUT BY THE TIME IT SYNCHRONIZES WITH CPU.
+   */
+  [[nodiscard]] virtual auto uses_stream() const noexcept -> bool { return true; }
+  virtual ~AnnGPU() noexcept = default;
 };
 
 template <typename T>
diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp
index 6cdff316e9..f1677bc139 100644
--- a/cpp/bench/ann/src/common/util.hpp
+++ b/cpp/bench/ann/src/common/util.hpp
@@ -118,7 +118,9 @@ struct cuda_timer {
   static inline auto extract_stream(AnnT* algo) -> std::optional<cudaStream_t>
   {
     auto gpu_ann = dynamic_cast<AnnGPU*>(algo);
-    if (gpu_ann != nullptr) { return std::make_optional(gpu_ann->get_sync_stream()); }
+    if (gpu_ann != nullptr && gpu_ann->uses_stream()) {
+      return std::make_optional(gpu_ann->get_sync_stream());
+    }
     return std::nullopt;
   }
 