From 6cc71344f442a66ad3b72ce0ca429c7ea7e0fa82 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Wed, 15 May 2024 15:55:42 +0200 Subject: [PATCH] ANN_BENCH: AnnGPU::uses_stream() for optional algo GPU sync (#2314) Introduce a new virtual member `uses_stream()` for the `AnnGPU` class. Overriding this allows an algorithm to inform the benchmark whether the stream synchronization is needed between benchmark iterations. This is relevant for a potential persistent kernel where the CPU threads use an independent mechanism to synchronize and get the results from the GPU. This is different from just not implementing `AnnGPU` for an algorithm in that it allows the algorithm to decide whether the synchronization is needed (depending on input parameters at runtime), while still providing the `get_sync_stream()` functionality. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2314 --- cpp/bench/ann/src/common/ann_types.hpp | 11 ++++++++++- cpp/bench/ann/src/common/util.hpp | 4 +++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index c6213059dc..776d29a906 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -98,7 +98,16 @@ class AnnGPU { * end. */ [[nodiscard]] virtual auto get_sync_stream() const noexcept -> cudaStream_t = 0; - virtual ~AnnGPU() noexcept = default; + /** + * By default a GPU algorithm uses a fixed stream to order GPU operations. + * However, an algorithm may need to synchronize with the host at the end of its execution. + * In that case, also synchronizing with a benchmark event would put it at disadvantage. + * + * We can disable event sync by passing `false` here + * - ONLY IF THE ALGORITHM HAS PRODUCED ITS OUTPUT BY THE TIME IT SYNCHRONIZES WITH CPU.
+ */ + [[nodiscard]] virtual auto uses_stream() const noexcept -> bool { return true; } + virtual ~AnnGPU() noexcept = default; }; template diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index ebcdf82e7c..96185c79eb 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -67,7 +67,9 @@ struct cuda_timer { static inline auto extract_stream(AnnT* algo) -> std::optional { auto gpu_ann = dynamic_cast(algo); - if (gpu_ann != nullptr) { return std::make_optional(gpu_ann->get_sync_stream()); } + if (gpu_ann != nullptr && gpu_ann->uses_stream()) { + return std::make_optional(gpu_ann->get_sync_stream()); + } return std::nullopt; }