From 6c3cdc9058e280885f94753f96b94c0284d69d74 Mon Sep 17 00:00:00 2001 From: achirkin Date: Fri, 10 Dec 2021 10:49:17 +0100 Subject: [PATCH 01/12] Copy the nvtx helpers implementation from cuml --- cpp/include/raft/common/detail/nvtx.cuh | 202 ++++++++++++++++++++++++ cpp/include/raft/common/nvtx.hpp | 119 ++++++++++++++ cpp/include/raft/linalg/svd.cuh | 4 + cpp/test/CMakeLists.txt | 6 + 4 files changed, 331 insertions(+) create mode 100644 cpp/include/raft/common/detail/nvtx.cuh create mode 100644 cpp/include/raft/common/nvtx.hpp diff --git a/cpp/include/raft/common/detail/nvtx.cuh b/cpp/include/raft/common/detail/nvtx.cuh new file mode 100644 index 0000000000..83ba124c92 --- /dev/null +++ b/cpp/include/raft/common/detail/nvtx.cuh @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace raft { +namespace common { +namespace detail { + +/** + * @brief An internal struct to store associated state with the color + * generator + */ +struct ColorGenState { + /** collection of all tagged colors generated so far */ + static std::unordered_map allColors; + /** mutex for accessing the above map */ + static std::mutex mapMutex; + /** saturation */ + static constexpr float S = 0.9f; + /** value */ + static constexpr float V = 0.85f; + /** golden ratio */ + static constexpr float Phi = 1.61803f; + /** inverse golden ratio */ + static constexpr float InvPhi = 1.f / Phi; +}; + +std::unordered_map ColorGenState::allColors; +std::mutex ColorGenState::mapMutex; + +// all h, s, v are in range [0, 1] +// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB +uint32_t hsv2rgb(float h, float s, float v) +{ + uint32_t out = 0xff000000u; + if (s <= 0.0f) { return out; } + // convert hue from [0, 1] range to [0, 360] + float h_deg = h * 360.f; + if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f; + h_deg /= 60.f; + int h_range = (int)h_deg; + float h_mod = h_deg - h_range; + float x = v * (1.f - s); + float y = v * (1.f - (s * h_mod)); + float z = v * (1.f - (s * (1.f - h_mod))); + float r, g, b; + switch (h_range) { + case 0: + r = v; + g = z; + b = x; + break; + case 1: + r = y; + g = v; + b = x; + break; + case 2: + r = x; + g = v; + b = z; + break; + case 3: + r = x; + g = y; + b = v; + break; + case 4: + r = z; + g = x; + b = v; + break; + case 5: + default: + r = v; + g = x; + b = y; + break; + } + out |= (uint32_t(r * 256.f) << 16); + out |= (uint32_t(g * 256.f) << 8); + out |= uint32_t(b * 256.f); + return out; +} + +/** + * @brief Helper method to generate 'visually distinct' colors. + * Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/ + * However, if an associated tag is passed, it will look up in its history for + * any generated color against this tag and if found, just returns it, else + * generates a new color, assigns a tag to it and stores it for future usage. + * Such a thing is very useful for nvtx markers where the ranges associated + * with a specific tag should ideally get the same color for the purpose of + * visualizing it on nsight-systems timeline. + * @param tag look for any previously generated colors with this tag or + * associate the currently generated color with it + * @return returns 32b RGB integer with alpha channel set of 0xff + */ +uint32_t generateNextColor(const std::string& tag) +{ + std::lock_guard guard(ColorGenState::mapMutex); + if (!tag.empty()) { + auto itr = ColorGenState::allColors.find(tag); + if (itr != ColorGenState::allColors.end()) { return itr->second; } + } + float h = rand() * 1.f / RAND_MAX; + h += ColorGenState::InvPhi; + if (h >= 1.f) h -= 1.f; + auto rgb = hsv2rgb(h, ColorGenState::S, ColorGenState::V); + if (!tag.empty()) { ColorGenState::allColors[tag] = rgb; } + return rgb; +} + +#ifdef NVTX_ENABLED + +#include + +nvtxDomainHandle_t domain = nvtxDomainCreateA("raft"); + +void pushRange_name(const char* name) +{ + nvtxEventAttributes_t eventAttrib = {0}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = generateNextColor(name); + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = name; + nvtxDomainRangePushEx(domain, &eventAttrib); +} + +template +void pushRange(const char* format, Args... args) +{ + if constexpr (sizeof...(args) > 0) { + int length = std::snprintf(nullptr, 0, format, args...); + assert(length >= 0); + auto buf = std::make_unique(length + 1); + std::snprintf(buf.get(), length + 1, format, args...); + pushRange_name(buf.get()); + } else { + pushRange_name(format); + } +} + +template +void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +{ + stream.synchronize(); + pushRange(format, args...); +} + +void popRange() { nvtxDomainRangePop(domain); } + +void popRange(rmm::cuda_stream_view stream) +{ + stream.synchronize(); + popRange(); +} + +#else // NVTX_ENABLED + +template +void pushRange(const char* format, Args... args) +{ +} + +template +void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +{ +} + +void popRange() {} + +void popRange(rmm::cuda_stream_view stream) {} + +#endif // NVTX_ENABLED + +} // namespace detail +} // namespace common +} // namespace raft diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp new file mode 100644 index 0000000000..a51722152e --- /dev/null +++ b/cpp/include/raft/common/nvtx.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/nvtx.cuh" + +namespace raft { +namespace common { + +/** + * @brief Push a named nvtx range + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ +template +void PUSH_RANGE(const char* format, Args... args) +{ + detail::pushRange(format, args...); +} + +/** + * @brief Synchronize CUDA stream and push a named nvtx range + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + * @param stream stream to synchronize + */ +template +void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) +{ + detail::pushRange(stream, format, args...); +} + +/** Pop the latest range */ +void POP_RANGE() { detail::popRange(); } + +/** + * @brief Synchronize CUDA stream and pop the latest nvtx range + * @param stream stream to synchronize + */ +void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } + +/** Push a named nvtx range that would be popped at the end of the object lifetime. */ +class AUTO_RANGE { + private: + std::optional streamMaybe; + + /* This object is not meant to be touched. */ + AUTO_RANGE(const AUTO_RANGE&) = delete; + AUTO_RANGE(AUTO_RANGE&&) = delete; + AUTO_RANGE& operator=(const AUTO_RANGE&) = delete; + AUTO_RANGE& operator=(AUTO_RANGE&&) = delete; + + public: + /** + * Synchronize CUDA stream and push a named nvtx range + * At the end of the object lifetime, synchronize again and pop the range. + * + * @param stream stream to synchronize + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ + template + AUTO_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) + : streamMaybe(std::make_optional(stream)) + { + PUSH_RANGE(stream, format, args...); + } + + /** + * Push a named nvtx range. + * At the end of the object lifetime, pop the range back. + * + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ + template + AUTO_RANGE(const char* format, Args... args) : streamMaybe(std::nullopt) + { + PUSH_RANGE(format, args...); + } + + ~AUTO_RANGE() + { + if (streamMaybe.has_value()) + POP_RANGE(streamMaybe.value()); + else + POP_RANGE(); + } +}; + +/*! + \def RAFT_USING_RANGE(...) + When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block. + + This macro initializes a dummy AUTO_RANGE variable on the stack, + which pushes the range in its constructor and pops it in the destructor. +*/ +#ifdef NVTX_ENABLED +#define RAFT_USING_RANGE(...) raft::common::AUTO_RANGE _AUTO_RANGE_##__LINE__(__VA_ARGS__) +#else +#define RAFT_USING_RANGE(...) (void)0 +#endif + +} // namespace common +} // namespace raft diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index c4dd8a3fd4..f83ba83c9b 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,7 @@ void svdQR(const raft::handle_t& handle, bool gen_right_vec, cudaStream_t stream) { + RAFT_USING_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -140,6 +142,7 @@ void svdEig(const raft::handle_t& handle, bool gen_left_vec, cudaStream_t stream) { + RAFT_USING_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -218,6 +221,7 @@ void svdJacobi(const raft::handle_t& handle, int max_sweeps, cudaStream_t stream) { + RAFT_USING_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); gesvdjInfo_t gesvdj_params = NULL; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 14052293cf..ed47ee7b77 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -130,6 +130,7 @@ PRIVATE CUDA::cusolver CUDA::cudart CUDA::cusparse + $<$:CUDA::nvToolsExt> rmm::rmm cuco::cuco FAISS::FAISS @@ -139,3 +140,8 @@ PRIVATE $ $ ) + +target_compile_definitions(test_raft +PRIVATE + $<$:NVTX_ENABLED> +) From 76af5459d0ca05c634e03c2bff1f7665f6536f00 Mon Sep 17 00:00:00 2001 From: achirkin Date: Fri, 10 Dec 2021 14:09:01 +0100 Subject: [PATCH 02/12] Make sure the helpers can be used in multiple tranlsation units. --- cpp/include/raft/common/detail/nvtx.cuh | 60 ++++++++++++------------- cpp/include/raft/common/nvtx.hpp | 8 ++-- cpp/test/distance/distance_base.cuh | 4 ++ cpp/test/eigen_solvers.cu | 3 ++ 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/cpp/include/raft/common/detail/nvtx.cuh b/cpp/include/raft/common/detail/nvtx.cuh index 83ba124c92..c4df6d5554 100644 --- a/cpp/include/raft/common/detail/nvtx.cuh +++ b/cpp/include/raft/common/detail/nvtx.cuh @@ -16,42 +16,43 @@ #pragma once -#include -#include -#include #include -#include -#include namespace raft { namespace common { namespace detail { +#ifdef NVTX_ENABLED + +#include +#include +#include +#include +#include +#include + /** * @brief An internal struct to store associated state with the color * generator */ struct ColorGenState { /** collection of all tagged colors generated so far */ - static std::unordered_map allColors; + static inline std::unordered_map allColors; /** mutex for accessing the above map */ - static std::mutex mapMutex; + static inline std::mutex mapMutex; /** saturation */ - static constexpr float S = 0.9f; + static inline constexpr float S = 0.9f; /** value */ - static constexpr float V = 0.85f; + static inline constexpr float V = 0.85f; /** golden ratio */ - static constexpr float Phi = 1.61803f; + static inline constexpr float Phi = 1.61803f; /** inverse golden ratio */ - static constexpr float InvPhi = 1.f / Phi; + static inline constexpr float InvPhi = 1.f / Phi; }; -std::unordered_map ColorGenState::allColors; -std::mutex ColorGenState::mapMutex; - // all h, s, v are in range [0, 1] // Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB -uint32_t hsv2rgb(float h, float s, float v) +inline uint32_t hsv2rgb(float h, float s, float v) { uint32_t out = 0xff000000u; if (s <= 0.0f) { return out; } @@ -117,8 +118,11 @@ uint32_t hsv2rgb(float h, float s, float v) * associate the currently generated color with it * @return returns 32b RGB integer with alpha channel set of 0xff */ -uint32_t generateNextColor(const std::string& tag) +inline uint32_t generateNextColor(const std::string& tag) { + // std::unordered_map ColorGenState::allColors; + // std::mutex ColorGenState::mapMutex; + std::lock_guard guard(ColorGenState::mapMutex); if (!tag.empty()) { auto itr = ColorGenState::allColors.find(tag); @@ -132,13 +136,9 @@ uint32_t generateNextColor(const std::string& tag) return rgb; } -#ifdef NVTX_ENABLED - -#include - -nvtxDomainHandle_t domain = nvtxDomainCreateA("raft"); +static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("raft"); -void pushRange_name(const char* name) +inline void pushRange_name(const char* name) { nvtxEventAttributes_t eventAttrib = {0}; eventAttrib.version = NVTX_VERSION; @@ -151,7 +151,7 @@ void pushRange_name(const char* name) } template -void pushRange(const char* format, Args... args) +inline void pushRange(const char* format, Args... args) { if constexpr (sizeof...(args) > 0) { int length = std::snprintf(nullptr, 0, format, args...); @@ -165,15 +165,15 @@ void pushRange(const char* format, Args... args) } template -void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) { stream.synchronize(); pushRange(format, args...); } -void popRange() { nvtxDomainRangePop(domain); } +inline void popRange() { nvtxDomainRangePop(domain); } -void popRange(rmm::cuda_stream_view stream) +inline void popRange(rmm::cuda_stream_view stream) { stream.synchronize(); popRange(); @@ -182,18 +182,18 @@ void popRange(rmm::cuda_stream_view stream) #else // NVTX_ENABLED template -void pushRange(const char* format, Args... args) +inline void pushRange(const char* format, Args... args) { } template -void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) { } -void popRange() {} +inline void popRange() {} -void popRange(rmm::cuda_stream_view stream) {} +inline void popRange(rmm::cuda_stream_view stream) {} #endif // NVTX_ENABLED diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index a51722152e..8489fd749b 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -27,7 +27,7 @@ namespace common { * @param args the arguments for the printf-style formatting */ template -void PUSH_RANGE(const char* format, Args... args) +inline void PUSH_RANGE(const char* format, Args... args) { detail::pushRange(format, args...); } @@ -39,19 +39,19 @@ void PUSH_RANGE(const char* format, Args... args) * @param stream stream to synchronize */ template -void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) { detail::pushRange(stream, format, args...); } /** Pop the latest range */ -void POP_RANGE() { detail::popRange(); } +inline void POP_RANGE() { detail::popRange(); } /** * @brief Synchronize CUDA stream and pop the latest nvtx range * @param stream stream to synchronize */ -void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } +inline void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } /** Push a named nvtx range that would be popped at the end of the object lifetime. */ class AUTO_RANGE { diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index ec9d35bb09..f20f9dd6ba 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -410,6 +411,9 @@ class DistanceTest : public ::testing::TestWithParam> { void SetUp() override { + auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); + RAFT_USING_RANGE("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); + raft::random::Rng r(params.seed); int m = params.m; int n = params.n; diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index dc7de92eb8..1354124d6a 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -27,6 +28,7 @@ namespace raft { TEST(Raft, EigenSolvers) { + RAFT_USING_RANGE("test::EigenSolvers"); using namespace matrix; using index_type = int; using value_type = double; @@ -67,6 +69,7 @@ TEST(Raft, EigenSolvers) TEST(Raft, SpectralSolvers) { + RAFT_USING_RANGE("test::SpectralSolvers"); using namespace matrix; using index_type = int; using value_type = double; From 3d3cec8a1671e84ddef30bebc0a12bd83a840916 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Fri, 10 Dec 2021 15:52:26 +0100 Subject: [PATCH 03/12] Update cpp/include/raft/common/nvtx.hpp Co-authored-by: William Hicks --- cpp/include/raft/common/nvtx.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 8489fd749b..e2727d37b6 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -96,7 +96,7 @@ class AUTO_RANGE { ~AUTO_RANGE() { if (streamMaybe.has_value()) - POP_RANGE(streamMaybe.value()); + POP_RANGE(*streamMaybe); else POP_RANGE(); } From ac5d356f4e653681b6f852212fec5829dedba52f Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 13 Dec 2021 08:17:10 +0100 Subject: [PATCH 04/12] Rafactor names --- cpp/include/raft/common/detail/nvtx.cuh | 2 +- cpp/include/raft/common/nvtx.hpp | 47 +++++++++++++------------ cpp/include/raft/linalg/svd.cuh | 6 ++-- cpp/test/distance/distance_base.cuh | 2 +- cpp/test/eigen_solvers.cu | 4 +-- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/cpp/include/raft/common/detail/nvtx.cuh b/cpp/include/raft/common/detail/nvtx.cuh index c4df6d5554..9da32c41d9 100644 --- a/cpp/include/raft/common/detail/nvtx.cuh +++ b/cpp/include/raft/common/detail/nvtx.cuh @@ -136,7 +136,7 @@ inline uint32_t generateNextColor(const std::string& tag) return rgb; } -static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("raft"); +static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("application"); inline void pushRange_name(const char* name) { diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index e2727d37b6..b6e6317f14 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -27,7 +27,7 @@ namespace common { * @param args the arguments for the printf-style formatting */ template -inline void PUSH_RANGE(const char* format, Args... args) +inline void PUSH_NVTX_RANGE(const char* format, Args... args) { detail::pushRange(format, args...); } @@ -39,30 +39,32 @@ inline void PUSH_RANGE(const char* format, Args... args) * @param stream stream to synchronize */ template -inline void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void PUSH_NVTX_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) { detail::pushRange(stream, format, args...); } /** Pop the latest range */ -inline void POP_RANGE() { detail::popRange(); } +inline void POP_NVTX_RANGE() { detail::popRange(); } /** * @brief Synchronize CUDA stream and pop the latest nvtx range * @param stream stream to synchronize */ -inline void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } +inline void POP_NVTX_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } /** Push a named nvtx range that would be popped at the end of the object lifetime. */ -class AUTO_RANGE { +class NvtxRange { private: std::optional streamMaybe; /* This object is not meant to be touched. */ - AUTO_RANGE(const AUTO_RANGE&) = delete; - AUTO_RANGE(AUTO_RANGE&&) = delete; - AUTO_RANGE& operator=(const AUTO_RANGE&) = delete; - AUTO_RANGE& operator=(AUTO_RANGE&&) = delete; + NvtxRange(const NvtxRange&) = delete; + NvtxRange(NvtxRange&&) = delete; + NvtxRange& operator=(const NvtxRange&) = delete; + NvtxRange& operator=(NvtxRange&&) = delete; + static void* operator new(std::size_t) = delete; + static void* operator new[](std::size_t) = delete; public: /** @@ -74,10 +76,10 @@ class AUTO_RANGE { * @param args the arguments for the printf-style formatting */ template - AUTO_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) + NvtxRange(rmm::cuda_stream_view stream, const char* format, Args... args) : streamMaybe(std::make_optional(stream)) { - PUSH_RANGE(stream, format, args...); + PUSH_NVTX_RANGE(stream, format, args...); } /** @@ -88,31 +90,32 @@ class AUTO_RANGE { * @param args the arguments for the printf-style formatting */ template - AUTO_RANGE(const char* format, Args... args) : streamMaybe(std::nullopt) + NvtxRange(const char* format, Args... args) : streamMaybe(std::nullopt) { - PUSH_RANGE(format, args...); + PUSH_NVTX_RANGE(format, args...); } - ~AUTO_RANGE() + ~NvtxRange() { - if (streamMaybe.has_value()) - POP_RANGE(*streamMaybe); - else - POP_RANGE(); + if (streamMaybe.has_value()) { + POP_NVTX_RANGE(*streamMaybe); + } else { + POP_NVTX_RANGE(); + } } }; /*! - \def RAFT_USING_RANGE(...) + \def RAFT_USING_NVTX_RANGE(...) When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block. - This macro initializes a dummy AUTO_RANGE variable on the stack, + This macro initializes a dummy NvtxRange variable on the stack, which pushes the range in its constructor and pops it in the destructor. */ #ifdef NVTX_ENABLED -#define RAFT_USING_RANGE(...) raft::common::AUTO_RANGE _AUTO_RANGE_##__LINE__(__VA_ARGS__) +#define RAFT_USING_NVTX_RANGE(...) raft::common::NvtxRange _AUTO_RANGE_##__LINE__(__VA_ARGS__) #else -#define RAFT_USING_RANGE(...) (void)0 +#define RAFT_USING_NVTX_RANGE(...) (void)0 #endif } // namespace common diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index f83ba83c9b..8b6417c817 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -64,7 +64,7 @@ void svdQR(const raft::handle_t& handle, bool gen_right_vec, cudaStream_t stream) { - RAFT_USING_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); + RAFT_USING_NVTX_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -142,7 +142,7 @@ void svdEig(const raft::handle_t& handle, bool gen_left_vec, cudaStream_t stream) { - RAFT_USING_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); + RAFT_USING_NVTX_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -221,7 +221,7 @@ void svdJacobi(const raft::handle_t& handle, int max_sweeps, cudaStream_t stream) { - RAFT_USING_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); + RAFT_USING_NVTX_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); gesvdjInfo_t gesvdj_params = NULL; diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index f20f9dd6ba..b58a0135d4 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -412,7 +412,7 @@ class DistanceTest : public ::testing::TestWithParam> { void SetUp() override { auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); - RAFT_USING_RANGE("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); + RAFT_USING_NVTX_RANGE("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); raft::random::Rng r(params.seed); int m = params.m; diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index 1354124d6a..934ae0440a 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -28,7 +28,7 @@ namespace raft { TEST(Raft, EigenSolvers) { - RAFT_USING_RANGE("test::EigenSolvers"); + RAFT_USING_NVTX_RANGE("test::EigenSolvers"); using namespace matrix; using index_type = int; using value_type = double; @@ -69,7 +69,7 @@ TEST(Raft, EigenSolvers) TEST(Raft, SpectralSolvers) { - RAFT_USING_RANGE("test::SpectralSolvers"); + RAFT_USING_NVTX_RANGE("test::SpectralSolvers"); using namespace matrix; using index_type = int; using value_type = double; From 8921a5d8e28348da63e5d5dcad440052040064b9 Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 13 Dec 2021 09:45:32 +0100 Subject: [PATCH 05/12] Move NVTX-related CMakeLists parameters from tests to the library --- cpp/CMakeLists.txt | 6 ++++++ cpp/test/CMakeLists.txt | 17 ++--------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 66b13b6710..0e1688d6f8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -131,10 +131,16 @@ INTERFACE CUDA::cusolver CUDA::cudart CUDA::cusparse + $<$:CUDA::nvToolsExt> rmm::rmm cuco::cuco ) +target_compile_definitions(raft +INTERFACE + $<$:NVTX_ENABLED> + ) + target_compile_features(raft INTERFACE cxx_std_17 $) install(TARGETS raft diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index ed47ee7b77..67ce9d1c1c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -117,22 +117,14 @@ target_compile_options(test_raft ) target_include_directories(test_raft - PUBLIC "$" - "$" + PUBLIC "$" "${FAISS_GPU_HEADERS}" ) target_link_libraries(test_raft PRIVATE - CUDA::cublas - CUDA::curand - CUDA::cusolver - CUDA::cudart - CUDA::cusparse - $<$:CUDA::nvToolsExt> - rmm::rmm - cuco::cuco + raft # transitively links all CUDA libs, etc FAISS::FAISS GTest::gtest GTest::gtest_main @@ -140,8 +132,3 @@ PRIVATE $ $ ) - -target_compile_definitions(test_raft -PRIVATE - $<$:NVTX_ENABLED> -) From aca8cd442932acb2a4870b45659d9aab6d9b85df Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 13 Dec 2021 11:01:00 +0100 Subject: [PATCH 06/12] Copy nvtx-related tests from cuml --- .../raft/common/detail/{nvtx.cuh => nvtx.hpp} | 0 cpp/include/raft/common/nvtx.hpp | 2 +- cpp/test/CMakeLists.txt | 1 + cpp/test/nvtx.cpp | 50 +++++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) rename cpp/include/raft/common/detail/{nvtx.cuh => nvtx.hpp} (100%) create mode 100644 cpp/test/nvtx.cpp diff --git a/cpp/include/raft/common/detail/nvtx.cuh b/cpp/include/raft/common/detail/nvtx.hpp similarity index 100% rename from cpp/include/raft/common/detail/nvtx.cuh rename to cpp/include/raft/common/detail/nvtx.hpp diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index b6e6317f14..5535103273 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -16,7 +16,7 @@ #pragma once -#include "detail/nvtx.cuh" +#include "detail/nvtx.hpp" namespace raft { namespace common { diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 67ce9d1c1c..b714aa3abc 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable(test_raft test/eigen_solvers.cu test/handle.cpp test/integer_utils.cpp + test/nvtx.cpp test/pow2_utils.cu test/label/label.cu test/label/merge_labels.cu diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp new file mode 100644 index 0000000000..ed811afeca --- /dev/null +++ b/cpp/test/nvtx.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifdef NVTX_ENABLED +#include +#include +/** + * tests for the functionality of generating next color based on string + * entered in the NVTX Range marker wrappers + */ + +namespace raft { + +class NvtxNextColorTest : public ::testing::Test { + protected: + void SetUp() override + { + const std::string temp1 = "foo"; + const std::string temp2 = "bar"; + + diff_string_diff_color = + common::detail::generateNextColor(temp1) != common::detail::generateNextColor(temp2); + same_string_same_color = + common::detail::generateNextColor(temp1) == common::detail::generateNextColor(temp1); + } + void TearDown() {} + bool diff_string_diff_color = false; + bool same_string_same_color = false; +}; + +TEST_F(NvtxNextColorTest, generateNextColor) +{ + EXPECT_TRUE(diff_string_diff_color); + EXPECT_TRUE(same_string_same_color); +} + +} // end namespace raft +#endif From 2e0e38c0b9bbe473de1724e09338bce4b334a3c0 Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 13 Dec 2021 13:21:00 +0100 Subject: [PATCH 07/12] Copy (and add a bit) the python nvtx interface from cuml --- cpp/include/raft/common/nvtx.hpp | 5 +++- python/raft/common/cuda.pxd | 7 +++++- python/raft/common/cuda.pyx | 41 +++++++++++++++++++++++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 5535103273..881df4d128 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -113,7 +113,10 @@ class NvtxRange { which pushes the range in its constructor and pops it in the destructor. */ #ifdef NVTX_ENABLED -#define RAFT_USING_NVTX_RANGE(...) raft::common::NvtxRange _AUTO_RANGE_##__LINE__(__VA_ARGS__) +#define _RAFT_USING_NVTX_RANGE_CAT(a, b) a##b +#define _RAFT_USING_NVTX_RANGE_UNIQUE_NAME(ln) _RAFT_USING_NVTX_RANGE_CAT(_NvtxRange_, ln) +#define RAFT_USING_NVTX_RANGE(...) \ + raft::common::NvtxRange _RAFT_USING_NVTX_RANGE_UNIQUE_NAME(__LINE__)(__VA_ARGS__) #else #define RAFT_USING_NVTX_RANGE(...) (void)0 #endif diff --git a/python/raft/common/cuda.pxd b/python/raft/common/cuda.pxd index e407213f44..b53608ccb0 100644 --- a/python/raft/common/cuda.pxd +++ b/python/raft/common/cuda.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,3 +34,8 @@ cdef extern from "cuda_runtime_api.h" nogil: _Error cudaGetLastError() const char* cudaGetErrorString(_Error e) const char* cudaGetErrorName(_Error e) + + +cdef extern from "raft/common/nvtx.hpp" namespace "raft::common": + void PUSH_NVTX_RANGE(const char* name) + void POP_NVTX_RANGE() diff --git a/python/raft/common/cuda.pyx b/python/raft/common/cuda.pyx index baa46bfef8..6ed09daa4a 100644 --- a/python/raft/common/cuda.pyx +++ b/python/raft/common/cuda.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ # cython: language_level = 3 import functools +import contextlib from libcpp.string cimport string @@ -86,3 +87,41 @@ cdef class Stream: def getStream(self): return self.s + + +def nvtx_range_push(name: str): + """ + Create an NVTX range with name `name`. + + NB: consider using `nvtx_range`, which guarantees to pop the range. + """ + cdef string s = name.encode("UTF-8") + PUSH_NVTX_RANGE(s.c_str()) + + +def nvtx_range_pop(): + """ + End an NVTX range + + NB: consider using `nvtx_range`, which guarantees to pop the range. + """ + POP_NVTX_RANGE() + + +@contextlib.contextmanager +def nvtx_range(name: str): + """Annotate a code block with an NVTX range.""" + nvtx_range_push(name) + try: + yield + finally: + nvtx_range_pop() + + +def nvtx_range_wrap(func): + """Decorator that wraps the function into an `nvtx_range`.""" + @functools.wraps(func) + def wrapper(*args, **kwargs): + with nvtx_range(func.__name__): + return func(*args, **kwargs) + return wrapper From bec66f05aa316219774e36d1cbc15775b592e464 Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 13 Dec 2021 15:45:02 +0100 Subject: [PATCH 08/12] remove python nvtx bindings in favor of package nvtx --- python/raft/common/cuda.pxd | 5 ----- python/raft/common/cuda.pyx | 42 ------------------------------------- 2 files changed, 47 deletions(-) diff --git a/python/raft/common/cuda.pxd b/python/raft/common/cuda.pxd index b53608ccb0..13d935da8c 100644 --- a/python/raft/common/cuda.pxd +++ b/python/raft/common/cuda.pxd @@ -34,8 +34,3 @@ cdef extern from "cuda_runtime_api.h" nogil: _Error cudaGetLastError() const char* cudaGetErrorString(_Error e) const char* cudaGetErrorName(_Error e) - - -cdef extern from "raft/common/nvtx.hpp" namespace "raft::common": - void PUSH_NVTX_RANGE(const char* name) - void POP_NVTX_RANGE() diff --git a/python/raft/common/cuda.pyx b/python/raft/common/cuda.pyx index 6ed09daa4a..0b97eeba67 100644 --- a/python/raft/common/cuda.pyx +++ b/python/raft/common/cuda.pyx @@ -19,10 +19,6 @@ # cython: embedsignature = True # cython: language_level = 3 -import functools -import contextlib -from libcpp.string cimport string - class CudaRuntimeError(RuntimeError): def __init__(self, extraMsg=None): @@ -87,41 +83,3 @@ cdef class Stream: def getStream(self): return self.s - - -def nvtx_range_push(name: str): - """ - Create an NVTX range with name `name`. - - NB: consider using `nvtx_range`, which guarantees to pop the range. - """ - cdef string s = name.encode("UTF-8") - PUSH_NVTX_RANGE(s.c_str()) - - -def nvtx_range_pop(): - """ - End an NVTX range - - NB: consider using `nvtx_range`, which guarantees to pop the range. - """ - POP_NVTX_RANGE() - - -@contextlib.contextmanager -def nvtx_range(name: str): - """Annotate a code block with an NVTX range.""" - nvtx_range_push(name) - try: - yield - finally: - nvtx_range_pop() - - -def nvtx_range_wrap(func): - """Decorator that wraps the function into an `nvtx_range`.""" - @functools.wraps(func) - def wrapper(*args, **kwargs): - with nvtx_range(func.__name__): - return func(*args, **kwargs) - return wrapper From a87ea7ccefb2d8582c17a4b3e2de3093d9d230ff Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 14 Dec 2021 11:10:39 +0100 Subject: [PATCH 09/12] Run clang-tidy --- cpp/include/raft/common/detail/nvtx.hpp | 98 ++++++++++++------------- cpp/include/raft/common/nvtx.hpp | 75 ++++++++----------- cpp/include/raft/linalg/svd.cuh | 6 +- cpp/test/distance/distance_base.cuh | 2 +- cpp/test/eigen_solvers.cu | 4 +- cpp/test/nvtx.cpp | 8 +- 6 files changed, 86 insertions(+), 107 deletions(-) diff --git a/cpp/include/raft/common/detail/nvtx.hpp b/cpp/include/raft/common/detail/nvtx.hpp index 9da32c41d9..f2993f0ec6 100644 --- a/cpp/include/raft/common/detail/nvtx.hpp +++ b/cpp/include/raft/common/detail/nvtx.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,15 +18,13 @@ #include -namespace raft { -namespace common { -namespace detail { +namespace raft::common::detail { #ifdef NVTX_ENABLED #include -#include -#include +#include +#include #include #include #include @@ -35,24 +33,24 @@ namespace detail { * @brief An internal struct to store associated state with the color * generator */ -struct ColorGenState { +struct color_gen_state { /** collection of all tagged colors generated so far */ - static inline std::unordered_map allColors; + static inline std::unordered_map all_colors_; /** mutex for accessing the above map */ - static inline std::mutex mapMutex; + static inline std::mutex map_mutex_; /** saturation */ - static inline constexpr float S = 0.9f; + static inline constexpr float kS = 0.9f; /** value */ - static inline constexpr float V = 0.85f; + static inline constexpr float kV = 0.85f; /** golden ratio */ - static inline constexpr float Phi = 1.61803f; + static inline constexpr float kPhi = 1.61803f; /** inverse golden ratio */ - static inline constexpr float InvPhi = 1.f / Phi; + static inline constexpr float kInvPhi = 1.f / kPhi; }; // all h, s, v are in range [0, 1] // Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB -inline uint32_t hsv2rgb(float h, float s, float v) +inline auto hsv2rgb(float h, float s, float v) -> uint32_t { uint32_t out = 0xff000000u; if (s <= 0.0f) { return out; } @@ -60,7 +58,7 @@ inline uint32_t hsv2rgb(float h, float s, float v) float h_deg = h * 360.f; if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f; h_deg /= 60.f; - int h_range = (int)h_deg; + int h_range = static_cast(h_deg); float h_mod = h_deg - h_range; float x = v * (1.f - s); float y = v * (1.f - (s * h_mod)); @@ -118,85 +116,83 @@ inline uint32_t hsv2rgb(float h, float s, float v) * associate the currently generated color with it * @return returns 32b RGB integer with alpha channel set of 0xff */ -inline uint32_t generateNextColor(const std::string& tag) +inline auto generate_next_color(const std::string& tag) -> uint32_t { - // std::unordered_map ColorGenState::allColors; - // std::mutex ColorGenState::mapMutex; + // std::unordered_map color_gen_state::all_colors_; + // std::mutex color_gen_state::map_mutex_; - std::lock_guard guard(ColorGenState::mapMutex); + std::lock_guard guard(color_gen_state::map_mutex_); if (!tag.empty()) { - auto itr = ColorGenState::allColors.find(tag); - if (itr != ColorGenState::allColors.end()) { return itr->second; } + auto itr = color_gen_state::all_colors_.find(tag); + if (itr != color_gen_state::all_colors_.end()) { return itr->second; } } - float h = rand() * 1.f / RAND_MAX; - h += ColorGenState::InvPhi; + auto h = static_cast(rand()) / static_cast(RAND_MAX); + h += color_gen_state::kInvPhi; if (h >= 1.f) h -= 1.f; - auto rgb = hsv2rgb(h, ColorGenState::S, ColorGenState::V); - if (!tag.empty()) { ColorGenState::allColors[tag] = rgb; } + auto rgb = hsv2rgb(h, color_gen_state::kS, color_gen_state::kV); + if (!tag.empty()) { color_gen_state::all_colors_[tag] = rgb; } return rgb; } static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("application"); -inline void pushRange_name(const char* name) +inline void push_range_name(const char* name) { - nvtxEventAttributes_t eventAttrib = {0}; - eventAttrib.version = NVTX_VERSION; - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - eventAttrib.colorType = NVTX_COLOR_ARGB; - eventAttrib.color = generateNextColor(name); - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; - eventAttrib.message.ascii = name; - nvtxDomainRangePushEx(domain, &eventAttrib); + nvtxEventAttributes_t event_attrib = {0}; + event_attrib.version = NVTX_VERSION; + event_attrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + event_attrib.colorType = NVTX_COLOR_ARGB; + event_attrib.color = generate_next_color(name); + event_attrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + event_attrib.message.ascii = name; + nvtxDomainRangePushEx(domain, &event_attrib); } template -inline void pushRange(const char* format, Args... args) +inline void push_range(const char* format, Args... args) { if constexpr (sizeof...(args) > 0) { int length = std::snprintf(nullptr, 0, format, args...); assert(length >= 0); - auto buf = std::make_unique(length + 1); - std::snprintf(buf.get(), length + 1, format, args...); - pushRange_name(buf.get()); + std::vector buf(length + 1); + std::snprintf(buf.data(), length + 1, format, args...); + push_range_name(buf.data()); } else { - pushRange_name(format); + push_range_name(format); } } template -inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args) { stream.synchronize(); - pushRange(format, args...); + push_range(format, args...); } -inline void popRange() { nvtxDomainRangePop(domain); } +inline void pop_range() { nvtxDomainRangePop(domain); } -inline void popRange(rmm::cuda_stream_view stream) +inline void pop_range(rmm::cuda_stream_view stream) { stream.synchronize(); - popRange(); + pop_range(); } #else // NVTX_ENABLED template -inline void pushRange(const char* format, Args... args) +inline void push_range(const char* format, Args... args) { } template -inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args) { } -inline void popRange() {} +inline void pop_range() {} -inline void popRange(rmm::cuda_stream_view stream) {} +inline void pop_range(rmm::cuda_stream_view stream) {} #endif // NVTX_ENABLED -} // namespace detail -} // namespace common -} // namespace raft +} // namespace raft::common::detail diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 881df4d128..5ce1c2960c 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ #pragma once +#include #include "detail/nvtx.hpp" -namespace raft { -namespace common { +namespace raft::common { /** * @brief Push a named nvtx range @@ -27,9 +27,9 @@ namespace common { * @param args the arguments for the printf-style formatting */ template -inline void PUSH_NVTX_RANGE(const char* format, Args... args) +inline void push_nvtx_range(const char* format, Args... args) { - detail::pushRange(format, args...); + detail::push_range(format, args...); } /** @@ -39,32 +39,24 @@ inline void PUSH_NVTX_RANGE(const char* format, Args... args) * @param stream stream to synchronize */ template -inline void PUSH_NVTX_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args) +inline void push_nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args) { - detail::pushRange(stream, format, args...); + detail::push_range(stream, format, args...); } /** Pop the latest range */ -inline void POP_NVTX_RANGE() { detail::popRange(); } +inline void pop_nvtx_range() { detail::pop_range(); } /** * @brief Synchronize CUDA stream and pop the latest nvtx range * @param stream stream to synchronize */ -inline void POP_NVTX_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); } +inline void pop_nvtx_range(rmm::cuda_stream_view stream) { detail::pop_range(stream); } /** Push a named nvtx range that would be popped at the end of the object lifetime. */ -class NvtxRange { +class nvtx_range { private: - std::optional streamMaybe; - - /* This object is not meant to be touched. */ - NvtxRange(const NvtxRange&) = delete; - NvtxRange(NvtxRange&&) = delete; - NvtxRange& operator=(const NvtxRange&) = delete; - NvtxRange& operator=(NvtxRange&&) = delete; - static void* operator new(std::size_t) = delete; - static void* operator new[](std::size_t) = delete; + std::optional stream_maybe_; public: /** @@ -76,10 +68,10 @@ class NvtxRange { * @param args the arguments for the printf-style formatting */ template - NvtxRange(rmm::cuda_stream_view stream, const char* format, Args... args) - : streamMaybe(std::make_optional(stream)) + explicit nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args) + : stream_maybe_(std::make_optional(stream)) { - PUSH_NVTX_RANGE(stream, format, args...); + push_nvtx_range(stream, format, args...); } /** @@ -90,36 +82,27 @@ class NvtxRange { * @param args the arguments for the printf-style formatting */ template - NvtxRange(const char* format, Args... args) : streamMaybe(std::nullopt) + explicit nvtx_range(const char* format, Args... args) : stream_maybe_(std::nullopt) { - PUSH_NVTX_RANGE(format, args...); + push_nvtx_range(format, args...); } - ~NvtxRange() + ~nvtx_range() { - if (streamMaybe.has_value()) { - POP_NVTX_RANGE(*streamMaybe); + if (stream_maybe_.has_value()) { + pop_nvtx_range(*stream_maybe_); } else { - POP_NVTX_RANGE(); + pop_nvtx_range(); } } -}; -/*! - \def RAFT_USING_NVTX_RANGE(...) - When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block. - - This macro initializes a dummy NvtxRange variable on the stack, - which pushes the range in its constructor and pops it in the destructor. -*/ -#ifdef NVTX_ENABLED -#define _RAFT_USING_NVTX_RANGE_CAT(a, b) a##b -#define _RAFT_USING_NVTX_RANGE_UNIQUE_NAME(ln) _RAFT_USING_NVTX_RANGE_CAT(_NvtxRange_, ln) -#define RAFT_USING_NVTX_RANGE(...) \ - raft::common::NvtxRange _RAFT_USING_NVTX_RANGE_UNIQUE_NAME(__LINE__)(__VA_ARGS__) -#else -#define RAFT_USING_NVTX_RANGE(...) (void)0 -#endif + /* This object is not meant to be touched. */ + nvtx_range(const nvtx_range&) = delete; + nvtx_range(nvtx_range&&) = delete; + auto operator=(const nvtx_range&) -> nvtx_range& = delete; + auto operator=(nvtx_range&&) -> nvtx_range& = delete; + static auto operator new(std::size_t) -> void* = delete; + static auto operator new[](std::size_t) -> void* = delete; +}; -} // namespace common -} // namespace raft +} // namespace raft::common diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index 8b6417c817..7079030684 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -64,7 +64,7 @@ void svdQR(const raft::handle_t& handle, bool gen_right_vec, cudaStream_t stream) { - RAFT_USING_NVTX_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); + common::nvtx_range fun_scope("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -142,7 +142,7 @@ void svdEig(const raft::handle_t& handle, bool gen_left_vec, cudaStream_t stream) { - RAFT_USING_NVTX_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); + common::nvtx_range fun_scope("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -221,7 +221,7 @@ void svdJacobi(const raft::handle_t& handle, int max_sweeps, cudaStream_t stream) { - RAFT_USING_NVTX_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); + common::nvtx_range fun_scope("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); gesvdjInfo_t gesvdj_params = NULL; diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index b58a0135d4..4b955cfadc 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -412,7 +412,7 @@ class DistanceTest : public ::testing::TestWithParam> { void SetUp() override { auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); - RAFT_USING_NVTX_RANGE("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); + common::nvtx_range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); raft::random::Rng r(params.seed); int m = params.m; diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index 934ae0440a..e6bba8d3d8 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -28,7 +28,7 @@ namespace raft { TEST(Raft, EigenSolvers) { - RAFT_USING_NVTX_RANGE("test::EigenSolvers"); + common::nvtx_range fun_scope("test::EigenSolvers"); using namespace matrix; using index_type = int; using value_type = double; @@ -69,7 +69,7 @@ TEST(Raft, EigenSolvers) TEST(Raft, SpectralSolvers) { - RAFT_USING_NVTX_RANGE("test::SpectralSolvers"); + common::nvtx_range fun_scope("test::SpectralSolvers"); using namespace matrix; using index_type = int; using value_type = double; diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp index ed811afeca..9b43828c0c 100644 --- a/cpp/test/nvtx.cpp +++ b/cpp/test/nvtx.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,16 +31,16 @@ class NvtxNextColorTest : public ::testing::Test { const std::string temp2 = "bar"; diff_string_diff_color = - common::detail::generateNextColor(temp1) != common::detail::generateNextColor(temp2); + common::detail::generate_next_color(temp1) != common::detail::generate_next_color(temp2); same_string_same_color = - common::detail::generateNextColor(temp1) == common::detail::generateNextColor(temp1); + common::detail::generate_next_color(temp1) == common::detail::generate_next_color(temp1); } void TearDown() {} bool diff_string_diff_color = false; bool same_string_same_color = false; }; -TEST_F(NvtxNextColorTest, generateNextColor) +TEST_F(NvtxNextColorTest, generate_next_color) { EXPECT_TRUE(diff_string_diff_color); EXPECT_TRUE(same_string_same_color); From af434ed891a62609b57b9e02e0dfac6e0fa30b42 Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 14 Dec 2021 11:15:58 +0100 Subject: [PATCH 10/12] revert unchanged file --- python/raft/common/cuda.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft/common/cuda.pxd b/python/raft/common/cuda.pxd index 13d935da8c..e407213f44 100644 --- a/python/raft/common/cuda.pxd +++ b/python/raft/common/cuda.pxd @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From d8357b2a9b0d739f9cb2837569f67c0a5fc65f03 Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 16 Dec 2021 20:07:15 +0100 Subject: [PATCH 11/12] Removed stream-synced helpers, refactored names, added domains --- cpp/include/raft/common/detail/nvtx.hpp | 59 +++++++------ cpp/include/raft/common/nvtx.hpp | 106 +++++++++++------------- cpp/include/raft/linalg/svd.cuh | 9 +- cpp/test/distance/distance_base.cuh | 2 +- cpp/test/eigen_solvers.cu | 4 +- cpp/test/nvtx.cpp | 8 +- 6 files changed, 95 insertions(+), 93 deletions(-) diff --git a/cpp/include/raft/common/detail/nvtx.hpp b/cpp/include/raft/common/detail/nvtx.hpp index f2993f0ec6..4cef7c07bc 100644 --- a/cpp/include/raft/common/detail/nvtx.hpp +++ b/cpp/include/raft/common/detail/nvtx.hpp @@ -18,15 +18,16 @@ #include -namespace raft::common::detail { +namespace raft::common::nvtx::detail { #ifdef NVTX_ENABLED -#include #include #include #include +#include #include +#include #include /** @@ -134,8 +135,25 @@ inline auto generate_next_color(const std::string& tag) -> uint32_t return rgb; } -static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("application"); +template +struct domain_store { + /* If `Domain::name` does not exist, this default instance is used and throws the error. */ + static_assert(sizeof(Domain) != sizeof(Domain), + "Type used to identify a domain must contain a static member 'char const* name'"); + static inline nvtxDomainHandle_t const kValue = nullptr; +}; + +template +struct domain_store< + Domain, + /* Check if there exists `Domain::name` */ + std::enable_if_t< + std::is_same::type>::value, + Domain>> { + static inline nvtxDomainHandle_t const kValue = nvtxDomainCreateA(Domain::name); +}; +template inline void push_range_name(const char* name) { nvtxEventAttributes_t event_attrib = {0}; @@ -145,10 +163,10 @@ inline void push_range_name(const char* name) event_attrib.color = generate_next_color(name); event_attrib.messageType = NVTX_MESSAGE_TYPE_ASCII; event_attrib.message.ascii = name; - nvtxDomainRangePushEx(domain, &event_attrib); + nvtxDomainRangePushEx(domain_store::kValue, &event_attrib); } -template +template inline void push_range(const char* format, Args... args) { if constexpr (sizeof...(args) > 0) { @@ -156,43 +174,30 @@ inline void push_range(const char* format, Args... args) assert(length >= 0); std::vector buf(length + 1); std::snprintf(buf.data(), length + 1, format, args...); - push_range_name(buf.data()); + push_range_name(buf.data()); } else { - push_range_name(format); + push_range_name(format); } } -template -inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args) +template +inline void pop_range() { - stream.synchronize(); - push_range(format, args...); -} - -inline void pop_range() { nvtxDomainRangePop(domain); } - -inline void pop_range(rmm::cuda_stream_view stream) -{ - stream.synchronize(); - pop_range(); + nvtxDomainRangePop(domain_store::kValue); } #else // NVTX_ENABLED -template +template inline void push_range(const char* format, Args... args) { } -template -inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args) +template +inline void pop_range() { } -inline void pop_range() {} - -inline void pop_range(rmm::cuda_stream_view stream) {} - #endif // NVTX_ENABLED -} // namespace raft::common::detail +} // namespace raft::common::nvtx::detail diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 5ce1c2960c..35f5802a5b 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -16,93 +16,87 @@ #pragma once -#include #include "detail/nvtx.hpp" +#include + +namespace raft::common::nvtx { + +namespace domain { + +/** The default NVTX domain. */ +struct app { + static constexpr char const* name{"application"}; +}; + +/** This NVTX domain is supposed to be used within raft. */ +struct raft { + static constexpr char const* name{"raft"}; +}; -namespace raft::common { +} // namespace domain /** - * @brief Push a named nvtx range + * @brief Push a named NVTX range. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * `struct custom_domain { static constexpr char const* name{"custom message"}; }` + * NB: make sure to use the same domain for `push_range` and `pop_range`. * @param format range name format (accepts printf-style arguments) * @param args the arguments for the printf-style formatting */ -template -inline void push_nvtx_range(const char* format, Args... args) +template +inline void push_range(const char* format, Args... args) { - detail::push_range(format, args...); + detail::push_range(format, args...); } /** - * @brief Synchronize CUDA stream and push a named nvtx range - * @param format range name format (accepts printf-style arguments) - * @param args the arguments for the printf-style formatting - * @param stream stream to synchronize + * @brief Pop the latest range. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * `struct custom_domain { static constexpr char const* name{"custom message"}; }` + * NB: make sure to use the same domain for `push_range` and `pop_range`. */ -template -inline void push_nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args) +template +inline void pop_range() { - detail::push_range(stream, format, args...); + detail::pop_range(); } -/** Pop the latest range */ -inline void pop_nvtx_range() { detail::pop_range(); } - /** - * @brief Synchronize CUDA stream and pop the latest nvtx range - * @param stream stream to synchronize + * @brief Push a named NVTX range that would be popped at the end of the object lifetime. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * `struct custom_domain { static constexpr char const* name{"custom message"}; }` */ -inline void pop_nvtx_range(rmm::cuda_stream_view stream) { detail::pop_range(stream); } - -/** Push a named nvtx range that would be popped at the end of the object lifetime. */ -class nvtx_range { - private: - std::optional stream_maybe_; - +template +class range { public: /** - * Synchronize CUDA stream and push a named nvtx range - * At the end of the object lifetime, synchronize again and pop the range. - * - * @param stream stream to synchronize - * @param format range name format (accepts printf-style arguments) - * @param args the arguments for the printf-style formatting - */ - template - explicit nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args) - : stream_maybe_(std::make_optional(stream)) - { - push_nvtx_range(stream, format, args...); - } - - /** - * Push a named nvtx range. + * Push a named NVTX range. * At the end of the object lifetime, pop the range back. * * @param format range name format (accepts printf-style arguments) * @param args the arguments for the printf-style formatting */ template - explicit nvtx_range(const char* format, Args... args) : stream_maybe_(std::nullopt) + explicit range(const char* format, Args... args) { - push_nvtx_range(format, args...); + push_range(format, args...); } - ~nvtx_range() - { - if (stream_maybe_.has_value()) { - pop_nvtx_range(*stream_maybe_); - } else { - pop_nvtx_range(); - } - } + ~range() { pop_range(); } /* This object is not meant to be touched. */ - nvtx_range(const nvtx_range&) = delete; - nvtx_range(nvtx_range&&) = delete; - auto operator=(const nvtx_range&) -> nvtx_range& = delete; - auto operator=(nvtx_range&&) -> nvtx_range& = delete; + range(const range&) = delete; + range(range&&) = delete; + auto operator=(const range&) -> range& = delete; + auto operator=(range&&) -> range& = delete; static auto operator new(std::size_t) -> void* = delete; static auto operator new[](std::size_t) -> void* = delete; }; -} // namespace raft::common +} // namespace raft::common::nvtx diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index c08c776095..2afae788a1 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -64,7 +64,8 @@ void svdQR(const raft::handle_t& handle, bool gen_right_vec, cudaStream_t stream) { - common::nvtx_range fun_scope("raft::linalg::svdQR(%d, %d)", n_rows, n_cols); + common::nvtx::range fun_scope( + "raft::linalg::svdQR(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -142,7 +143,8 @@ void svdEig(const raft::handle_t& handle, bool gen_left_vec, cudaStream_t stream) { - common::nvtx_range fun_scope("raft::linalg::svdEig(%d, %d)", n_rows, n_cols); + common::nvtx::range fun_scope( + "raft::linalg::svdEig(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -221,7 +223,8 @@ void svdJacobi(const raft::handle_t& handle, int max_sweeps, cudaStream_t stream) { - common::nvtx_range fun_scope("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); + common::nvtx::range fun_scope( + "raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); gesvdjInfo_t gesvdj_params = NULL; diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 102c18963b..475202137b 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -418,7 +418,7 @@ class DistanceTest : public ::testing::TestWithParam> { void SetUp() override { auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); - common::nvtx_range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); + common::nvtx::range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); raft::random::Rng r(params.seed); int m = params.m; diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index e6bba8d3d8..f898d11d2e 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -28,7 +28,7 @@ namespace raft { TEST(Raft, EigenSolvers) { - common::nvtx_range fun_scope("test::EigenSolvers"); + common::nvtx::range fun_scope("test::EigenSolvers"); using namespace matrix; using index_type = int; using value_type = double; @@ -69,7 +69,7 @@ TEST(Raft, EigenSolvers) TEST(Raft, SpectralSolvers) { - common::nvtx_range fun_scope("test::SpectralSolvers"); + common::nvtx::range fun_scope("test::SpectralSolvers"); using namespace matrix; using index_type = int; using value_type = double; diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp index 9b43828c0c..81f692a215 100644 --- a/cpp/test/nvtx.cpp +++ b/cpp/test/nvtx.cpp @@ -30,10 +30,10 @@ class NvtxNextColorTest : public ::testing::Test { const std::string temp1 = "foo"; const std::string temp2 = "bar"; - diff_string_diff_color = - common::detail::generate_next_color(temp1) != common::detail::generate_next_color(temp2); - same_string_same_color = - common::detail::generate_next_color(temp1) == common::detail::generate_next_color(temp1); + diff_string_diff_color = common::nvtx::detail::generate_next_color(temp1) != + common::nvtx::detail::generate_next_color(temp2); + same_string_same_color = common::nvtx::detail::generate_next_color(temp1) == + common::nvtx::detail::generate_next_color(temp1); } void TearDown() {} bool diff_string_diff_color = false; From 84e9ee790e138e158a8645820342c153a971c24f Mon Sep 17 00:00:00 2001 From: achirkin Date: Fri, 17 Dec 2021 09:59:00 +0100 Subject: [PATCH 12/12] Add usage examples in the doxygen --- cpp/include/raft/common/nvtx.hpp | 63 +++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp index 35f5802a5b..918d5e10d8 100644 --- a/cpp/include/raft/common/nvtx.hpp +++ b/cpp/include/raft/common/nvtx.hpp @@ -19,16 +19,61 @@ #include "detail/nvtx.hpp" #include +/** + * \section Usage + * + * To add NVTX ranges to your code, use the `nvtx::range` RAII object. A + * range begins when the object is created, and ends when the object is + * destroyed. + * + * The example below creates nested NVTX ranges. The range `fun_scope` spans + * the whole function, while the range `epoch_scope` spans an iteration + * (and appears 5 times in the timeline). + * \code{.cpp} + * #include + * void some_function(int k){ + * // Begins a NVTX range with the messsage "some_function_{k}" + * // The range ends when some_function() returns + * common::nvtx::range fun_scope( r{"some_function_%d", k}; + * + * for(int i = 0; i < 5; i++){ + * common::nvtx::range epoch_scope{"epoch-%d", i}; + * // some logic inside the loop + * } + * } + * \endcode + * + * \section Domains + * + * All NVTX ranges are assigned to domains. A domain defines a named timeline in + * the Nsight Systems view. By default, we put all ranges into a domain `domain::app` + * named "application". This is controlled by the template parameter `Domain`. + * + * The example below defines a domain and uses it in a function. + * \code{.cpp} + * #include + * + * struct my_app_domain { + * static constexpr char const* name{"my application"}; + * } + * + * void some_function(int k){ + * // This NVTX range appears in the timeline named "my application" in Nsight Systems. + * common::nvtx::range fun_scope( r{"some_function_%d", k}; + * // some logic inside the loop + * } + * \endcode + */ namespace raft::common::nvtx { namespace domain { -/** The default NVTX domain. */ +/** @brief The default NVTX domain. */ struct app { static constexpr char const* name{"application"}; }; -/** This NVTX domain is supposed to be used within raft. */ +/** @brief This NVTX domain is supposed to be used within raft. */ struct raft { static constexpr char const* name{"raft"}; }; @@ -40,7 +85,9 @@ struct raft { * * @tparam Domain optional struct that defines the NVTX domain message; * You can create a new domain with a custom message as follows: - * `struct custom_domain { static constexpr char const* name{"custom message"}; }` + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode * NB: make sure to use the same domain for `push_range` and `pop_range`. * @param format range name format (accepts printf-style arguments) * @param args the arguments for the printf-style formatting @@ -56,7 +103,9 @@ inline void push_range(const char* format, Args... args) * * @tparam Domain optional struct that defines the NVTX domain message; * You can create a new domain with a custom message as follows: - * `struct custom_domain { static constexpr char const* name{"custom message"}; }` + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode * NB: make sure to use the same domain for `push_range` and `pop_range`. */ template @@ -68,9 +117,13 @@ inline void pop_range() /** * @brief Push a named NVTX range that would be popped at the end of the object lifetime. * + * Refer to \ref Usage for the usage examples. + * * @tparam Domain optional struct that defines the NVTX domain message; * You can create a new domain with a custom message as follows: - * `struct custom_domain { static constexpr char const* name{"custom message"}; }` + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; } + * \endcode */ template class range {