diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b48eff36db..efebfff429 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -145,6 +145,7 @@ set(RAFT_LINK_LIBRARIES CUDA::cusolver CUDA::cudart CUDA::cusparse + $<$:CUDA::nvToolsExt> rmm::rmm cuco::cuco ) @@ -153,6 +154,14 @@ target_link_libraries(raft INTERFACE ${RAFT_LINK_LIBRARIES}) target_link_libraries(raft_distance PUBLIC ${RAFT_LINK_LIBRARIES}) target_link_libraries(raft_nn PUBLIC ${RAFT_LINK_LIBRARIES} FAISS::FAISS) +set(RAFT_COMPILE_DEFINITIONS + $<$:NVTX_ENABLED> + ) + +target_compile_definitions(raft INTERFACE ${RAFT_COMPILE_DEFINITIONS}) +target_compile_definitions(raft_distance PRIVATE ${RAFT_COMPILE_DEFINITIONS}) +target_compile_definitions(raft_nn PRIVATE ${RAFT_COMPILE_DEFINITIONS}) + target_compile_options(raft_distance PRIVATE "$<$:${RAFT_CXX_FLAGS}>" "$<$:${RAFT_CUDA_FLAGS}>" diff --git a/cpp/include/raft/common/detail/nvtx.hpp b/cpp/include/raft/common/detail/nvtx.hpp new file mode 100644 index 0000000000..4cef7c07bc --- /dev/null +++ b/cpp/include/raft/common/detail/nvtx.hpp @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace raft::common::nvtx::detail { + +#ifdef NVTX_ENABLED /* NOTE(review): the #include directives below sit inside namespace raft::common::nvtx::detail; a header that was not already included earlier would declare its contents inside this namespace - consider hoisting them above the namespace */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * @brief An internal struct to store associated state with the color + * generator + */ +struct color_gen_state { + /** collection of all tagged colors generated so far */ + static inline std::unordered_map all_colors_; + /** mutex for accessing the above map */ + static inline std::mutex map_mutex_; + /** saturation */ + static inline constexpr float kS = 0.9f; + /** value */ + static inline constexpr float kV = 0.85f; + /** golden ratio */ + static inline constexpr float kPhi = 1.61803f; + /** inverse golden ratio */ + static inline constexpr float kInvPhi = 1.f / kPhi; +}; + +// Convert an HSV color to a packed 0xAARRGGBB value with alpha fixed at 0xff; all h, s, v are in range [0, 1] (a hue outside [0, 1) is treated as 0) +// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB +inline auto hsv2rgb(float h, float s, float v) -> uint32_t +{ + uint32_t out = 0xff000000u; + if (s < 0.0f) { s = 0.0f; } /* clamp only: s == 0 falls through and yields the gray (v, v, v) instead of black */ + // convert hue from [0, 1] range to [0, 360] + float h_deg = h * 360.f; + if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f; + h_deg /= 60.f; + int h_range = static_cast(h_deg); + float h_mod = h_deg - h_range; + float x = v * (1.f - s); + float y = v * (1.f - (s * h_mod)); + float z = v * (1.f - (s * (1.f - h_mod))); + float r, g, b; + switch (h_range) { + case 0: + r = v; + g = z; + b = x; + break; + case 1: + r = y; + g = v; + b = x; + break; + case 2: + r = x; + g = v; + b = z; + break; + case 3: + r = x; + g = y; + b = v; + break; + case 4: + r = z; + g = x; + b = v; + break; + case 5: + default: + r = v; + g = x; + b = y; + break; + } + out |= (uint32_t(r * 255.f) << 16); /* 255, not 256: a channel of 1.0 must map to 0xff rather than spill into the next byte */ + out |= (uint32_t(g * 255.f) << 8); + out |= uint32_t(b * 255.f); + return out; +} + +/** + * @brief Helper method to generate 'visually distinct' colors.
+ * Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/ + * However, if an associated tag is passed, it will look up in its history for + * any generated color against this tag and if found, just returns it, else + * generates a new color, assigns a tag to it and stores it for future usage. + * Such a thing is very useful for nvtx markers where the ranges associated + * with a specific tag should ideally get the same color for the purpose of + * visualizing it on nsight-systems timeline. + * @param tag look for any previously generated colors with this tag or + * associate the currently generated color with it + * @return 32-bit ARGB integer with the alpha channel set to 0xff + */ +inline auto generate_next_color(const std::string& tag) -> uint32_t +{ + // std::unordered_map color_gen_state::all_colors_; + // std::mutex color_gen_state::map_mutex_; + + std::lock_guard guard(color_gen_state::map_mutex_); + if (!tag.empty()) { + auto itr = color_gen_state::all_colors_.find(tag); + if (itr != color_gen_state::all_colors_.end()) { return itr->second; } + } + auto h = static_cast(rand()) / static_cast(RAND_MAX); + h += color_gen_state::kInvPhi; + if (h >= 1.f) h -= 1.f; /* NOTE(review): the hue is drawn from rand() on every call, so adding kInvPhi only shifts a random value; two different tags can in principle receive the same color */ + auto rgb = hsv2rgb(h, color_gen_state::kS, color_gen_state::kV); + if (!tag.empty()) { color_gen_state::all_colors_[tag] = rgb; } + return rgb; +} + +template +struct domain_store { + /* If `Domain::name` does not exist, this default instance is used and its static_assert fails the build.
+ */ + static_assert(sizeof(Domain) != sizeof(Domain), + "Type used to identify a domain must contain a static member 'char const* name'"); + static inline nvtxDomainHandle_t const kValue = nullptr; +}; + +template +struct domain_store< + Domain, + /* Check if there exists `Domain::name` */ + std::enable_if_t< + std::is_same::type>::value, + Domain>> { + static inline nvtxDomainHandle_t const kValue = nvtxDomainCreateA(Domain::name); +}; + +template +inline void push_range_name(const char* name) +{ /* Push a range labelled `name`; its color is looked up or generated from the name by generate_next_color. */ + nvtxEventAttributes_t event_attrib = {0}; + event_attrib.version = NVTX_VERSION; + event_attrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + event_attrib.colorType = NVTX_COLOR_ARGB; + event_attrib.color = generate_next_color(name); + event_attrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + event_attrib.message.ascii = name; + nvtxDomainRangePushEx(domain_store::kValue, &event_attrib); +} + +template +inline void push_range(const char* format, Args... args) +{ /* With extra arguments the name is printf-formatted into a temporary buffer first; otherwise `format` is used verbatim as the range name. */ + if constexpr (sizeof...(args) > 0) { + int length = std::snprintf(nullptr, 0, format, args...); + assert(length >= 0); + std::vector buf(length + 1); + std::snprintf(buf.data(), length + 1, format, args...); + push_range_name(buf.data()); + } else { + push_range_name(format); + } +} + +template +inline void pop_range() +{ + nvtxDomainRangePop(domain_store::kValue); +} + +#else // NVTX_ENABLED + +template +inline void push_range(const char* format, Args... args) +{ +} + +template +inline void pop_range() +{ +} + +#endif // NVTX_ENABLED + +} // namespace raft::common::nvtx::detail diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp new file mode 100644 index 0000000000..918d5e10d8 --- /dev/null +++ b/cpp/include/raft/common/nvtx.hpp @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/nvtx.hpp" +#include + +/** + * \section Usage + * + * To add NVTX ranges to your code, use the `nvtx::range` RAII object. A + * range begins when the object is created, and ends when the object is + * destroyed. + * + * The example below creates nested NVTX ranges. The range `fun_scope` spans + * the whole function, while the range `epoch_scope` spans an iteration + * (and appears 5 times in the timeline). + * \code{.cpp} + * #include <raft/common/nvtx.hpp> + * void some_function(int k){ + * // Begins an NVTX range with the message "some_function_{k}" + * // The range ends when some_function() returns + * common::nvtx::range fun_scope{"some_function_%d", k}; + * + * for(int i = 0; i < 5; i++){ + * common::nvtx::range epoch_scope{"epoch-%d", i}; + * // some logic inside the loop + * } + * } + * \endcode + * + * \section Domains + * + * All NVTX ranges are assigned to domains. A domain defines a named timeline in + * the Nsight Systems view. By default, we put all ranges into a domain `domain::app` + * named "application". This is controlled by the template parameter `Domain`. + * + * The example below defines a domain and uses it in a function. + * \code{.cpp} + * #include <raft/common/nvtx.hpp> + * + * struct my_app_domain { + * static constexpr char const* name{"my application"}; + * }; + * + * void some_function(int k){ + * // This NVTX range appears in the timeline named "my application" in Nsight Systems.
+ * common::nvtx::range<my_app_domain> fun_scope{"some_function_%d", k}; + * // some logic inside the function + * } + * \endcode + */ +namespace raft::common::nvtx { + +namespace domain { + +/** @brief The default NVTX domain. */ +struct app { + static constexpr char const* name{"application"}; +}; + +/** @brief This NVTX domain is supposed to be used within raft. */ +struct raft { + static constexpr char const* name{"raft"}; +}; + +} // namespace domain + +/** + * @brief Push a named NVTX range. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; }; + * \endcode + * NB: make sure to use the same domain for `push_range` and `pop_range`. + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ +template +inline void push_range(const char* format, Args... args) +{ + detail::push_range(format, args...); +} + +/** + * @brief Pop the latest range. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; }; + * \endcode + * NB: make sure to use the same domain for `push_range` and `pop_range`. + */ +template +inline void pop_range() +{ + detail::pop_range(); +} + +/** + * @brief Push a named NVTX range that would be popped at the end of the object lifetime. + * + * Refer to \ref Usage for the usage examples. + * + * @tparam Domain optional struct that defines the NVTX domain message; + * You can create a new domain with a custom message as follows: + * \code{.cpp} + * struct custom_domain { static constexpr char const* name{"custom message"}; }; + * \endcode + */ +template +class range { + public: + /** + * Push a named NVTX range.
+ * At the end of the object lifetime, pop the range back. + * + * @param format range name format (accepts printf-style arguments) + * @param args the arguments for the printf-style formatting + */ + template + explicit range(const char* format, Args... args) + { + push_range(format, args...); + } + + ~range() { pop_range(); } + + /* This object is not meant to be touched: it cannot be copied, moved, or allocated with new. */ + range(const range&) = delete; + range(range&&) = delete; + auto operator=(const range&) -> range& = delete; + auto operator=(range&&) -> range& = delete; + static auto operator new(std::size_t) -> void* = delete; + static auto operator new[](std::size_t) -> void* = delete; +}; + +} // namespace raft::common::nvtx diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index b75497f725..2afae788a1 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -19,6 +19,7 @@ #include "eig.cuh" #include "gemm.cuh" #include "transpose.h" +#include #include #include #include @@ -63,6 +64,8 @@ void svdQR(const raft::handle_t& handle, bool gen_right_vec, cudaStream_t stream) { + common::nvtx::range fun_scope( + "raft::linalg::svdQR(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -140,6 +143,8 @@ void svdEig(const raft::handle_t& handle, bool gen_left_vec, cudaStream_t stream) { + common::nvtx::range fun_scope( + "raft::linalg::svdEig(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); cublasHandle_t cublasH = handle.get_cublas_handle(); @@ -218,6 +223,8 @@ void svdJacobi(const raft::handle_t& handle, int max_sweeps, cudaStream_t stream) { + common::nvtx::range fun_scope( + "raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols); cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle(); gesvdjInfo_t gesvdj_params = NULL; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index b270204489..b37c671525
100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -36,6 +36,7 @@ add_executable(test_raft test/eigen_solvers.cu test/handle.cpp test/integer_utils.cpp + test/nvtx.cpp test/pow2_utils.cu test/label/label.cu test/label/merge_labels.cu @@ -117,21 +118,14 @@ target_compile_options(test_raft ) target_include_directories(test_raft - PUBLIC "$" - "$" + PUBLIC "$" "${FAISS_GPU_HEADERS}" ) target_link_libraries(test_raft PRIVATE - CUDA::cublas - CUDA::curand - CUDA::cusolver - CUDA::cudart - CUDA::cusparse - rmm::rmm - cuco::cuco + raft # transitively links all CUDA libs, etc raft_distance raft_nn GTest::gtest diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 9372a15a91..475202137b 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -16,6 +16,7 @@ #include "../test_utils.h" #include +#include #include #include #include @@ -416,6 +417,9 @@ class DistanceTest : public ::testing::TestWithParam> { void SetUp() override { + auto testInfo = testing::UnitTest::GetInstance()->current_test_info(); + common::nvtx::range fun_scope("test::%s/%s", testInfo->test_suite_name(), testInfo->name()); + raft::random::Rng r(params.seed); int m = params.m; int n = params.n; diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index dc7de92eb8..f898d11d2e 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include @@ -27,6 +28,7 @@ namespace raft { TEST(Raft, EigenSolvers) { + common::nvtx::range fun_scope("test::EigenSolvers"); using namespace matrix; using index_type = int; using value_type = double; @@ -67,6 +69,7 @@ TEST(Raft, EigenSolvers) TEST(Raft, SpectralSolvers) { + common::nvtx::range fun_scope("test::SpectralSolvers"); using namespace matrix; using index_type = int; using value_type = double; diff --git a/cpp/test/nvtx.cpp b/cpp/test/nvtx.cpp new file mode 100644 index 0000000000..81f692a215 --- /dev/null +++ b/cpp/test/nvtx.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifdef NVTX_ENABLED +#include +#include +/** + * tests for the functionality of generating next color based on string + * entered in the NVTX Range marker wrappers + */ + +namespace raft { + +class NvtxNextColorTest : public ::testing::Test { + protected: + void SetUp() override + { + const std::string temp1 = "foo"; + const std::string temp2 = "bar"; + + diff_string_diff_color = common::nvtx::detail::generate_next_color(temp1) != + common::nvtx::detail::generate_next_color(temp2); + same_string_same_color = common::nvtx::detail::generate_next_color(temp1) == + common::nvtx::detail::generate_next_color(temp1); + } + void TearDown() override {} /* marked override for consistency with SetUp, so a signature mismatch is caught at compile time */ + bool diff_string_diff_color = false; + bool same_string_same_color = false; +}; + +TEST_F(NvtxNextColorTest, generate_next_color) +{ + EXPECT_TRUE(diff_string_diff_color); + EXPECT_TRUE(same_string_same_color); +} + +} // end namespace raft +#endif diff --git a/python/raft/common/cuda.pyx b/python/raft/common/cuda.pyx index baa46bfef8..0b97eeba67 100644 --- a/python/raft/common/cuda.pyx +++ b/python/raft/common/cuda.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,9 +19,6 @@ # cython: embedsignature = True # cython: language_level = 3 -import functools -from libcpp.string cimport string - class CudaRuntimeError(RuntimeError): def __init__(self, extraMsg=None):