rapidsai · rapids-bot · Dec 17, 2021 · Dec 10, 2021 · Dec 10, 2021 · Dec 10, 2021
diff --git a/cpp/include/raft/common/detail/nvtx.cuh b/cpp/include/raft/common/detail/nvtx.cuh
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <mutex>
+#include <rmm/cuda_stream_view.hpp>
+#include <string>
+#include <unordered_map>
+
+namespace raft {
+namespace common {
+namespace detail {
+
+/**
+ * @brief An internal struct to store associated state with the color
+ * generator
+ */
+struct ColorGenState {
+  /** collection of all tagged colors generated so far */
+  static std::unordered_map<std::string, uint32_t> allColors;
+  /** mutex for accessing the above map */
+  static std::mutex mapMutex;
+  /** saturation */
+  static constexpr float S = 0.9f;
+  /** value */
+  static constexpr float V = 0.85f;
+  /** golden ratio */
+  static constexpr float Phi = 1.61803f;
+  /** inverse golden ratio */
+  static constexpr float InvPhi = 1.f / Phi;
+};
+
+std::unordered_map<std::string, uint32_t> ColorGenState::allColors;
+std::mutex ColorGenState::mapMutex;
+
+// all h, s, v are in range [0, 1]
+// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB
+uint32_t hsv2rgb(float h, float s, float v)
+{
+  uint32_t out = 0xff000000u;
+  if (s <= 0.0f) { return out; }
+  // convert hue from [0, 1] range to [0, 360]
+  float h_deg = h * 360.f;
+  if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f;
+  h_deg /= 60.f;
+  int h_range = (int)h_deg;
+  float h_mod = h_deg - h_range;
+  float x     = v * (1.f - s);
+  float y     = v * (1.f - (s * h_mod));
+  float z     = v * (1.f - (s * (1.f - h_mod)));
+  float r, g, b;
+  switch (h_range) {
+    case 0:
+      r = v;
+      g = z;
+      b = x;
+      break;
+    case 1:
+      r = y;
+      g = v;
+      b = x;
+      break;
+    case 2:
+      r = x;
+      g = v;
+      b = z;
+      break;
+    case 3:
+      r = x;
+      g = y;
+      b = v;
+      break;
+    case 4:
+      r = z;
+      g = x;
+      b = v;
+      break;
+    case 5:
+    default:
+      r = v;
+      g = x;
+      b = y;
+      break;
+  }
+  out |= (uint32_t(r * 256.f) << 16);
+  out |= (uint32_t(g * 256.f) << 8);
+  out |= uint32_t(b * 256.f);
+  return out;
+}
+
+/**
+ * @brief Helper method to generate 'visually distinct' colors.
+ * Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
+ * However, if an associated tag is passed, it will look up in its history for
+ * any generated color against this tag and if found, just returns it, else
+ * generates a new color, assigns a tag to it and stores it for future usage.
+ * Such a thing is very useful for nvtx markers where the ranges associated
+ * with a specific tag should ideally get the same color for the purpose of
+ * visualizing it on nsight-systems timeline.
+ * @param tag look for any previously generated colors with this tag or
+ * associate the currently generated color with it
+ * @return returns 32b RGB integer with alpha channel set of 0xff
+ */
+uint32_t generateNextColor(const std::string& tag)
+{
+  std::lock_guard<std::mutex> guard(ColorGenState::mapMutex);
+  if (!tag.empty()) {
+    auto itr = ColorGenState::allColors.find(tag);
+    if (itr != ColorGenState::allColors.end()) { return itr->second; }
+  }
+  float h = rand() * 1.f / RAND_MAX;
+  h += ColorGenState::InvPhi;
+  if (h >= 1.f) h -= 1.f;
+  auto rgb = hsv2rgb(h, ColorGenState::S, ColorGenState::V);
+  if (!tag.empty()) { ColorGenState::allColors[tag] = rgb; }
+  return rgb;
+}
+
+#ifdef NVTX_ENABLED
+
+#include <nvToolsExt.h>
+
+nvtxDomainHandle_t domain = nvtxDomainCreateA("raft");
+
+void pushRange_name(const char* name)
+{
+  nvtxEventAttributes_t eventAttrib = {0};
+  eventAttrib.version               = NVTX_VERSION;
+  eventAttrib.size                  = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+  eventAttrib.colorType             = NVTX_COLOR_ARGB;
+  eventAttrib.color                 = generateNextColor(name);
+  eventAttrib.messageType           = NVTX_MESSAGE_TYPE_ASCII;
+  eventAttrib.message.ascii         = name;
+  nvtxDomainRangePushEx(domain, &eventAttrib);
+}
+
+template <typename... Args>
+void pushRange(const char* format, Args... args)
+{
+  if constexpr (sizeof...(args) > 0) {
+    int length = std::snprintf(nullptr, 0, format, args...);
+    assert(length >= 0);
+    auto buf = std::make_unique<char[]>(length + 1);
+    std::snprintf(buf.get(), length + 1, format, args...);
+    pushRange_name(buf.get());
+  } else {
+    pushRange_name(format);
+  }
+}
+
+template <typename... Args>
+void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
+{
+  stream.synchronize();
+  pushRange(format, args...);
+}
+
+void popRange() { nvtxDomainRangePop(domain); }
+
+void popRange(rmm::cuda_stream_view stream)
+{
+  stream.synchronize();
+  popRange();
+}
+
+#else  // NVTX_ENABLED
+
+template <typename... Args>
+void pushRange(const char* format, Args... args)
+{
+}
+
+template <typename... Args>
+void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
+{
+}
+
+void popRange() {}
+
+void popRange(rmm::cuda_stream_view stream) {}
+
+#endif  // NVTX_ENABLED
+
+}  // namespace detail
+}  // namespace common
+}  // namespace raft
diff --git a/cpp/include/raft/common/nvtx.hpp b/cpp/include/raft/common/nvtx.hpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "detail/nvtx.cuh"
+
+namespace raft {
+namespace common {
+
+/**
+ * @brief Push a named nvtx range
+ * @param format range name format (accepts printf-style arguments)
+ * @param args the arguments for the printf-style formatting
+ */
+template <typename... Args>
+void PUSH_RANGE(const char* format, Args... args)
+{
+  detail::pushRange(format, args...);
+}
+
+/**
+ * @brief Synchronize CUDA stream and push a named nvtx range
+ * @param format range name format (accepts printf-style arguments)
+ * @param args the arguments for the printf-style formatting
+ * @param stream stream to synchronize
+ */
+template <typename... Args>
+void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
+{
+  detail::pushRange(stream, format, args...);
+}
+
+/** Pop the latest range */
+void POP_RANGE() { detail::popRange(); }
+
+/**
+ * @brief Synchronize CUDA stream and pop the latest nvtx range
+ * @param stream stream to synchronize
+ */
+void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); }
+
+/** Push a named nvtx range that would be popped at the end of the object lifetime. */
+class AUTO_RANGE {
+ private:
+  std::optional<rmm::cuda_stream_view> streamMaybe;
+
+  /* This object is not meant to be touched. */
+  AUTO_RANGE(const AUTO_RANGE&) = delete;
+  AUTO_RANGE(AUTO_RANGE&&)      = delete;
+  AUTO_RANGE& operator=(const AUTO_RANGE&) = delete;
+  AUTO_RANGE& operator=(AUTO_RANGE&&) = delete;
+
+ public:
+  /**
+   * Synchronize CUDA stream and push a named nvtx range
+   * At the end of the object lifetime, synchronize again and pop the range.
+   *
+   * @param stream stream to synchronize
+   * @param format range name format (accepts printf-style arguments)
+   * @param args the arguments for the printf-style formatting
+   */
+  template <typename... Args>
+  AUTO_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
+    : streamMaybe(std::make_optional(stream))
+  {
+    PUSH_RANGE(stream, format, args...);
+  }
+
+  /**
+   * Push a named nvtx range.
+   * At the end of the object lifetime, pop the range back.
+   *
+   * @param format range name format (accepts printf-style arguments)
+   * @param args the arguments for the printf-style formatting
+   */
+  template <typename... Args>
+  AUTO_RANGE(const char* format, Args... args) : streamMaybe(std::nullopt)
+  {
+    PUSH_RANGE(format, args...);
+  }
+
+  ~AUTO_RANGE()
+  {
+    if (streamMaybe.has_value())
+      POP_RANGE(streamMaybe.value());
+    else
+      POP_RANGE();
+  }
+};
+
+/*!
+  \def RAFT_USING_RANGE(...)
+  When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block.
+
+  This macro initializes a dummy AUTO_RANGE variable on the stack,
+  which pushes the range in its constructor and pops it in the destructor.
+*/
+#ifdef NVTX_ENABLED
+#define RAFT_USING_RANGE(...) raft::common::AUTO_RANGE _AUTO_RANGE_##__LINE__(__VA_ARGS__)
+#else
+#define RAFT_USING_RANGE(...) (void)0
+#endif
+
+}  // namespace common
+}  // namespace raft
diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh
@@ -19,6 +19,7 @@
 #include <raft/cudart_utils.h>
 #include <raft/linalg/cublas_wrappers.h>
 #include <raft/linalg/cusolver_wrappers.h>
+#include <raft/common/nvtx.hpp>
 #include <raft/cuda_utils.cuh>
 #include <raft/handle.hpp>
 #include <raft/matrix/math.hpp>
@@ -63,6 +64,7 @@ void svdQR(const raft::handle_t& handle,
            bool gen_right_vec,
            cudaStream_t stream)
 {
+  RAFT_USING_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols);
   cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
   cublasHandle_t cublasH       = handle.get_cublas_handle();
 
@@ -140,6 +142,7 @@ void svdEig(const raft::handle_t& handle,
             bool gen_left_vec,
             cudaStream_t stream)
 {
+  RAFT_USING_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols);
   cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
   cublasHandle_t cublasH       = handle.get_cublas_handle();
 
@@ -218,6 +221,7 @@ void svdJacobi(const raft::handle_t& handle,
                int max_sweeps,
                cudaStream_t stream)
 {
+  RAFT_USING_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols);
   cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
 
   gesvdjInfo_t gesvdj_params = NULL;

diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
@@ -130,6 +130,7 @@ PRIVATE
   CUDA::cusolver
   CUDA::cudart
   CUDA::cusparse
+  $<$<BOOL:${NVTX}>:CUDA::nvToolsExt>
   rmm::rmm
   cuco::cuco
   FAISS::FAISS
@@ -139,3 +140,8 @@ PRIVATE
   $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
   $<TARGET_NAME_IF_EXISTS:conda_env>
 )
+
+target_compile_definitions(test_raft
+PRIVATE
+  $<$<BOOL:${NVTX}>:NVTX_ENABLED>
+)