Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVTX range helpers #416

Merged
merged 16 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 202 additions & 0 deletions cpp/include/raft/common/detail/nvtx.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <stdint.h>
#include <stdlib.h>
#include <mutex>
#include <rmm/cuda_stream_view.hpp>
#include <string>
#include <unordered_map>

namespace raft {
namespace common {
namespace detail {

/**
* @brief An internal struct to store associated state with the color
* generator
*/
struct ColorGenState {
/** collection of all tagged colors generated so far */
static std::unordered_map<std::string, uint32_t> allColors;
/** mutex for accessing the above map */
static std::mutex mapMutex;
/** saturation */
static constexpr float S = 0.9f;
/** value */
static constexpr float V = 0.85f;
/** golden ratio */
static constexpr float Phi = 1.61803f;
/** inverse golden ratio */
static constexpr float InvPhi = 1.f / Phi;
};

std::unordered_map<std::string, uint32_t> ColorGenState::allColors;
std::mutex ColorGenState::mapMutex;

// all h, s, v are in range [0, 1]
// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB
uint32_t hsv2rgb(float h, float s, float v)
{
uint32_t out = 0xff000000u;
if (s <= 0.0f) { return out; }
// convert hue from [0, 1] range to [0, 360]
float h_deg = h * 360.f;
if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f;
h_deg /= 60.f;
int h_range = (int)h_deg;
float h_mod = h_deg - h_range;
float x = v * (1.f - s);
float y = v * (1.f - (s * h_mod));
float z = v * (1.f - (s * (1.f - h_mod)));
float r, g, b;
switch (h_range) {
case 0:
r = v;
g = z;
b = x;
break;
case 1:
r = y;
g = v;
b = x;
break;
case 2:
r = x;
g = v;
b = z;
break;
case 3:
r = x;
g = y;
b = v;
break;
case 4:
r = z;
g = x;
b = v;
break;
case 5:
default:
r = v;
g = x;
b = y;
break;
}
out |= (uint32_t(r * 256.f) << 16);
out |= (uint32_t(g * 256.f) << 8);
out |= uint32_t(b * 256.f);
return out;
}

/**
* @brief Helper method to generate 'visually distinct' colors.
* Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
* However, if an associated tag is passed, it will look up in its history for
* any generated color against this tag and if found, just returns it, else
* generates a new color, assigns a tag to it and stores it for future usage.
* Such a thing is very useful for nvtx markers where the ranges associated
* with a specific tag should ideally get the same color for the purpose of
* visualizing it on nsight-systems timeline.
* @param tag look for any previously generated colors with this tag or
* associate the currently generated color with it
* @return returns 32b RGB integer with alpha channel set of 0xff
*/
uint32_t generateNextColor(const std::string& tag)
{
std::lock_guard<std::mutex> guard(ColorGenState::mapMutex);
if (!tag.empty()) {
auto itr = ColorGenState::allColors.find(tag);
if (itr != ColorGenState::allColors.end()) { return itr->second; }
}
float h = rand() * 1.f / RAND_MAX;
h += ColorGenState::InvPhi;
if (h >= 1.f) h -= 1.f;
auto rgb = hsv2rgb(h, ColorGenState::S, ColorGenState::V);
if (!tag.empty()) { ColorGenState::allColors[tag] = rgb; }
return rgb;
}

#ifdef NVTX_ENABLED

#include <nvToolsExt.h>

nvtxDomainHandle_t domain = nvtxDomainCreateA("raft");

void pushRange_name(const char* name)
{
nvtxEventAttributes_t eventAttrib = {0};
eventAttrib.version = NVTX_VERSION;
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
eventAttrib.colorType = NVTX_COLOR_ARGB;
eventAttrib.color = generateNextColor(name);
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
eventAttrib.message.ascii = name;
nvtxDomainRangePushEx(domain, &eventAttrib);
}

template <typename... Args>
void pushRange(const char* format, Args... args)
{
if constexpr (sizeof...(args) > 0) {
int length = std::snprintf(nullptr, 0, format, args...);
assert(length >= 0);
auto buf = std::make_unique<char[]>(length + 1);
std::snprintf(buf.get(), length + 1, format, args...);
pushRange_name(buf.get());
} else {
pushRange_name(format);
}
}

template <typename... Args>
void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
{
stream.synchronize();
pushRange(format, args...);
}

void popRange() { nvtxDomainRangePop(domain); }

void popRange(rmm::cuda_stream_view stream)
{
stream.synchronize();
popRange();
}

#else // NVTX_ENABLED

template <typename... Args>
void pushRange(const char* format, Args... args)
{
}

template <typename... Args>
void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
{
}

void popRange() {}

void popRange(rmm::cuda_stream_view stream) {}

#endif // NVTX_ENABLED

} // namespace detail
} // namespace common
} // namespace raft
119 changes: 119 additions & 0 deletions cpp/include/raft/common/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
achirkin marked this conversation as resolved.
Show resolved Hide resolved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "detail/nvtx.cuh"

namespace raft {
namespace common {

/**
* @brief Push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
void PUSH_RANGE(const char* format, Args... args)
{
detail::pushRange(format, args...);
}

/**
* @brief Synchronize CUDA stream and push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
* @param stream stream to synchronize
*/
template <typename... Args>
void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
{
detail::pushRange(stream, format, args...);
}

/** Pop the latest range */
void POP_RANGE() { detail::popRange(); }

/**
* @brief Synchronize CUDA stream and pop the latest nvtx range
* @param stream stream to synchronize
*/
void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); }

/** Push a named nvtx range that would be popped at the end of the object lifetime. */
class AUTO_RANGE {
achirkin marked this conversation as resolved.
Show resolved Hide resolved
private:
std::optional<rmm::cuda_stream_view> streamMaybe;

/* This object is not meant to be touched. */
AUTO_RANGE(const AUTO_RANGE&) = delete;
AUTO_RANGE(AUTO_RANGE&&) = delete;
AUTO_RANGE& operator=(const AUTO_RANGE&) = delete;
AUTO_RANGE& operator=(AUTO_RANGE&&) = delete;

public:
/**
* Synchronize CUDA stream and push a named nvtx range
* At the end of the object lifetime, synchronize again and pop the range.
*
* @param stream stream to synchronize
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
AUTO_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
: streamMaybe(std::make_optional(stream))
{
PUSH_RANGE(stream, format, args...);
}

/**
* Push a named nvtx range.
* At the end of the object lifetime, pop the range back.
*
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
AUTO_RANGE(const char* format, Args... args) : streamMaybe(std::nullopt)
{
PUSH_RANGE(format, args...);
}

~AUTO_RANGE()
{
if (streamMaybe.has_value())
achirkin marked this conversation as resolved.
Show resolved Hide resolved
POP_RANGE(streamMaybe.value());
achirkin marked this conversation as resolved.
Show resolved Hide resolved
else
POP_RANGE();
}
};

/*!
\def RAFT_USING_RANGE(...)
When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block.

This macro initializes a dummy AUTO_RANGE variable on the stack,
which pushes the range in its constructor and pops it in the destructor.
*/
#ifdef NVTX_ENABLED
#define RAFT_USING_RANGE(...) raft::common::AUTO_RANGE _AUTO_RANGE_##__LINE__(__VA_ARGS__)
#else
#define RAFT_USING_RANGE(...) (void)0
#endif
achirkin marked this conversation as resolved.
Show resolved Hide resolved

} // namespace common
} // namespace raft
4 changes: 4 additions & 0 deletions cpp/include/raft/linalg/svd.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <raft/cudart_utils.h>
#include <raft/linalg/cublas_wrappers.h>
#include <raft/linalg/cusolver_wrappers.h>
#include <raft/common/nvtx.hpp>
#include <raft/cuda_utils.cuh>
#include <raft/handle.hpp>
#include <raft/matrix/math.hpp>
Expand Down Expand Up @@ -63,6 +64,7 @@ void svdQR(const raft::handle_t& handle,
bool gen_right_vec,
cudaStream_t stream)
{
RAFT_USING_RANGE("raft::linalg::svdQR(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
cublasHandle_t cublasH = handle.get_cublas_handle();

Expand Down Expand Up @@ -140,6 +142,7 @@ void svdEig(const raft::handle_t& handle,
bool gen_left_vec,
cudaStream_t stream)
{
RAFT_USING_RANGE("raft::linalg::svdEig(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
cublasHandle_t cublasH = handle.get_cublas_handle();

Expand Down Expand Up @@ -218,6 +221,7 @@ void svdJacobi(const raft::handle_t& handle,
int max_sweeps,
cudaStream_t stream)
{
RAFT_USING_RANGE("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

gesvdjInfo_t gesvdj_params = NULL;
Expand Down
6 changes: 6 additions & 0 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ PRIVATE
CUDA::cusolver
CUDA::cudart
CUDA::cusparse
$<$<BOOL:${NVTX}>:CUDA::nvToolsExt>
rmm::rmm
cuco::cuco
FAISS::FAISS
Expand All @@ -139,3 +140,8 @@ PRIVATE
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:conda_env>
)

target_compile_definitions(test_raft
PRIVATE
$<$<BOOL:${NVTX}>:NVTX_ENABLED>
)