Skip to content

Commit

Permalink
Merge branch 'fea-nvtx-ranges' into enh-faster-linewise-ops
Browse files Browse the repository at this point in the history
  • Loading branch information
achirkin committed Dec 10, 2021
2 parents c8d917b + 76af545 commit 488754e
Show file tree
Hide file tree
Showing 150 changed files with 1,633 additions and 1,311 deletions.
202 changes: 202 additions & 0 deletions cpp/include/raft/common/detail/nvtx.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>

namespace raft {
namespace common {
namespace detail {

#ifdef NVTX_ENABLED

#include <nvToolsExt.h>
#include <stdint.h>
#include <stdlib.h>
#include <mutex>
#include <string>
#include <unordered_map>

/**
* @brief An internal struct to store associated state with the color
* generator
*/
struct ColorGenState {
/** collection of all tagged colors generated so far */
static inline std::unordered_map<std::string, uint32_t> allColors;
/** mutex for accessing the above map */
static inline std::mutex mapMutex;
/** saturation */
static inline constexpr float S = 0.9f;
/** value */
static inline constexpr float V = 0.85f;
/** golden ratio */
static inline constexpr float Phi = 1.61803f;
/** inverse golden ratio */
static inline constexpr float InvPhi = 1.f / Phi;
};

// all h, s, v are in range [0, 1]
// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB
inline uint32_t hsv2rgb(float h, float s, float v)
{
uint32_t out = 0xff000000u;
if (s <= 0.0f) { return out; }
// convert hue from [0, 1] range to [0, 360]
float h_deg = h * 360.f;
if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f;
h_deg /= 60.f;
int h_range = (int)h_deg;
float h_mod = h_deg - h_range;
float x = v * (1.f - s);
float y = v * (1.f - (s * h_mod));
float z = v * (1.f - (s * (1.f - h_mod)));
float r, g, b;
switch (h_range) {
case 0:
r = v;
g = z;
b = x;
break;
case 1:
r = y;
g = v;
b = x;
break;
case 2:
r = x;
g = v;
b = z;
break;
case 3:
r = x;
g = y;
b = v;
break;
case 4:
r = z;
g = x;
b = v;
break;
case 5:
default:
r = v;
g = x;
b = y;
break;
}
out |= (uint32_t(r * 256.f) << 16);
out |= (uint32_t(g * 256.f) << 8);
out |= uint32_t(b * 256.f);
return out;
}

/**
* @brief Helper method to generate 'visually distinct' colors.
* Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
* However, if an associated tag is passed, it will look up in its history for
* any generated color against this tag and if found, just returns it, else
* generates a new color, assigns a tag to it and stores it for future usage.
* Such a thing is very useful for nvtx markers where the ranges associated
* with a specific tag should ideally get the same color for the purpose of
* visualizing it on nsight-systems timeline.
* @param tag look for any previously generated colors with this tag or
* associate the currently generated color with it
* @return returns 32b RGB integer with alpha channel set of 0xff
*/
inline uint32_t generateNextColor(const std::string& tag)
{
// std::unordered_map<std::string, uint32_t> ColorGenState::allColors;
// std::mutex ColorGenState::mapMutex;

std::lock_guard<std::mutex> guard(ColorGenState::mapMutex);
if (!tag.empty()) {
auto itr = ColorGenState::allColors.find(tag);
if (itr != ColorGenState::allColors.end()) { return itr->second; }
}
float h = rand() * 1.f / RAND_MAX;
h += ColorGenState::InvPhi;
if (h >= 1.f) h -= 1.f;
auto rgb = hsv2rgb(h, ColorGenState::S, ColorGenState::V);
if (!tag.empty()) { ColorGenState::allColors[tag] = rgb; }
return rgb;
}

static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("raft");

inline void pushRange_name(const char* name)
{
nvtxEventAttributes_t eventAttrib = {0};
eventAttrib.version = NVTX_VERSION;
eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
eventAttrib.colorType = NVTX_COLOR_ARGB;
eventAttrib.color = generateNextColor(name);
eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
eventAttrib.message.ascii = name;
nvtxDomainRangePushEx(domain, &eventAttrib);
}

template <typename... Args>
inline void pushRange(const char* format, Args... args)
{
if constexpr (sizeof...(args) > 0) {
int length = std::snprintf(nullptr, 0, format, args...);
assert(length >= 0);
auto buf = std::make_unique<char[]>(length + 1);
std::snprintf(buf.get(), length + 1, format, args...);
pushRange_name(buf.get());
} else {
pushRange_name(format);
}
}

template <typename... Args>
inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
{
stream.synchronize();
pushRange(format, args...);
}

inline void popRange() { nvtxDomainRangePop(domain); }

inline void popRange(rmm::cuda_stream_view stream)
{
stream.synchronize();
popRange();
}

#else // NVTX_ENABLED

template <typename... Args>
inline void pushRange(const char* format, Args... args)
{
}

template <typename... Args>
inline void pushRange(rmm::cuda_stream_view stream, const char* format, Args... args)
{
}

inline void popRange() {}

inline void popRange(rmm::cuda_stream_view stream) {}

#endif // NVTX_ENABLED

} // namespace detail
} // namespace common
} // namespace raft
119 changes: 119 additions & 0 deletions cpp/include/raft/common/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "detail/nvtx.cuh"

namespace raft {
namespace common {

/**
* @brief Push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
inline void PUSH_RANGE(const char* format, Args... args)
{
detail::pushRange(format, args...);
}

/**
* @brief Synchronize CUDA stream and push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
* @param stream stream to synchronize
*/
template <typename... Args>
inline void PUSH_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
{
detail::pushRange(stream, format, args...);
}

/** Pop the latest range */
inline void POP_RANGE() { detail::popRange(); }

/**
* @brief Synchronize CUDA stream and pop the latest nvtx range
* @param stream stream to synchronize
*/
inline void POP_RANGE(rmm::cuda_stream_view stream) { detail::popRange(stream); }

/** Push a named nvtx range that would be popped at the end of the object lifetime. */
class AUTO_RANGE {
private:
std::optional<rmm::cuda_stream_view> streamMaybe;

/* This object is not meant to be touched. */
AUTO_RANGE(const AUTO_RANGE&) = delete;
AUTO_RANGE(AUTO_RANGE&&) = delete;
AUTO_RANGE& operator=(const AUTO_RANGE&) = delete;
AUTO_RANGE& operator=(AUTO_RANGE&&) = delete;

public:
/**
* Synchronize CUDA stream and push a named nvtx range
* At the end of the object lifetime, synchronize again and pop the range.
*
* @param stream stream to synchronize
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
AUTO_RANGE(rmm::cuda_stream_view stream, const char* format, Args... args)
: streamMaybe(std::make_optional(stream))
{
PUSH_RANGE(stream, format, args...);
}

/**
* Push a named nvtx range.
* At the end of the object lifetime, pop the range back.
*
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
AUTO_RANGE(const char* format, Args... args) : streamMaybe(std::nullopt)
{
PUSH_RANGE(format, args...);
}

~AUTO_RANGE()
{
if (streamMaybe.has_value())
POP_RANGE(streamMaybe.value());
else
POP_RANGE();
}
};

/*!
\def RAFT_USING_RANGE(...)
When NVTX is enabled, push a named nvtx range and pop it at the end of the enclosing code block.
This macro initializes a dummy AUTO_RANGE variable on the stack,
which pushes the range in its constructor and pops it in the destructor.
*/
#ifdef NVTX_ENABLED
#define RAFT_USING_RANGE(...) raft::common::AUTO_RANGE _AUTO_RANGE_##__LINE__(__VA_ARGS__)
#else
#define RAFT_USING_RANGE(...) (void)0
#endif

} // namespace common
} // namespace raft
2 changes: 1 addition & 1 deletion cpp/include/raft/common/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void scatterImpl(
{
const IdxT nblks = raft::ceildiv(VecLen ? len / VecLen : len, (IdxT)TPB);
scatterKernel<DataT, VecLen, Lambda, IdxT><<<nblks, TPB, 0, stream>>>(out, in, idx, len, op);
CUDA_CHECK(cudaGetLastError());
RAFT_CUDA_TRY(cudaGetLastError());
}

/**
Expand Down
Loading

0 comments on commit 488754e

Please sign in to comment.