Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVTX range helpers #416

Merged
merged 16 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ set(RAFT_LINK_LIBRARIES
CUDA::cusolver
CUDA::cudart
CUDA::cusparse
$<$<BOOL:${NVTX}>:CUDA::nvToolsExt>
rmm::rmm
cuco::cuco
)
Expand All @@ -153,6 +154,14 @@ target_link_libraries(raft INTERFACE ${RAFT_LINK_LIBRARIES})
target_link_libraries(raft_distance PUBLIC ${RAFT_LINK_LIBRARIES})
target_link_libraries(raft_nn PUBLIC ${RAFT_LINK_LIBRARIES} FAISS::FAISS)

set(RAFT_COMPILE_DEFINITIONS
$<$<BOOL:${NVTX}>:NVTX_ENABLED>
)

target_compile_definitions(raft INTERFACE ${RAFT_COMPILE_DEFINITIONS})
target_compile_definitions(raft_distance PRIVATE ${RAFT_COMPILE_DEFINITIONS})
target_compile_definitions(raft_nn PRIVATE ${RAFT_COMPILE_DEFINITIONS})

target_compile_options(raft_distance
PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAFT_CXX_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CUDA>:${RAFT_CUDA_FLAGS}>"
Expand Down
198 changes: 198 additions & 0 deletions cpp/include/raft/common/detail/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/cuda_stream_view.hpp>

namespace raft::common::detail {
achirkin marked this conversation as resolved.
Show resolved Hide resolved

#ifdef NVTX_ENABLED

#include <nvToolsExt.h>
#include <cstdint>
#include <cstdlib>
#include <mutex>
#include <string>
#include <unordered_map>

/**
* @brief An internal struct to store associated state with the color
* generator
*/
struct color_gen_state {
/** collection of all tagged colors generated so far */
static inline std::unordered_map<std::string, uint32_t> all_colors_;
/** mutex for accessing the above map */
static inline std::mutex map_mutex_;
/** saturation */
static inline constexpr float kS = 0.9f;
/** value */
static inline constexpr float kV = 0.85f;
/** golden ratio */
static inline constexpr float kPhi = 1.61803f;
/** inverse golden ratio */
static inline constexpr float kInvPhi = 1.f / kPhi;
};

// all h, s, v are in range [0, 1]
// Ref: http://en.wikipedia.org/wiki/HSL_and_HSV#Converting_to_RGB
inline auto hsv2rgb(float h, float s, float v) -> uint32_t
{
uint32_t out = 0xff000000u;
if (s <= 0.0f) { return out; }
// convert hue from [0, 1] range to [0, 360]
float h_deg = h * 360.f;
if (0.f > h_deg || h_deg >= 360.f) h_deg = 0.f;
h_deg /= 60.f;
int h_range = static_cast<int>(h_deg);
float h_mod = h_deg - h_range;
float x = v * (1.f - s);
float y = v * (1.f - (s * h_mod));
float z = v * (1.f - (s * (1.f - h_mod)));
float r, g, b;
switch (h_range) {
case 0:
r = v;
g = z;
b = x;
break;
case 1:
r = y;
g = v;
b = x;
break;
case 2:
r = x;
g = v;
b = z;
break;
case 3:
r = x;
g = y;
b = v;
break;
case 4:
r = z;
g = x;
b = v;
break;
case 5:
default:
r = v;
g = x;
b = y;
break;
}
out |= (uint32_t(r * 256.f) << 16);
out |= (uint32_t(g * 256.f) << 8);
out |= uint32_t(b * 256.f);
return out;
}

/**
* @brief Helper method to generate 'visually distinct' colors.
* Inspired from https://martin.ankerl.com/2009/12/09/how-to-create-random-colors-programmatically/
* However, if an associated tag is passed, it will look up in its history for
* any generated color against this tag and if found, just returns it, else
* generates a new color, assigns a tag to it and stores it for future usage.
* Such a thing is very useful for nvtx markers where the ranges associated
* with a specific tag should ideally get the same color for the purpose of
* visualizing it on nsight-systems timeline.
* @param tag look for any previously generated colors with this tag or
* associate the currently generated color with it
* @return returns 32b RGB integer with alpha channel set of 0xff
*/
inline auto generate_next_color(const std::string& tag) -> uint32_t
{
// std::unordered_map<std::string, uint32_t> color_gen_state::all_colors_;
// std::mutex color_gen_state::map_mutex_;

std::lock_guard<std::mutex> guard(color_gen_state::map_mutex_);
if (!tag.empty()) {
auto itr = color_gen_state::all_colors_.find(tag);
if (itr != color_gen_state::all_colors_.end()) { return itr->second; }
}
auto h = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
h += color_gen_state::kInvPhi;
if (h >= 1.f) h -= 1.f;
auto rgb = hsv2rgb(h, color_gen_state::kS, color_gen_state::kV);
if (!tag.empty()) { color_gen_state::all_colors_[tag] = rgb; }
return rgb;
}

static inline nvtxDomainHandle_t domain = nvtxDomainCreateA("application");

inline void push_range_name(const char* name)
{
nvtxEventAttributes_t event_attrib = {0};
event_attrib.version = NVTX_VERSION;
event_attrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
event_attrib.colorType = NVTX_COLOR_ARGB;
event_attrib.color = generate_next_color(name);
event_attrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
event_attrib.message.ascii = name;
nvtxDomainRangePushEx(domain, &event_attrib);
}

template <typename... Args>
inline void push_range(const char* format, Args... args)
{
if constexpr (sizeof...(args) > 0) {
int length = std::snprintf(nullptr, 0, format, args...);
assert(length >= 0);
std::vector<char> buf(length + 1);
std::snprintf(buf.data(), length + 1, format, args...);
push_range_name(buf.data());
} else {
push_range_name(format);
}
}

template <typename... Args>
inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args)
{
stream.synchronize();
push_range(format, args...);
}

inline void pop_range() { nvtxDomainRangePop(domain); }

inline void pop_range(rmm::cuda_stream_view stream)
{
stream.synchronize();
pop_range();
}

#else // NVTX_ENABLED

template <typename... Args>
inline void push_range(const char* format, Args... args)
{
}

template <typename... Args>
inline void push_range(rmm::cuda_stream_view stream, const char* format, Args... args)
{
}

inline void pop_range() {}

inline void pop_range(rmm::cuda_stream_view stream) {}

#endif // NVTX_ENABLED

} // namespace raft::common::detail
108 changes: 108 additions & 0 deletions cpp/include/raft/common/nvtx.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <optional>
#include "detail/nvtx.hpp"

namespace raft::common {

/**
* @brief Push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
inline void push_nvtx_range(const char* format, Args... args)
{
detail::push_range(format, args...);
}

/**
* @brief Synchronize CUDA stream and push a named nvtx range
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
* @param stream stream to synchronize
*/
template <typename... Args>
inline void push_nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args)
{
detail::push_range(stream, format, args...);
}

/** Pop the latest range */
inline void pop_nvtx_range() { detail::pop_range(); }

/**
* @brief Synchronize CUDA stream and pop the latest nvtx range
* @param stream stream to synchronize
*/
inline void pop_nvtx_range(rmm::cuda_stream_view stream) { detail::pop_range(stream); }

/** Push a named nvtx range that would be popped at the end of the object lifetime. */
class nvtx_range {
private:
std::optional<rmm::cuda_stream_view> stream_maybe_;

public:
/**
* Synchronize CUDA stream and push a named nvtx range
* At the end of the object lifetime, synchronize again and pop the range.
*
* @param stream stream to synchronize
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
explicit nvtx_range(rmm::cuda_stream_view stream, const char* format, Args... args)
: stream_maybe_(std::make_optional(stream))
{
push_nvtx_range(stream, format, args...);
}

/**
* Push a named nvtx range.
* At the end of the object lifetime, pop the range back.
*
* @param format range name format (accepts printf-style arguments)
* @param args the arguments for the printf-style formatting
*/
template <typename... Args>
explicit nvtx_range(const char* format, Args... args) : stream_maybe_(std::nullopt)
{
push_nvtx_range(format, args...);
}

~nvtx_range()
{
if (stream_maybe_.has_value()) {
pop_nvtx_range(*stream_maybe_);
} else {
pop_nvtx_range();
}
}

/* This object is not meant to be touched. */
nvtx_range(const nvtx_range&) = delete;
nvtx_range(nvtx_range&&) = delete;
auto operator=(const nvtx_range&) -> nvtx_range& = delete;
auto operator=(nvtx_range&&) -> nvtx_range& = delete;
static auto operator new(std::size_t) -> void* = delete;
static auto operator new[](std::size_t) -> void* = delete;
};

} // namespace raft::common
4 changes: 4 additions & 0 deletions cpp/include/raft/linalg/svd.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <raft/cudart_utils.h>
#include <raft/linalg/cublas_wrappers.h>
#include <raft/linalg/cusolver_wrappers.h>
#include <raft/common/nvtx.hpp>
#include <raft/cuda_utils.cuh>
#include <raft/handle.hpp>
#include <raft/matrix/math.hpp>
Expand Down Expand Up @@ -63,6 +64,7 @@ void svdQR(const raft::handle_t& handle,
bool gen_right_vec,
cudaStream_t stream)
{
common::nvtx_range fun_scope("raft::linalg::svdQR(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
cublasHandle_t cublasH = handle.get_cublas_handle();

Expand Down Expand Up @@ -140,6 +142,7 @@ void svdEig(const raft::handle_t& handle,
bool gen_left_vec,
cudaStream_t stream)
{
common::nvtx_range fun_scope("raft::linalg::svdEig(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();
cublasHandle_t cublasH = handle.get_cublas_handle();

Expand Down Expand Up @@ -218,6 +221,7 @@ void svdJacobi(const raft::handle_t& handle,
int max_sweeps,
cudaStream_t stream)
{
common::nvtx_range fun_scope("raft::linalg::svdJacobi(%d, %d)", n_rows, n_cols);
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

gesvdjInfo_t gesvdj_params = NULL;
Expand Down
12 changes: 3 additions & 9 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ add_executable(test_raft
test/eigen_solvers.cu
test/handle.cpp
test/integer_utils.cpp
test/nvtx.cpp
test/pow2_utils.cu
test/label/label.cu
test/label/merge_labels.cu
Expand Down Expand Up @@ -117,21 +118,14 @@ target_compile_options(test_raft
)

target_include_directories(test_raft
PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/include>"
"$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/test>"
PUBLIC "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/test>"
"${FAISS_GPU_HEADERS}"
)


target_link_libraries(test_raft
PRIVATE
CUDA::cublas
CUDA::curand
CUDA::cusolver
CUDA::cudart
CUDA::cusparse
rmm::rmm
cuco::cuco
raft # transitively links all CUDA libs, etc
raft_distance
raft_nn
GTest::gtest
Expand Down
Loading