Skip to content

Commit

Permalink
Use nvtx3 and specify range names manually
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Apr 23, 2024
1 parent 6be8810 commit c963082
Show file tree
Hide file tree
Showing 18 changed files with 3,048 additions and 169 deletions.
91 changes: 26 additions & 65 deletions cub/cub/detail/nvtx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,78 +41,39 @@
# include <source_location>
#endif // __has_include(<source_location>)

// NVTX documentation: https://nvidia.github.io/NVTX/
#include <cuda/std/optional>

#include <nvtx3/nvToolsExt.h>
// NVTX C++ wrapper documentation: https://nvidia.github.io/NVTX/doxygen-cpp/index.html
#if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): better check for the first CTK version shipping the header
# include <nvtx3/nvtx3.hpp>
#else // __has_include(<nvtx3/nvtx3.hpp>)
# include "nvtx3.hpp"
#endif // __has_include(<nvtx3/nvtx3.hpp>)

CUB_NAMESPACE_BEGIN

namespace detail
{
struct NVTXGlobal
struct NVTXCCCLDomain
{
nvtxDomainHandle_t domain;

NVTXGlobal()
{
domain = nvtxDomainCreate("CCCL");
}

~NVTXGlobal()
{
nvtxDomainDestroy(domain);
}
static constexpr const char* name = "CCCL";
};

// Accessor for the single NVTXGlobal object of CUB. The object is a function-local static, so it is constructed
// the first time this function is called and the same instance is returned on every later call.
// TODO(bgruber): replace by an inline variable in C++17
inline NVTXGlobal& nvtxGlobal()
{
  static NVTXGlobal instance;
  return instance;
}

// A scope guard to create a named NVTX range inside the CUB domain.
//
// The constructor pushes a range onto the domain held by nvtxGlobal() and the destructor pops it again. Both calls
// are wrapped in NV_IF_TARGET(NV_IS_HOST, ...), so a range is only recorded on the host path.
//
// The guard is non-copyable: every instance stands for exactly one push/pop pair, and a copy would pop a range in
// its destructor that it never pushed.
struct NVTXGuard
{
  // Opens a range whose ASCII message is `name`.
  // FIXME(bgruber): DEVICE is only needed to make cuda::std::optional happy. Michael promised to fix this soon.
  _CCCL_HOST_DEVICE NVTXGuard(const char* name)
  {
    // CUB device algorithms can be called from within a kernel, but we only record a range when running on the host
    NV_IF_TARGET(
      NV_IS_HOST,
      // TODO(bgruber): documentation mentions a nvtxDomainRangePushA, but it does not exist
      nvtxEventAttributes_t eventAttrib{};
      eventAttrib.version = NVTX_VERSION;
      eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
      eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
      eventAttrib.message.ascii = name;
      nvtxDomainRangePushEx(nvtxGlobal().domain, &eventAttrib););
  }

#ifdef __cpp_lib_source_location
  // Opens a range named after loc.function_name(). With the default argument, `loc` describes the location of the
  // expression that constructs the guard.
  _CCCL_HOST NVTXGuard(const std::source_location& loc = std::source_location::current())
      : NVTXGuard(loc.function_name())
  {}
#endif // __cpp_lib_source_location

  // Copying is disabled to keep pushes and pops balanced (this also suppresses the implicit move operations).
  NVTXGuard(const NVTXGuard&)            = delete;
  NVTXGuard& operator=(const NVTXGuard&) = delete;

  // Closes the range opened by the constructor.
  _CCCL_HOST_DEVICE ~NVTXGuard()
  {
    NV_IF_TARGET(NV_IS_HOST, nvtxDomainRangePop(nvtxGlobal().domain););
  }
};
} // namespace detail
// FIXME(bgruber): check that no scope is introduced by NV_IF_TARGET!

// Inserts an NVTX range starting here and lasting until the end of the enclosing scope.
// Expands to the declaration of a local ::cub::detail::NVTXGuard named __cub_nvtx_guard, constructed from __func__,
// followed by a (void) expression naming that variable (presumably to silence unused-variable warnings). The final
// expression has no semicolon, so the caller writes `CUB_NVTX_RANGE_SCOPE;`.
// Because the variable name is fixed, the macro can be used at most once per scope.
// TODO(bgruber): replace this by NVTX3_FUNC_RANGE from <nvtx3/nvtx3.hpp>, when available. Wasn't available in CTK 12.4.
#define CUB_NVTX_RANGE_SCOPE \
::cub::detail::NVTXGuard __cub_nvtx_guard(__func__); \
(void) __cub_nvtx_guard
// Conditional variant: declares an empty ::cuda::std::optional<NVTXGuard> named __cub_nvtx_guard and only
// emplaces a guard (constructed from __func__) when `condition` evaluates to true. If a guard was emplaced, it is
// destroyed together with the optional at the end of the enclosing scope.
// The expansion consists of several statements, including an unbraced `if`, so the macro must be used where a
// sequence of statements is allowed (e.g. not as the sole body of an unbraced if/for). The final expression has no
// semicolon; the caller supplies it.
#define CUB_NVTX_RANGE_SCOPE_IF(condition) \
::cuda::std::optional<::cub::detail::NVTXGuard> __cub_nvtx_guard; \
if (condition) \
__cub_nvtx_guard.emplace(__func__); \
(void) __cub_nvtx_guard

// Opens an NVTX range labelled `name` in the NVTXCCCLDomain domain, on the host path only (NV_IS_HOST).
// The expansion declares, inside NV_IF_TARGET:
//  - a function-local static ::nvtx3::registered_string_in constructed from `name`,
//  - a function-local static ::nvtx3::event_attributes constructed from that string,
//  - a (non-static) ::nvtx3::scoped_range_in constructed from those attributes.
// Since the first two objects are static, they are initialized only the first time a given expansion site is
// executed; a different `name` value on a later execution of the same site is not picked up.
// NOTE(review): the range only lasts until the end of the caller's scope if NV_IF_TARGET does not wrap its argument
// in a block of its own - confirm.
// The expansion already ends with `;` inside NV_IF_TARGET.
#define CUB_NVTX_RANGE_SCOPE(name) \
NV_IF_TARGET( \
NV_IS_HOST, \
static const ::nvtx3::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{name}; \
static const ::nvtx3::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
::nvtx3::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_range{__cub_nvtx3_func_attr}; \
(void) __cub_nvtx3_range;)

// Conditional variant of the range macro: the registered string and event attributes are set up as function-local
// statics from `name` regardless of `condition`, but the range itself is only started (via begin()) when
// `condition` evaluates to true. Everything is emitted on the host path only (NV_IS_HOST).
// NOTE(review): optional_scoped_range_in is taken from ::nvtx3::v1::detail, which looks like an internal, versioned
// namespace of the NVTX C++ wrapper - confirm it is stable enough to depend on.
// NOTE(review): as with any declaration placed inside NV_IF_TARGET, the range only lasts until the end of the
// caller's scope if NV_IF_TARGET does not introduce a block of its own - confirm.
// `condition` is evaluated on every execution, whereas `name` is only consumed when the statics are first
// initialized.
#define CUB_NVTX_RANGE_SCOPE_IF(condition, name) \
NV_IF_TARGET( \
NV_IS_HOST, \
static const ::nvtx3::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{name}; \
static const ::nvtx3::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
::nvtx3::v1::detail::optional_scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_range; \
if (condition) __cub_nvtx3_range.begin(__cub_nvtx3_func_attr); \
(void) __cub_nvtx3_range;)
} // namespace detail
CUB_NAMESPACE_END
Loading

0 comments on commit c963082

Please sign in to comment.