diff --git a/CHANGELOG.md b/CHANGELOG.md index 19699b22038..13e28b6406e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ - PR #3126 Round 2 of snappy decompression optimizations - PR #3046 Define and implement new copying APIs `empty_like` and `allocate_like` - PR #3128 Support MultiIndex in DataFrame.join +- PR #3135 Add nvtx utilities to cudf::nvtx namespace - PR #3021 Java host side concat of serialized buffers - PR #3138 Movey unary files to legacy diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e92427afac7..e21d8c3abd4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -415,6 +415,7 @@ add_library(cudf src/utilities/legacy/cuda_utils.cu src/utilities/column_utils.cpp src/utilities/legacy/error_utils.cpp + src/utilities/nvtx/nvtx_utils.cpp src/utilities/nvtx/legacy/nvtx_utils.cpp src/copying/copy.cpp src/copying/legacy/copy.cpp diff --git a/cpp/include/cudf/utilities/nvtx_utils.hpp b/cpp/include/cudf/utilities/nvtx_utils.hpp new file mode 100644 index 00000000000..8b0ea67f728 --- /dev/null +++ b/cpp/include/cudf/utilities/nvtx_utils.hpp @@ -0,0 +1,66 @@ +#pragma once + +#include + +namespace cudf { +namespace nvtx { + +enum class color : uint32_t { + GREEN = 0xff00ff00, + BLUE = 0xff0000ff, + YELLOW = 0xffffff00, + PURPLE = 0xffff00ff, + CYAN = 0xff00ffff, + RED = 0xffff0000, + WHITE = 0xffffffff, + DARK_GREEN = 0xff006600, + ORANGE = 0xffffa500, +}; + +constexpr color JOIN_COLOR = color::CYAN; +constexpr color GROUPBY_COLOR = color::GREEN; +constexpr color BINARY_OP_COLOR = color::YELLOW; +constexpr color PARTITION_COLOR = color::PURPLE; +constexpr color READ_CSV_COLOR = color::PURPLE; + +/**---------------------------------------------------------------------------* + * @brief Start an NVTX range. + * + * This function is useful only for profiling with nvvp or Nsight Systems. 
It + * demarcates the beginning of a user-defined range with a specified name and + * color that will show up in the timeline view of nvvp/Nsight Systems. Can be + * nested within other ranges. + * + * @throws cudf::logic_error if `name` is null + * + * @param[in] name The name of the NVTX range + * @param[in] color The color to use for the range + *---------------------------------------------------------------------------**/ +void range_push(const char* name, color color); + +/**---------------------------------------------------------------------------* + * @brief Start an NVTX range with a custom ARGB color code. + * + * This function is useful only for profiling with nvvp or Nsight Systems. It + * demarcates the beginning of a user-defined range with a specified name and + * color that will show up in the timeline view of nvvp/Nsight Systems. Can be + * nested within other ranges. + * + * @throws cudf::logic_error if `name` is null + * + * @param[in] name The name of the NVTX range + * @param[in] color The ARGB hex color code to use to color this range (e.g., 0xFF00FF00) + *---------------------------------------------------------------------------**/ +void range_push_hex(const char* name, uint32_t color); + +/**---------------------------------------------------------------------------* + * @brief Ends the inner-most NVTX range. + * + * This function is useful only for profiling with nvvp or Nsight Systems. It + * will demarcate the end of the inner-most range, i.e., the most recent call to + * range_push. 
+ *---------------------------------------------------------------------------**/ +void range_pop(); + +} // namespace nvtx +} // namespace cudf diff --git a/cpp/src/binaryop/compiled/launcher.cuh b/cpp/src/binaryop/compiled/launcher.cuh index 563dcff1dc1..7c8b583f55c 100644 --- a/cpp/src/binaryop/compiled/launcher.cuh +++ b/cpp/src/binaryop/compiled/launcher.cuh @@ -17,7 +17,7 @@ #ifndef COMPILED_BINARY_OPS_LAUNCHER_H #define COMPILED_BINARY_OPS_LAUNCHER_H -#include +#include #include #include @@ -57,7 +57,7 @@ struct BinaryOp { GDF_REQUIRE(lhs->size == output->size, GDF_COLUMN_SIZE_MISMATCH); GDF_REQUIRE(lhs->dtype == rhs->dtype, GDF_UNSUPPORTED_DTYPE); - PUSH_RANGE("LIBGDF_BINARY_OP", BINARY_OP_COLOR); + nvtx::range_push("CUDF_BINARY_OP", nvtx::BINARY_OP_COLOR); // find optimal blocksize int mingridsize, blocksize; CUDA_TRY( @@ -82,7 +82,7 @@ struct BinaryOp { cudaDeviceSynchronize(); - POP_RANGE(); + nvtx::range_pop(); CUDA_CHECK_LAST(); return GDF_SUCCESS; diff --git a/cpp/src/groupby/legacy/groupby_without_aggregation.cu b/cpp/src/groupby/legacy/groupby_without_aggregation.cu index a3293eda5b2..08fee62ec0f 100644 --- a/cpp/src/groupby/legacy/groupby_without_aggregation.cu +++ b/cpp/src/groupby/legacy/groupby_without_aggregation.cu @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/cpp/src/hash/legacy/hashing.cu b/cpp/src/hash/legacy/hashing.cu index 8a4e30e349a..1a83c832d40 100644 --- a/cpp/src/hash/legacy/hashing.cu +++ b/cpp/src/hash/legacy/hashing.cu @@ -23,7 +23,7 @@ #include #include "hash/hash_functions.cuh" #include -#include +#include #include #include @@ -603,7 +603,7 @@ gdf_error gdf_hash_partition(int num_input_cols, return GDF_COLUMN_SIZE_MISMATCH; } - PUSH_RANGE("LIBGDF_HASH_PARTITION", PARTITION_COLOR); + cudf::nvtx::range_push("CUDF_HASH_PARTITION", cudf::nvtx::PARTITION_COLOR); cudf::table input_table(input, num_input_cols); cudf::table output_table(partitioned_output, num_input_cols); @@ -638,7 +638,7 @@ gdf_error 
gdf_hash_partition(int num_input_cols, gdf_status = GDF_INVALID_HASH_FUNCTION; } - POP_RANGE(); + cudf::nvtx::range_pop(); return gdf_status; } diff --git a/cpp/src/join/legacy/joining.cu b/cpp/src/join/legacy/joining.cu index f4053216fec..9798f80a6dd 100644 --- a/cpp/src/join/legacy/joining.cu +++ b/cpp/src/join/legacy/joining.cu @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -214,7 +214,7 @@ void join_call(cudf::table const& left, cudf::table const& right, gdf_method join_method = join_context->flag_method; gdf_error gdf_error_code{GDF_SUCCESS}; - PUSH_RANGE("LIBGDF_JOIN", JOIN_COLOR); + nvtx::range_push("CUDF_JOIN", nvtx::JOIN_COLOR); switch(join_method) { @@ -245,7 +245,7 @@ void join_call(cudf::table const& left, cudf::table const& right, CUDF_FAIL("Unsupported join Method"); } - POP_RANGE(); + nvtx::range_pop(); } /**---------------------------------------------------------------------------* @@ -322,7 +322,7 @@ cudf::table construct_join_output_df( gdf_column * left_indices, gdf_column * right_indices) { - PUSH_RANGE("LIBGDF_JOIN_OUTPUT", JOIN_COLOR); + nvtx::range_push("CUDF_JOIN_OUTPUT", nvtx::JOIN_COLOR); //create left and right input table with columns not joined on std::vector left_columns_in_common (columns_in_common.size()); std::vector right_columns_in_common (columns_in_common.size()); @@ -406,7 +406,7 @@ cudf::table construct_join_output_df( } CHECK_STREAM(0); - POP_RANGE(); + nvtx::range_pop(); return result; } diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index da5d9e3e9fb..c75102cbfa4 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include #include @@ -154,7 +154,7 @@ struct rolling_window_launcher typename std::enable_if_t(), std::nullptr_t> = nullptr> void dispatch_aggregation_type(cudf::size_type nrows, cudaStream_t stream, TArgs... 
FArgs) { - PUSH_RANGE("CUDF_ROLLING", GDF_ORANGE); + cudf::nvtx::range_push("CUDF_ROLLING", cudf::nvtx::color::ORANGE); cudf::size_type block = 256; cudf::size_type grid = (nrows + block-1) / block; @@ -164,7 +164,7 @@ struct rolling_window_launcher // check the stream for debugging CHECK_STREAM(stream); - POP_RANGE(); + cudf::nvtx::range_pop(); } /** diff --git a/cpp/src/transpose/legacy/transpose.cu b/cpp/src/transpose/legacy/transpose.cu index 73bd7624c14..b3127816251 100644 --- a/cpp/src/transpose/legacy/transpose.cu +++ b/cpp/src/transpose/legacy/transpose.cu @@ -1,6 +1,6 @@ /* Copyright 2018 NVIDIA Corporation. All rights reserved. */ -#include +#include #include #include #include @@ -167,7 +167,7 @@ gdf_error gdf_transpose(cudf::size_type ncols, gdf_column** in_cols, } } - PUSH_RANGE("CUDF_TRANSPOSE", GDF_GREEN); + cudf::nvtx::range_push("CUDF_TRANSPOSE", cudf::nvtx::color::GREEN); // Copy input columns `data` and `valid` pointers to device std::vector in_columns_data(ncols); @@ -213,6 +213,6 @@ gdf_error gdf_transpose(cudf::size_type ncols, gdf_column** in_cols, out_cols[i]->null_count = out_columns_nullct[i]; } - POP_RANGE(); + cudf::nvtx::range_pop(); return GDF_SUCCESS; } \ No newline at end of file diff --git a/cpp/src/utilities/nvtx/nvtx_utils.cpp b/cpp/src/utilities/nvtx/nvtx_utils.cpp new file mode 100644 index 00000000000..630da8fb42a --- /dev/null +++ b/cpp/src/utilities/nvtx/nvtx_utils.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "utilities/error_utils.hpp" + +#ifdef USE_NVTX +#include +#endif + +namespace cudf { +namespace nvtx { + +void range_push(const char* name, color color) +{ + range_push_hex(name, static_cast(color)); +} + +void range_push_hex(const char* name, uint32_t color) +{ +#ifdef USE_NVTX + CUDF_EXPECTS(name != nullptr, "Null name string."); + + nvtxEventAttributes_t eventAttrib{}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = color; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = name; + nvtxRangePushEx(&eventAttrib); +#endif +} + +void range_pop() +{ +#ifdef USE_NVTX + nvtxRangePop(); +#endif +} + +} // namespace nvtx +} // namespace cudf diff --git a/java/src/main/native/src/NvtxRangeJni.cpp b/java/src/main/native/src/NvtxRangeJni.cpp index 44622e5b606..a59464eb897 100644 --- a/java/src/main/native/src/NvtxRangeJni.cpp +++ b/java/src/main/native/src/NvtxRangeJni.cpp @@ -17,6 +17,7 @@ #include #include #include "cudf/cudf.h" +#include #include "jni_utils.hpp" @@ -27,7 +28,7 @@ Java_ai_rapids_cudf_NvtxRange_push(JNIEnv *env, jclass clazz, jstring name, jint color_bits) { try { cudf::jni::native_jstring range_name(env, name); - JNI_GDF_TRY(env, , gdf_nvtx_range_push_hex(range_name.get(), color_bits)); + cudf::nvtx::range_push_hex(range_name.get(), color_bits); } CATCH_STD(env, ); } @@ -35,7 +36,7 @@ Java_ai_rapids_cudf_NvtxRange_push(JNIEnv *env, jclass clazz, JNIEXPORT void JNICALL Java_ai_rapids_cudf_NvtxRange_pop(JNIEnv *env, jclass clazz) { try { - JNI_GDF_TRY(env, , gdf_nvtx_range_pop()); + cudf::nvtx::range_pop(); } CATCH_STD(env, ); }