Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into bug_hd_vector
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia authored Mar 11, 2024
2 parents 4ed86b9 + c4f1a26 commit 259d082
Show file tree
Hide file tree
Showing 28 changed files with 711 additions and 2,083 deletions.
5 changes: 4 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ endif()
rapids_cpm_init()
# find jitify
include(cmake/thirdparty/get_jitify.cmake)
# find NVTX
include(cmake/thirdparty/get_nvtx.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find CCCL before rmm so that we get cudf's patched version of CCCL
Expand Down Expand Up @@ -382,6 +384,7 @@ add_library(
src/io/json/read_json.cu
src/io/json/legacy/json_gpu.cu
src/io/json/legacy/reader_impl.cu
src/io/json/parser_features.cpp
src/io/json/write_json.cu
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
Expand Down Expand Up @@ -776,7 +779,7 @@ add_dependencies(cudf jitify_preprocess_run)
target_link_libraries(
cudf
PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
PRIVATE cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
PRIVATE nvtx3-cpp cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
$<TARGET_NAME_IF_EXISTS:cuFile_interface>
)

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ target_compile_options(
target_link_libraries(
cudf_datagen
PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf
cudftestutil
cudftestutil nvtx3-cpp
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)

Expand Down
53 changes: 42 additions & 11 deletions cpp/benchmarks/groupby/group_max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,30 @@
#include <nvbench/nvbench.cuh>

template <typename Type>
void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
void groupby_max_helper(nvbench::state& state,
cudf::size_type num_rows,
cudf::size_type cardinality,
double null_probability)
{
auto const size = static_cast<cudf::size_type>(state.get_int64("num_rows"));

auto const keys = [&] {
data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, 100);
return create_random_column(cudf::type_to_id<int32_t>(), row_count{size}, profile);
data_profile const profile =
data_profile_builder()
.cardinality(cardinality)
.no_validity()
.distribution(cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, num_rows);
return create_random_column(cudf::type_to_id<int32_t>(), row_count{num_rows}, profile);
}();

auto const vals = [&] {
auto builder = data_profile_builder().cardinality(0).distribution(
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);
if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) {
builder.null_probability(null_freq);
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, num_rows);
if (null_probability > 0) {
builder.null_probability(null_probability);
} else {
builder.no_validity();
}
return create_random_column(cudf::type_to_id<Type>(), row_count{size}, data_profile{builder});
return create_random_column(
cudf::type_to_id<Type>(), row_count{num_rows}, data_profile{builder});
}();

auto keys_view = keys->view();
Expand All @@ -55,13 +60,39 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });

auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
state.add_buffer_size(
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

// Entry point for the typed groupby-max benchmark: reads every axis value
// from the nvbench state and forwards them to the shared helper, which
// builds the random keys/values columns and runs the aggregation.
template <typename Type>
void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
{
  auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const n_groups  = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  auto const null_prob = state.get_float64("null_probability");

  groupby_max_helper<Type>(state, n_rows, n_groups, null_prob);
}

// Cardinality-sweep variant of the groupby-max benchmark: the row count is
// pinned at 20M and nulls are disabled so that only the "cardinality" axis
// (read from the nvbench state) varies across runs.
template <typename Type>
void bench_groupby_max_cardinality(nvbench::state& state, nvbench::type_list<Type>)
{
  constexpr auto kNumRows  = 20'000'000;  // fixed input size for every data point
  constexpr auto kNullProb = 0.;          // no nulls in this sweep

  auto const n_groups = static_cast<cudf::size_type>(state.get_int64("cardinality"));

  groupby_max_helper<Type>(state, kNumRows, n_groups, kNullProb);
}

// Register the typed groupby-max benchmark: sweeps row count (2^12..2^24)
// and null probability for several value types at a fixed cardinality of 0.
NVBENCH_BENCH_TYPES(bench_groupby_max,
NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, int64_t, float, double>))
.set_name("groupby_max")
.add_int64_axis("cardinality", {0})
.add_int64_power_of_two_axis("num_rows", {12, 18, 24})
.add_float64_axis("null_probability", {0, 0.1, 0.9});

// Register the cardinality-only sweep (int32 values; row count and null
// probability are fixed inside bench_groupby_max_cardinality).
NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t>))
.set_name("groupby_max_cardinality")
.add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000});
19 changes: 13 additions & 6 deletions cpp/benchmarks/groupby/group_nunique.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,17 +39,23 @@ auto make_aggregation_request_vector(cudf::column_view const& values, Args&&...
template <typename Type>
void bench_groupby_nunique(nvbench::state& state, nvbench::type_list<Type>)
{
auto const size = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const size = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));

auto const keys = [&] {
data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution(
cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, 100);
data_profile profile =
data_profile_builder()
.cardinality(cardinality)
.no_validity()
.distribution(cudf::type_to_id<int32_t>(), distribution_id::UNIFORM, 0, size);
return create_random_column(cudf::type_to_id<int32_t>(), row_count{size}, profile);
}();

auto const vals = [&] {
data_profile profile = data_profile_builder().cardinality(0).distribution(
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);
data_profile profile =
data_profile_builder()
.cardinality(cardinality)
.distribution(cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, size);
if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) {
profile.set_null_probability(null_freq);
} else {
Expand All @@ -71,4 +77,5 @@ void bench_groupby_nunique(nvbench::state& state, nvbench::type_list<Type>)
NVBENCH_BENCH_TYPES(bench_groupby_nunique, NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, int64_t>))
.set_name("groupby_nunique")
.add_int64_power_of_two_axis("num_rows", {12, 16, 20, 24})
.add_int64_axis("cardinality", {0})
.add_float64_axis("null_probability", {0, 0.5});
12 changes: 7 additions & 5 deletions cpp/benchmarks/groupby/group_rank.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -31,10 +31,12 @@ static void nvbench_groupby_rank(nvbench::state& state,

bool const is_sorted = state.get_int64("is_sorted");
cudf::size_type const column_size = state.get_int64("data_size");
constexpr int num_groups = 100;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));

data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
dtype, distribution_id::UNIFORM, 0, num_groups);
data_profile const profile = data_profile_builder()
.cardinality(cardinality)
.no_validity()
.distribution(dtype, distribution_id::UNIFORM, 0, column_size);

auto source_table = create_random_table({dtype, dtype}, row_count{column_size}, profile);

Expand Down Expand Up @@ -100,5 +102,5 @@ NVBENCH_BENCH_TYPES(nvbench_groupby_rank, NVBENCH_TYPE_AXES(methods))
10000000, // 10M
100000000, // 100M
})

.add_int64_axis("cardinality", {0})
.add_int64_axis("is_sorted", {0, 1});
27 changes: 27 additions & 0 deletions cpp/cmake/thirdparty/get_nvtx.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# This function finds NVTX and sets any additional necessary environment variables.
# The nvtx3-c and nvtx3-cpp targets are promoted to global scope so any target in
# the build tree can link against them.
function(find_and_configure_nvtx)
# Fetch NVTX v3.1.0 through CPM if it is not already available.
# SOURCE_SUBDIR c points the build at the CMake project inside the NVTX repo.
rapids_cpm_find(
NVTX3 3.1.0
GLOBAL_TARGETS nvtx3-c nvtx3-cpp
CPM_ARGS
GIT_REPOSITORY https://github.com/NVIDIA/NVTX.git
GIT_TAG v3.1.0
GIT_SHALLOW TRUE SOURCE_SUBDIR c
)
endfunction()

find_and_configure_nvtx()
4 changes: 2 additions & 2 deletions cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -664,11 +664,11 @@ defaults.
## NVTX Ranges

In order to aid in performance optimization and debugging, all compute intensive libcudf functions
should have a corresponding NVTX range. Choose between `CUDF_FUNC_RANGE` or `cudf::thread_range`
should have a corresponding NVTX range. Choose between `CUDF_FUNC_RANGE` or `cudf::scoped_range`
for declaring NVTX ranges in the current scope:
- Use the `CUDF_FUNC_RANGE()` macro if you want to use the name of the function as the name of the
NVTX range
- Use `cudf::thread_range rng{"custom_name"};` to provide a custom name for the current scope's
- Use `cudf::scoped_range rng{"custom_name"};` to provide a custom name for the current scope's
NVTX range

For more information about NVTX, see [here](https://github.com/NVIDIA/NVTX/tree/dev/c).
Expand Down
Loading

0 comments on commit 259d082

Please sign in to comment.