Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-22.06' into devtools-buil…
Browse files Browse the repository at this point in the history
…d-in-docker-for-native
  • Loading branch information
gerashegalov committed Apr 7, 2022
2 parents 3ad98bf + fb03c8b commit acc42a8
Show file tree
Hide file tree
Showing 152 changed files with 1,870 additions and 2,864 deletions.
1 change: 1 addition & 0 deletions ci/cpu/upload.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

set -e

Expand Down
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
########################
# cuDF Version Updater #
########################
Expand Down
1 change: 1 addition & 0 deletions ci/utils/nbtestlog2junitxml.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Generate a junit-xml file from parsing a nbtest log

import re
Expand Down
15 changes: 15 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,21 @@ include(cmake/Modules/JitifyPreprocessKernels.cmake)
# find cuFile
include(cmake/Modules/FindcuFile.cmake)

# Workaround until https://github.com/rapidsai/rapids-cmake/issues/176 is resolved
# This branch is taken only when BUILD_SHARED_LIBS is off.
if(NOT BUILD_SHARED_LIBS)
include("${rapids-cmake-dir}/export/find_package_file.cmake")
# NOTE(review): list(APPEND) extends any value METADATA_KINDS already holds - confirm the variable
# is otherwise unset at this point, or the loop below visits extra/duplicate kinds.
list(APPEND METADATA_KINDS BUILD INSTALL)
# Repeat the same registrations once for the BUILD kind and once for the INSTALL kind.
foreach(METADATA_KIND IN LISTS METADATA_KINDS)
# Associate the project's FindcuFile.cmake module with the cudf-exports export set.
rapids_export_find_package_file(
${METADATA_KIND} "${CUDF_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" cudf-exports
)
# Register cuco, ZLIB, cuFile and nvcomp with the cudf-exports export set (presumably so that
# consumers of the exported cudf targets look these packages up as well - see rapids-cmake docs).
rapids_export_package(${METADATA_KIND} cuco cudf-exports)
rapids_export_package(${METADATA_KIND} ZLIB cudf-exports)
rapids_export_package(${METADATA_KIND} cuFile cudf-exports)
rapids_export_package(${METADATA_KIND} nvcomp cudf-exports)
endforeach()
endif()

# ##################################################################################################
# * library targets -------------------------------------------------------------------------------

Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/column/concatenate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static void BM_concatenate(benchmark::State& state)
auto input_columns = input->view();
std::vector<cudf::column_view> column_views(input_columns.begin(), input_columns.end());

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
Expand Down Expand Up @@ -87,7 +87,7 @@ static void BM_concatenate_tables(benchmark::State& state)
return table->view();
});

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
Expand Down Expand Up @@ -146,7 +146,7 @@ static void BM_concatenate_strings(benchmark::State& state)
return static_cast<cudf::column_view>(col);
});

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);

for (auto _ : state) {
cuda_event_timer raii(state, true, rmm::cuda_stream_default);
Expand Down
118 changes: 109 additions & 9 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ T get_distribution_mean(distribution_params<T> const& dist)
}
}

/**
* @brief Computes the average element size in a column, given the data profile.
*
* Random distribution parameters like average string length and maximum list nesting level affect
* the element size of non-fixed-width columns. For lists and structs, `avg_element_size` is called
* recursively to determine the size of nested columns.
*/
size_t avg_element_size(data_profile const& profile, cudf::data_type dtype);

// Utilities to determine the mean size of an element, given the data profile
template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_width<T>())>
size_t non_fixed_width_size(data_profile const& profile)
Expand All @@ -112,10 +121,22 @@ size_t non_fixed_width_size<cudf::list_view>(data_profile const& profile)
{
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean = get_distribution_mean(dist_params.length_params);
auto const element_size = cudf::size_of(cudf::data_type{dist_params.element_type});
auto const element_size = avg_element_size(profile, cudf::data_type{dist_params.element_type});
return element_size * pow(single_level_mean, dist_params.max_depth);
}

/**
 * @brief Computes the mean size of a struct element, given the data profile.
 *
 * The result is the sum of `avg_element_size` over every leaf type listed in the struct
 * distribution parameters of the profile. The nesting depth does not enter the computation.
 */
template <>
size_t non_fixed_width_size<cudf::struct_view>(data_profile const& profile)
{
  auto const dist_params = profile.get_distribution_params<cudf::struct_view>();
  // The running sum is seeded as `size_t` (the return type) and taken by value: from C++20 on,
  // `std::accumulate` hands the accumulator to the callable as an rvalue, which cannot bind to a
  // non-const lvalue reference parameter.
  return std::accumulate(dist_params.leaf_types.cbegin(),
                         dist_params.leaf_types.cend(),
                         size_t{0},
                         [&](size_t sum, auto type_id) {
                           return sum + avg_element_size(profile, cudf::data_type{type_id});
                         });
}

struct non_fixed_width_size_fn {
template <typename T>
size_t operator()(data_profile const& profile)
Expand Down Expand Up @@ -527,14 +548,6 @@ std::unique_ptr<cudf::column> create_random_column<cudf::dictionary32>(data_prof
CUDF_FAIL("not implemented yet");
}

template <>
std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profile const& profile,
thrust::minstd_rand& engine,
cudf::size_type num_rows)
{
CUDF_FAIL("not implemented yet");
}

/**
* @brief Functor to dispatch create_random_column calls.
*/
Expand All @@ -549,6 +562,93 @@ struct create_rand_col_fn {
}
};

/**
* @brief Calculates the number of direct parents needed to generate a struct column hierarchy with
* the lowest maximum number of children in any nested column.
*
* Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf
* columns and nesting levels. The column tree is considered evenly distributed if all columns have
* nearly the same number of child columns (difference not larger than one).
*/
int num_direct_parents(int num_lvls, int num_leaf_columns)
{
  // Fan-out each column would have if the leaves were spread perfectly evenly over all levels
  double const mean_fanout = std::pow(num_leaf_columns, 1. / num_lvls);
  // In an evenly distributed hierarchy every column has either `fanout_lo` or `fanout_hi` children
  auto const fanout_lo = static_cast<int>(std::floor(mean_fanout));
  auto const fanout_hi = fanout_lo + 1;

  // Fewest parents that can adopt all current columns without any parent exceeding `fanout_hi`
  auto const parents_for_this_level =
    static_cast<int>(std::ceil(static_cast<double>(num_leaf_columns) / fanout_hi));
  // Fewest parents that still give every column in the levels above at least `fanout_lo` children
  auto const parents_for_upper_levels = static_cast<int>(std::pow(fanout_lo, num_lvls - 1));

  // The parent count has to honour both lower bounds
  return std::max(parents_for_this_level, parents_for_upper_levels);
}

/**
 * @brief Creates a random struct column with `num_rows` rows, given the data profile.
 *
 * One leaf column is generated for each entry of the profile's struct `leaf_types`. The leaves are
 * then grouped into struct columns level by level, for `max_depth` levels, with
 * `num_direct_parents` choosing how many struct columns each level has. The single struct column
 * produced at the last level is returned.
 *
 * @param profile Data profile supplying the struct distribution parameters and null frequency
 * @param engine Random engine used for the leaf columns and the struct validity
 * @param num_rows Number of rows in the leaf columns and in every struct column
 * @return The top-level struct column
 */
template <>
std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profile const& profile,
thrust::minstd_rand& engine,
cudf::size_type num_rows)
{
auto const dist_params = profile.get_distribution_params<cudf::struct_view>();

// Generate leaf columns
std::vector<std::unique_ptr<cudf::column>> children;
children.reserve(dist_params.leaf_types.size());
std::transform(dist_params.leaf_types.cbegin(),
dist_params.leaf_types.cend(),
std::back_inserter(children),
[&](auto& type_id) {
return cudf::type_dispatcher(
cudf::data_type(type_id), create_rand_col_fn{}, profile, engine, num_rows);
});

// Parameterized with (1 - null frequency), treating an unset null frequency as 0; presumably this
// is the probability that a generated row is valid - confirm against `distribution_params<bool>`.
auto valid_dist =
random_value_fn<bool>(distribution_params<bool>{1. - profile.get_null_frequency().value_or(0)});

// Generate the column bottom-up
for (int lvl = dist_params.max_depth; lvl > 0; --lvl) {
// Generating the next level
std::vector<std::unique_ptr<cudf::column>> parents;
parents.resize(num_direct_parents(lvl, children.size()));

auto current_child = children.begin();
for (auto current_parent = parents.begin(); current_parent != parents.end(); ++current_parent) {
// Each parent gets its own validity: random values turned into a mask when the profile has a
// null frequency, otherwise a value-initialized (buffer, count) pair.
auto [null_mask, null_count] = [&]() {
if (profile.get_null_frequency().has_value()) {
auto valids = valid_dist(engine, num_rows);
return cudf::detail::valid_if(valids.begin(), valids.end(), thrust::identity<bool>{});
}
return std::pair<rmm::device_buffer, cudf::size_type>{};
}();

// Adopt remaining children as evenly as possible
// (remaining children divided by remaining parents; rounded up, going by the helper's name)
auto const num_to_adopt = cudf::util::div_rounding_up_unsafe(
std::distance(current_child, children.end()), std::distance(current_parent, parents.end()));
CUDF_EXPECTS(num_to_adopt > 0, "No children columns left to adopt");

// Move the next `num_to_adopt` columns out of `children`; the moved-from slots are skipped by
// advancing `current_child` past them.
std::vector<std::unique_ptr<cudf::column>> children_to_adopt;
children_to_adopt.insert(children_to_adopt.end(),
std::make_move_iterator(current_child),
std::make_move_iterator(current_child + num_to_adopt));
current_child += children_to_adopt.size();

*current_parent = cudf::make_structs_column(
num_rows, std::move(children_to_adopt), null_count, std::move(null_mask));
}

// Level 1 is the last iteration of the loop; its only parent is the result.
if (lvl == 1) {
CUDF_EXPECTS(parents.size() == 1, "There should be one top-level column");
return std::move(parents.front());
}
// The struct columns built at this level become the children of the next level up.
children = std::move(parents);
}
// Only reached when the loop body never runs, i.e. `max_depth` is not positive.
CUDF_FAIL("Reached unreachable code in struct column creation");
}

template <typename T>
struct clamp_down : public thrust::unary_function<T, T> {
T max;
Expand Down
39 changes: 38 additions & 1 deletion cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,15 @@ struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::list_view
cudf::size_type max_depth;
};

/**
* @brief Structs are parameterized by the maximal nesting level, and the leaf column types.
*/
// Specialization selected when T is cudf::struct_view.
template <typename T>
struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>> {
// Types of the leaf columns of the struct hierarchy
std::vector<cudf::type_id> leaf_types;
// Maximal nesting level of the struct hierarchy
cudf::size_type max_depth;
};

// Present for compilation only. To be implemented once readers/writers support fixed-point types.
template <typename T>
struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
Expand Down Expand Up @@ -214,6 +223,8 @@ class data_profile {
distribution_params<cudf::string_view> string_dist_desc{{distribution_id::NORMAL, 0, 32}};
distribution_params<cudf::list_view> list_dist_desc{
cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 100}, 2};
distribution_params<cudf::struct_view> struct_dist_desc{
{cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2};
std::map<cudf::type_id, distribution_params<__uint128_t>> decimal_params;

double bool_probability = 0.5;
Expand Down Expand Up @@ -281,6 +292,12 @@ class data_profile {
return list_dist_desc;
}

// Overload enabled only when T is cudf::struct_view; returns a copy of the struct distribution
// parameters stored in this profile.
template <typename T, std::enable_if_t<std::is_same_v<T, cudf::struct_view>>* = nullptr>
distribution_params<T> get_distribution_params() const
{
return struct_dist_desc;
}

template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
distribution_params<typename T::rep> get_distribution_params() const
{
Expand Down Expand Up @@ -357,8 +374,28 @@ class data_profile {
void set_cardinality(cudf::size_type c) { cardinality = c; }
void set_avg_run_length(cudf::size_type avg_rl) { avg_run_length = avg_rl; }

void set_list_depth(cudf::size_type max_depth) { list_dist_desc.max_depth = max_depth; }
// Sets the maximal nesting depth used for list columns.
// Fails via CUDF_EXPECTS unless `max_depth` is greater than zero.
void set_list_depth(cudf::size_type max_depth)
{
CUDF_EXPECTS(max_depth > 0, "List depth must be positive");
list_dist_desc.max_depth = max_depth;
}

void set_list_type(cudf::type_id type) { list_dist_desc.element_type = type; }

// Sets the maximal nesting depth used for struct columns.
// Fails via CUDF_EXPECTS unless `max_depth` is greater than zero.
void set_struct_depth(cudf::size_type max_depth)
{
CUDF_EXPECTS(max_depth > 0, "Struct depth must be positive");
struct_dist_desc.max_depth = max_depth;
}

void set_struct_types(std::vector<cudf::type_id> const& types)
{
  // The list holds leaf types only, so STRUCT itself is rejected before anything is stored
  bool const has_struct_leaf =
    std::find(types.cbegin(), types.cend(), cudf::type_id::STRUCT) != types.cend();
  CUDF_EXPECTS(!has_struct_leaf, "Cannot include STRUCT as its own subtype");
  struct_dist_desc.leaf_types = types;
}
};

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/fixture/benchmark_fixture.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/io/orc/orc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, decimal, type_group_id:
RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT);

BENCHMARK_DEFINE_F(OrcRead, column_selection)
(::benchmark::State& state) { BM_orc_read_varying_options(state); }
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/io/orc/orc_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, decimal, type_group_id::F
WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT);

BENCHMARK_DEFINE_F(OrcWrite, writer_options)
(::benchmark::State& state) { BM_orc_write_varying_options(state); }
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/io/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, decimal, type_group_id
RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, struct, cudf::type_id::STRUCT);

BENCHMARK_DEFINE_F(ParquetRead, column_selection)
(::benchmark::State& state) { BM_parq_read_varying_options(state); }
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/io/parquet/parquet_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, decimal, type_group_id::
WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, struct, cudf::type_id::STRUCT);

BENCHMARK_DEFINE_F(ParquetWrite, writer_options)
(::benchmark::State& state) { BM_parq_write_varying_options(state); }
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/join/conditional_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
16 changes: 8 additions & 8 deletions cpp/benchmarks/join/generate_input_tables.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -139,31 +139,31 @@ void generate_input_tables(key_type* const build_tbl,

// Maximize exposed parallelism while minimizing storage for curand state
int num_blocks_init_build_tbl{-1};
CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks_init_build_tbl, init_build_tbl<key_type, size_type>, block_size, 0));

int num_blocks_init_probe_tbl{-1};
CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&num_blocks_init_probe_tbl, init_probe_tbl<key_type, size_type>, block_size, 0));

int dev_id{-1};
CUDA_TRY(cudaGetDevice(&dev_id));
CUDF_CUDA_TRY(cudaGetDevice(&dev_id));

int num_sms{-1};
CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));

const int num_states =
num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
rmm::device_uvector<curandState> devStates(num_states, rmm::cuda_stream_default);

init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states);

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);

init_build_tbl<key_type, size_type><<<num_sms * num_blocks_init_build_tbl, block_size>>>(
build_tbl, build_tbl_size, multiplicity, devStates.data(), num_states);

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);

auto const rand_max = std::numeric_limits<key_type>::max();

Expand All @@ -177,5 +177,5 @@ void generate_input_tables(key_type* const build_tbl,
devStates.data(),
num_states);

CHECK_CUDA(0);
CUDF_CHECK_CUDA(0);
}
Loading

0 comments on commit acc42a8

Please sign in to comment.