Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-24.04' into ref/to_pandas_multiindex
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Mar 5, 2024
2 parents 04c4af7 + 13d807e commit ee07c5e
Show file tree
Hide file tree
Showing 51 changed files with 457 additions and 649 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.11" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
sha: ${{ inputs.sha }}
Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,17 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.11" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_dask_cudf.sh"
wheel-tests-dask-cudf:
needs: wheel-build-dask-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.11" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
devcontainer:
Expand All @@ -154,7 +156,8 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.11" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.11" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.2.2")))
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
Expand Down
138 changes: 85 additions & 53 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ auto deterministic_engine(unsigned seed) { return thrust::minstd_rand{seed}; }
* Computes the mean value for a distribution of given type and value bounds.
*/
template <typename T>
T get_distribution_mean(distribution_params<T> const& dist)
double get_distribution_mean(distribution_params<T> const& dist)
{
switch (dist.id) {
case distribution_id::NORMAL:
Expand All @@ -90,33 +90,67 @@ T get_distribution_mean(distribution_params<T> const& dist)
}
}

/**
 * @brief Calculates the number of direct parents needed to generate a struct column hierarchy with
 * lowest maximum number of children in any nested column.
 *
 * Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf
 * columns and nesting levels. The column tree is considered evenly distributed if all columns have
 * nearly the same number of child columns (difference not larger than one).
 */
int num_direct_parents(int num_lvls, int num_leaf_columns)
{
  // Average branching factor of an even tree with this many leaves spread over `num_lvls` levels
  auto const avg_fanout = std::pow(num_leaf_columns, 1. / num_lvls);
  // In an even tree every column has either floor(avg) or floor(avg) + 1 children
  auto const fanout_lo = static_cast<int>(std::floor(avg_fanout));
  auto const fanout_hi = fanout_lo + 1;

  // Fewest parents such that no parent needs more than `fanout_hi` children at this level
  auto const parents_for_leaves =
    static_cast<int>(std::ceil(static_cast<double>(num_leaf_columns) / fanout_hi));
  // Fewest parents such that the levels above can still give each column at least `fanout_lo`
  // children
  auto const parents_for_ancestors = static_cast<int>(std::pow(fanout_lo, num_lvls - 1));
  // Both constraints must hold simultaneously
  return std::max(parents_for_leaves, parents_for_ancestors);
}

// Average null-mask bytes contributed per row: one validity bit (1/8 byte) when the profile
// enables nulls, zero bytes when no null mask is generated
[[nodiscard]] double row_null_mask_size(data_profile const& profile)
{
  if (profile.get_null_probability().has_value()) { return 1. / 8; }
  return 0.;
}

/**
* @brief Computes the average element size in a column, given the data profile.
*
* Random distribution parameters like average string length and maximum list nesting level affect
* the element size of non-fixed-width columns. For lists and structs, `avg_element_size` is called
* recursively to determine the size of nested columns.
*/
size_t avg_element_size(data_profile const& profile, cudf::data_type dtype);
double avg_element_size(data_profile const& profile, cudf::data_type dtype);

// Utilities to determine the mean size of an element, given the data profile
template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_width<T>())>
size_t non_fixed_width_size(data_profile const& profile)
double non_fixed_width_size(data_profile const& profile)
{
CUDF_FAIL("Should not be called, use `size_of` for this type instead");
}

template <typename T, CUDF_ENABLE_IF(!cudf::is_fixed_width<T>())>
size_t non_fixed_width_size(data_profile const& profile)
double non_fixed_width_size(data_profile const& profile)
{
CUDF_FAIL("not implemented!");
}

template <>
size_t non_fixed_width_size<cudf::string_view>(data_profile const& profile)
double non_fixed_width_size<cudf::string_view>(data_profile const& profile)
{
auto const dist = profile.get_distribution_params<cudf::string_view>().length_params;
return get_distribution_mean(dist);
return get_distribution_mean(dist) * profile.get_valid_probability() + sizeof(cudf::size_type) +
row_null_mask_size(profile);
}

double geometric_sum(size_t n, double p)
Expand All @@ -126,45 +160,65 @@ double geometric_sum(size_t n, double p)
}

template <>
size_t non_fixed_width_size<cudf::list_view>(data_profile const& profile)
double non_fixed_width_size<cudf::list_view>(data_profile const& profile)
{
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean = get_distribution_mean(dist_params.length_params);
auto const dist_params = profile.get_distribution_params<cudf::list_view>();
auto const single_level_mean =
get_distribution_mean(dist_params.length_params) * profile.get_valid_probability();

// Leaf column size
auto const element_size = avg_element_size(profile, cudf::data_type{dist_params.element_type});
auto const element_count = std::pow(single_level_mean, dist_params.max_depth);

auto const offset_size = avg_element_size(profile, cudf::data_type{cudf::type_id::INT32});
// Each nesting level includes offsets, this is the sum of all levels
// Also include an additional offset per level for the size of the last element
auto const total_offset_count =
geometric_sum(dist_params.max_depth, single_level_mean) + dist_params.max_depth;
auto const total_offset_count = geometric_sum(dist_params.max_depth, single_level_mean);

return sizeof(cudf::size_type) * total_offset_count + element_size * element_count;
return element_size * element_count + offset_size * total_offset_count;
}

// Total number of struct (non-leaf) columns in the generated hierarchy, derived from the
// profile's struct distribution parameters (leaf count and maximum nesting depth)
[[nodiscard]] cudf::size_type num_struct_columns(data_profile const& profile)
{
  auto const params = profile.get_distribution_params<cudf::struct_view>();

  // Walk from the deepest level upward; at each level the previous level's columns become the
  // children and `num_direct_parents` gives how many parent columns that level needs
  cudf::size_type level_width  = params.leaf_types.size();
  cudf::size_type parent_total = 0;
  cudf::size_type lvl          = params.max_depth;
  while (lvl > 0) {
    level_width = num_direct_parents(lvl, level_width);
    parent_total += level_width;
    --lvl;
  }
  return parent_total;
}

template <>
size_t non_fixed_width_size<cudf::struct_view>(data_profile const& profile)
double non_fixed_width_size<cudf::struct_view>(data_profile const& profile)
{
auto const dist_params = profile.get_distribution_params<cudf::struct_view>();
return std::accumulate(dist_params.leaf_types.cbegin(),
dist_params.leaf_types.cend(),
0ul,
[&](auto& sum, auto type_id) {
return sum + avg_element_size(profile, cudf::data_type{type_id});
});
auto const total_children_size =
std::accumulate(dist_params.leaf_types.cbegin(),
dist_params.leaf_types.cend(),
0ul,
[&](auto& sum, auto type_id) {
return sum + avg_element_size(profile, cudf::data_type{type_id});
});

// struct columns have a null mask for each row
auto const structs_null_mask_size = num_struct_columns(profile) * row_null_mask_size(profile);

return total_children_size + structs_null_mask_size;
}

struct non_fixed_width_size_fn {
template <typename T>
size_t operator()(data_profile const& profile)
double operator()(data_profile const& profile)
{
return non_fixed_width_size<T>(profile);
}
};

size_t avg_element_size(data_profile const& profile, cudf::data_type dtype)
double avg_element_size(data_profile const& profile, cudf::data_type dtype)
{
if (cudf::is_fixed_width(dtype)) { return cudf::size_of(dtype); }
if (cudf::is_fixed_width(dtype)) { return cudf::size_of(dtype) + row_null_mask_size(profile); }
return cudf::type_dispatcher(dtype, non_fixed_width_size_fn{}, profile);
}

Expand Down Expand Up @@ -596,32 +650,6 @@ struct create_rand_col_fn {
}
};

/**
* @brief Calculates the number of direct parents needed to generate a struct column hierarchy with
* lowest maximum number of children in any nested column.
*
* Used to generate an "evenly distributed" struct column hierarchy with the given number of leaf
* columns and nesting levels. The column tree is considered evenly distributed if all columns have
* nearly the same number of child columns (difference not larger than one).
*/
int num_direct_parents(int num_lvls, int num_leaf_columns)
{
// Estimated average number of children in the hierarchy;
auto const num_children_avg = std::pow(num_leaf_columns, 1. / num_lvls);
// Minimum number of children columns for any column in the hierarchy
int const num_children_min = std::floor(num_children_avg);
// Maximum number of children columns for any column in the hierarchy
int const num_children_max = num_children_min + 1;

// Minimum number of columns needed so that their number of children does not exceed the maximum
int const min_for_current_nesting = std::ceil((double)num_leaf_columns / num_children_max);
// Minimum number of columns needed so that columns at the higher levels have at least the minimum
// number of children
int const min_for_upper_nesting = std::pow(num_children_min, num_lvls - 1);
// Both conditions need to be satisfied
return std::max(min_for_current_nesting, min_for_upper_nesting);
}

template <>
std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profile const& profile,
thrust::minstd_rand& engine,
Expand Down Expand Up @@ -825,13 +853,17 @@ std::unique_ptr<cudf::table> create_random_table(std::vector<cudf::type_id> cons
data_profile const& profile,
unsigned seed)
{
size_t const avg_row_bytes =
std::accumulate(dtype_ids.begin(), dtype_ids.end(), 0ul, [&](size_t sum, auto tid) {
auto const avg_row_bytes =
std::accumulate(dtype_ids.begin(), dtype_ids.end(), 0., [&](size_t sum, auto tid) {
return sum + avg_element_size(profile, cudf::data_type(tid));
});
cudf::size_type const num_rows = table_bytes.size / avg_row_bytes;
std::size_t const num_rows = std::lround(table_bytes.size / avg_row_bytes);
CUDF_EXPECTS(num_rows > 0, "Table size is too small for the given data types");
CUDF_EXPECTS(num_rows < std::numeric_limits<cudf::size_type>::max(),
"Table size is too large for the given data types");

return create_random_table(dtype_ids, row_count{num_rows}, profile, seed);
return create_random_table(
dtype_ids, row_count{static_cast<cudf::size_type>(num_rows)}, profile, seed);
}

std::unique_ptr<cudf::table> create_random_table(std::vector<cudf::type_id> const& dtype_ids,
Expand Down
7 changes: 4 additions & 3 deletions cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -313,8 +313,9 @@ class data_profile {
}
}

auto get_bool_probability_true() const { return bool_probability_true; }
auto get_null_probability() const { return null_probability; };
[[nodiscard]] auto get_bool_probability_true() const { return bool_probability_true; }
[[nodiscard]] auto get_null_probability() const { return null_probability; };
[[nodiscard]] auto get_valid_probability() const { return 1. - null_probability.value_or(0.); };
[[nodiscard]] auto get_cardinality() const { return cardinality; };
[[nodiscard]] auto get_avg_run_length() const { return avg_run_length; };

Expand Down
4 changes: 3 additions & 1 deletion cpp/benchmarks/string/case.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ void bench_case(nvbench::state& state)
cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126); // nice ASCII range
auto input = cudf::strings_column_view(col_view);
auto ascii_column = create_random_column(
cudf::type_id::INT8, row_count{input.chars_size(cudf::get_default_stream())}, ascii_profile);
cudf::type_id::INT8,
row_count{static_cast<cudf::size_type>(input.chars_size(cudf::get_default_stream()))},
ascii_profile);
auto ascii_data = ascii_column->view();

col_view = cudf::column_view(col_view.type(),
Expand Down
3 changes: 0 additions & 3 deletions cpp/include/cudf/detail/utilities/device_atomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,6 @@ struct genericAtomicOperationImpl<float, DeviceSum, 4> {
}
};

#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600)
// `atomicAdd(double)` is supported after cuda architecture 6.0
template <>
struct genericAtomicOperationImpl<double, DeviceSum, 8> {
using T = double;
Expand All @@ -180,7 +178,6 @@ struct genericAtomicOperationImpl<double, DeviceSum, 8> {
return atomicAdd(addr, update_value);
}
};
#endif

template <>
struct genericAtomicOperationImpl<int32_t, DeviceSum, 4> {
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/strings/strings_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class strings_column_view : private column_view {
* @param stream CUDA stream used for device memory operations and kernel launches
* @return Number of bytes in the chars child column
*/
[[nodiscard]] size_type chars_size(rmm::cuda_stream_view stream) const noexcept;
[[nodiscard]] int64_t chars_size(rmm::cuda_stream_view stream) const noexcept;

/**
* @brief Return an iterator for the chars child column.
Expand Down
9 changes: 2 additions & 7 deletions cpp/src/filling/repeat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,8 @@ struct count_accessor {
std::enable_if_t<std::is_integral_v<T>, cudf::size_type> operator()(rmm::cuda_stream_view stream)
{
using ScalarType = cudf::scalar_type_t<T>;
#if 1
// TODO: temporary till cudf::scalar's value() function is marked as const
auto p_count = const_cast<ScalarType*>(static_cast<ScalarType const*>(this->p_scalar));
#else
auto p_count = static_cast<ScalarType const*>(this->p_scalar);
#endif
auto count = p_count->value(stream);
auto p_count = static_cast<ScalarType const*>(this->p_scalar);
auto count = p_count->value(stream);
// static_cast is necessary due to bool
CUDF_EXPECTS(static_cast<int64_t>(count) <= std::numeric_limits<cudf::size_type>::max(),
"count should not exceed the column size limit",
Expand Down
4 changes: 0 additions & 4 deletions cpp/src/hash/managed.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,5 @@ struct managed {

inline bool isPtrManaged(cudaPointerAttributes attr)
{
#if CUDART_VERSION >= 10000
return (attr.type == cudaMemoryTypeManaged);
#else
return attr.isManaged;
#endif
}
10 changes: 0 additions & 10 deletions cpp/src/io/comp/snap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,7 @@ static __device__ uint8_t* StoreCopy(uint8_t* dst,
*/
static inline __device__ uint32_t HashMatchAny(uint32_t v, uint32_t t)
{
#if (__CUDA_ARCH__ >= 700)
return __match_any_sync(~0, v);
#else
uint32_t err_map = 0;
for (uint32_t i = 0; i < hash_bits; i++, v >>= 1) {
uint32_t b = v & 1;
uint32_t match_b = ballot(b);
err_map |= match_b ^ -(int32_t)b;
}
return ~err_map;
#endif
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/fst/agent_dfa.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class DFASimulationCallbackWrapper {
{
uint32_t const count = transducer_table(old_state, symbol_id, read_symbol);
if (write) {
#if __CUDA_ARCH__ > 0
#if defined(__CUDA_ARCH__)
#pragma unroll 1
#endif
for (uint32_t out_char = 0; out_char < count; out_char++) {
Expand Down
Loading

0 comments on commit ee07c5e

Please sign in to comment.