From 1d1a35bd327650742e51c8cc33bc1c9e659006a4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Oct 2021 10:16:12 -0600 Subject: [PATCH 01/48] Add condition to fallback to sort-based aggregates if the input values column has structs --- cpp/src/groupby/groupby.cu | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 7ac7c199b05..69d7f5860db 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -67,13 +67,21 @@ std::pair, std::vector> groupby::disp { using namespace cudf::structs::detail; + // Currently, structs are not supported in hash-based aggregates. + // Therefore, if any request contains structs then we must fallback to sort-based aggregates. + // TODO: Support structs in hash-based aggregates. + auto const has_struct = + std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { + return r.values.type().id() == type_id::STRUCT; + }); + // If sort groupby has been called once on this groupby object, then // always use sort groupby from now on. Because once keys are sorted, // all the aggs that can be done by hash groupby are efficiently done by // sort groupby as well. // Only use hash groupby if the keys aren't sorted and all requests can be // satisfied with a hash implementation - if (_keys_are_sorted == sorted::NO and not _helper and + if (_keys_are_sorted == sorted::NO and not _helper and (not has_struct) and detail::hash::can_use_hash_groupby(_keys, requests)) { // Optionally flatten nested key columns. auto flattened = flatten_nested_columns(_keys, {}, {}, column_nullability::FORCE); From f36abed8820d5e6a9877e82d20772f22717b7ffa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Oct 2021 13:32:12 -0600 Subject: [PATCH 02/48] Rename function --- cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index db2ae5b5d8e..59748110a83 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -124,7 +124,7 @@ struct null_replaced_value_accessor : value_accessor { template struct reduce_functor { template - static constexpr bool is_supported() + static constexpr bool is_natively_supported() { switch (K) { case aggregation::SUM: @@ -140,7 +140,7 @@ struct reduce_functor { } template - std::enable_if_t(), std::unique_ptr> operator()( + std::enable_if_t(), std::unique_ptr> operator()( column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -207,7 +207,8 @@ struct reduce_functor { } template - std::enable_if_t(), std::unique_ptr> operator()(Args&&... args) + std::enable_if_t(), std::unique_ptr> operator()( + Args&&... args) { CUDF_FAIL("Unsupported type-agg combination"); } From b0b45355f8034779ec2165c75ad7bb72ee7e5a00 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Oct 2021 15:02:13 -0600 Subject: [PATCH 03/48] Implement argmin/argmax for structs --- .../sort/group_single_pass_reduction_util.cuh | 81 ++++++++++++++++++- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 59748110a83..98894ee4370 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include @@ -172,7 +174,7 @@ struct reduce_functor { thrust::make_counting_iterator(0), thrust::make_discard_iterator(), resultview->begin(), - thrust::equal_to{}, + thrust::equal_to{}, OpType{*valuesview}); } else { auto init = OpType::template identity(); @@ -206,9 +208,82 @@ struct reduce_functor { return result; } + template + std::enable_if_t() and std::is_same_v and + (K == aggregation::ARGMIN or K == aggregation::ARGMAX), + std::unique_ptr> + operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + using ResultType = cudf::detail::target_type_t; + auto result = make_fixed_width_column( + data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); + + if (values.is_empty()) { return result; } + + // The comparison and null orders for finding arg_min/arg_max for the min/max elements. + auto const comp_order = K == aggregation::ARGMIN ? order::ASCENDING : order::DESCENDING; + auto const null_precedence = K == aggregation::ARGMIN ? null_order::AFTER : null_order::BEFORE; + + auto const flattened_values = + structs::detail::flatten_nested_columns(table_view{{values}}, + {comp_order}, + {null_precedence}, + structs::detail::column_nullability::MATCH_INCOMING); + auto const values_ptr = table_device_view::create(flattened_values, stream); + + // Perform reduction to find arg_min/arg_max. + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.data(), + group_labels.data() + group_labels.size(), + inp_iter, + thrust::make_discard_iterator(), + out_iter, + thrust::equal_to{}, + comp); + }; + + auto const count_iter = thrust::make_counting_iterator(0); + auto const result_begin = result->mutable_view().template begin(); + if (!values.has_nulls()) { + auto const comp = row_lexicographic_comparator(*values_ptr, + *values_ptr, + flattened_values.orders().data(), + flattened_values.null_orders().data()); + do_reduction(count_iter, result_begin, comp); + } else { + auto const comp = row_lexicographic_comparator(*values_ptr, + *values_ptr, + flattened_values.orders().data(), + flattened_values.null_orders().data()); + do_reduction(count_iter, result_begin, comp); + + // Generate bitmask for the output from the input. + auto const values_ptr = column_device_view::create(values, stream); + auto validity = rmm::device_uvector(num_groups, stream); + do_reduction(cudf::detail::make_validity_iterator(*values_ptr), + validity.begin(), + thrust::logical_or{}); + + auto [null_mask, null_count] = cudf::detail::valid_if( + validity.begin(), validity.end(), thrust::identity{}, stream, mr); + result->set_null_mask(std::move(null_mask)); + result->set_null_count(null_count); + } + + return result; + } + template - std::enable_if_t(), std::unique_ptr> operator()( - Args&&... args) + std::enable_if_t() and + (not std::is_same_v or + (K != aggregation::ARGMIN or K != aggregation::ARGMAX)), + std::unique_ptr> + operator()(Args&&... args) { CUDF_FAIL("Unsupported type-agg combination"); } From 6387ae25115459c09fd3e2bda350d5f5ac62f1e8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 10:52:30 -0600 Subject: [PATCH 04/48] Add comments and cleanup --- .../sort/group_single_pass_reduction_util.cuh | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 98894ee4370..812647fbb93 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -208,6 +208,10 @@ struct reduce_functor { return result; } + // This specialization handles the cases when the input values type: + // - Is not natively supported, and + // - Is struct type, and + // - Aggregation is either ARGMIN or ARGMAX. template std::enable_if_t() and std::is_same_v and (K == aggregation::ARGMIN or K == aggregation::ARGMAX), @@ -218,13 +222,15 @@ struct reduce_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // This is be expected to be size_type. using ResultType = cudf::detail::target_type_t; - auto result = make_fixed_width_column( + + auto result = make_fixed_width_column( data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); if (values.is_empty()) { return result; } - // The comparison and null orders for finding arg_min/arg_max for the min/max elements. + // The comparison order and null order for finding ARGMIN/ARGMAX. auto const comp_order = K == aggregation::ARGMIN ? order::ASCENDING : order::DESCENDING; auto const null_precedence = K == aggregation::ARGMIN ? null_order::AFTER : null_order::BEFORE; @@ -233,9 +239,9 @@ struct reduce_functor { {comp_order}, {null_precedence}, structs::detail::column_nullability::MATCH_INCOMING); - auto const values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_values_ptr = table_device_view::create(flattened_values, stream); - // Perform reduction to find arg_min/arg_max. + // Perform segmented reduction to find ARGMIN/ARGMAX. auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), @@ -250,19 +256,19 @@ struct reduce_functor { auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (!values.has_nulls()) { - auto const comp = row_lexicographic_comparator(*values_ptr, - *values_ptr, + auto const comp = row_lexicographic_comparator(*flattened_values_ptr, + *flattened_values_ptr, flattened_values.orders().data(), flattened_values.null_orders().data()); do_reduction(count_iter, result_begin, comp); } else { - auto const comp = row_lexicographic_comparator(*values_ptr, - *values_ptr, + auto const comp = row_lexicographic_comparator(*flattened_values_ptr, + *flattened_values_ptr, flattened_values.orders().data(), flattened_values.null_orders().data()); do_reduction(count_iter, result_begin, comp); - // Generate bitmask for the output from the input. + // Generate bitmask for the output by segmented reduction of the input bitmask. auto const values_ptr = column_device_view::create(values, stream); auto validity = rmm::device_uvector(num_groups, stream); do_reduction(cudf::detail::make_validity_iterator(*values_ptr), @@ -278,6 +284,10 @@ struct reduce_functor { return result; } + // Throw exception if the input values type: + // - Is not natively supported, or + // - Is not struct type, or + // - Is struct type but aggregation is not neither ARGMIN nor ARGMAX. template std::enable_if_t() and (not std::is_same_v or From fddbac9388225ae966b681b2bed76565b54d15f6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 16:22:18 -0600 Subject: [PATCH 05/48] Cleanup --- .../sort/group_single_pass_reduction_util.cuh | 49 +++++++++++++------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 812647fbb93..5efa64f5c27 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -77,6 +77,34 @@ struct ArgMax { } }; +/** + * @brief Binary operator ArgMin/ArgMax with index values into the input table. + */ +template +struct ArgMinMax { + size_type const num_rows; + row_lexicographic_comparator const comp; + + ArgMinMax(size_type const num_rows_, table_device_view const& table_) + : num_rows(num_rows_), comp(table_, table_) + { + } + + CUDA_DEVICE_CALLABLE auto operator()(size_type lhs_idx, size_type rhs_idx) const + { + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs_idx < 0 || lhs_idx >= num_rows) { return rhs_idx; } + if (rhs_idx < 0 || rhs_idx >= num_rows) { return lhs_idx; } + + // Return `lhs_idx` iff: + // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or + // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. + return comp(lhs_idx, rhs_idx) == arg_min ? lhs_idx : rhs_idx; + } +}; + /** * @brief Value accessor for column which supports dictionary column too. * @@ -230,15 +258,8 @@ struct reduce_functor { if (values.is_empty()) { return result; } - // The comparison order and null order for finding ARGMIN/ARGMAX. - auto const comp_order = K == aggregation::ARGMIN ? order::ASCENDING : order::DESCENDING; - auto const null_precedence = K == aggregation::ARGMIN ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = - structs::detail::flatten_nested_columns(table_view{{values}}, - {comp_order}, - {null_precedence}, - structs::detail::column_nullability::MATCH_INCOMING); + structs::detail::flatten_nested_columns(table_view{{values}}, {}, {}); auto const flattened_values_ptr = table_device_view::create(flattened_values, stream); // Perform segmented reduction to find ARGMIN/ARGMAX. @@ -256,16 +277,12 @@ struct reduce_functor { auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (!values.has_nulls()) { - auto const comp = row_lexicographic_comparator(*flattened_values_ptr, - *flattened_values_ptr, - flattened_values.orders().data(), - flattened_values.null_orders().data()); + auto const comp = + ArgMinMax(values.size(), *flattened_values_ptr); do_reduction(count_iter, result_begin, comp); } else { - auto const comp = row_lexicographic_comparator(*flattened_values_ptr, - *flattened_values_ptr, - flattened_values.orders().data(), - flattened_values.null_orders().data()); + auto const comp = + ArgMinMax(values.size(), *flattened_values_ptr); do_reduction(count_iter, result_begin, comp); // Generate bitmask for the output by segmented reduction of the input bitmask. From b86665ee298e4a38886b9c5021bc2a91ece38dd4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 17:59:47 -0600 Subject: [PATCH 06/48] Simplify code --- .../sort/group_single_pass_reduction_util.cuh | 135 +++++++++--------- 1 file changed, 65 insertions(+), 70 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 5efa64f5c27..0f798fa6ba7 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -40,48 +40,44 @@ namespace groupby { namespace detail { /** - * @brief ArgMin binary operator with index values into input column. + * @brief Binary operator with index values into the input column. * * @tparam T Type of the underlying column. Must support '<' operator. */ -template -struct ArgMin { +template +struct ArgMinMax { column_device_view const d_col; - CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const { // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and // github.com/NVIDIA/thrust/issues/1525 // where invalid random values may be passed here by thrust::reduce_by_key - if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } - if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } - return d_col.element(lhs) < d_col.element(rhs) ? lhs : rhs; - } -}; + if (lhs_idx < 0 || lhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(lhs_idx))) { + return rhs_idx; + } + if (rhs_idx < 0 || rhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(rhs_idx))) { + return lhs_idx; + } -/** - * @brief ArgMax binary operator with index values into input column. - * - * @tparam T Type of the underlying column. Must support '<' operator. - */ -template -struct ArgMax { - column_device_view const d_col; - CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const - { - // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and - // github.com/NVIDIA/thrust/issues/1525 - // where invalid random values may be passed here by thrust::reduce_by_key - if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; } - if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; } - return d_col.element(rhs) < d_col.element(lhs) ? lhs : rhs; + // Return `lhs_idx` iff: + // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or + // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. + auto const less = d_col.element(lhs_idx) < d_col.element(rhs_idx); + return less == arg_min ? lhs_idx : rhs_idx; } }; /** * @brief Binary operator ArgMin/ArgMax with index values into the input table. + * + * @tparam T Type of the underlying data. This is the fallback for the cases when T does not support + * '<' operator. */ -template -struct ArgMinMax { +template +struct ArgMinMax()>> { size_type const num_rows; row_lexicographic_comparator const comp; @@ -178,7 +174,6 @@ struct reduce_functor { rmm::mr::device_memory_resource* mr) { using DeviceType = device_storage_type_t; - using OpType = cudf::detail::corresponding_operator_t; using ResultType = cudf::detail::target_type_t; using ResultDType = device_storage_type_t; @@ -191,43 +186,44 @@ struct reduce_functor { if (values.is_empty()) { return result; } - auto resultview = mutable_column_device_view::create(result->mutable_view(), stream); - auto valuesview = column_device_view::create(values, stream); - if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { - using OpType = - std::conditional_t<(K == aggregation::ARGMAX), ArgMax, ArgMin>; + // Perform segmented reduction. + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), group_labels.data() + group_labels.size(), - thrust::make_counting_iterator(0), + inp_iter, thrust::make_discard_iterator(), - resultview->begin(), + out_iter, thrust::equal_to{}, - OpType{*valuesview}); + comp); + }; + + auto const d_values_ptr = column_device_view::create(values, stream); + auto const result_begin = result->mutable_view().template begin(); + + if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { + auto const count_iter = thrust::make_counting_iterator(0); + if (values.has_nulls()) { + using OpType = ArgMinMax; + do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); + } else { + using OpType = ArgMinMax; + do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); + } } else { - auto init = OpType::template identity(); - auto begin = cudf::detail::make_counting_transform_iterator( - 0, null_replaced_value_accessor{*valuesview, init, values.has_nulls()}); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.data(), - group_labels.data() + group_labels.size(), - begin, - thrust::make_discard_iterator(), - resultview->begin(), - thrust::equal_to{}, - OpType{}); + using OpType = cudf::detail::corresponding_operator_t; + auto init = OpType::template identity(); + auto begin = cudf::detail::make_counting_transform_iterator( + 0, null_replaced_value_accessor{*d_values_ptr, init, values.has_nulls()}); + do_reduction(begin, result_begin, OpType{}); } if (values.has_nulls()) { rmm::device_uvector validity(num_groups, stream); - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.data(), - group_labels.data() + group_labels.size(), - cudf::detail::make_validity_iterator(*valuesview), - thrust::make_discard_iterator(), - validity.begin(), - thrust::equal_to{}, - thrust::logical_or{}); + do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), + validity.begin(), + thrust::logical_or{}); + auto [null_mask, null_count] = cudf::detail::valid_if( validity.begin(), validity.end(), thrust::identity{}, stream, mr); result->set_null_mask(std::move(null_mask)); @@ -260,7 +256,7 @@ struct reduce_functor { auto const flattened_values = structs::detail::flatten_nested_columns(table_view{{values}}, {}, {}); - auto const flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); // Perform segmented reduction to find ARGMIN/ARGMAX. auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { @@ -276,19 +272,15 @@ struct reduce_functor { auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); - if (!values.has_nulls()) { - auto const comp = - ArgMinMax(values.size(), *flattened_values_ptr); - do_reduction(count_iter, result_begin, comp); - } else { - auto const comp = - ArgMinMax(values.size(), *flattened_values_ptr); - do_reduction(count_iter, result_begin, comp); + if (values.has_nulls()) { + auto const op = + ArgMinMax(values.size(), *d_flattened_values_ptr); + do_reduction(count_iter, result_begin, op); // Generate bitmask for the output by segmented reduction of the input bitmask. - auto const values_ptr = column_device_view::create(values, stream); - auto validity = rmm::device_uvector(num_groups, stream); - do_reduction(cudf::detail::make_validity_iterator(*values_ptr), + auto const d_values_ptr = column_device_view::create(values, stream); + auto validity = rmm::device_uvector(num_groups, stream); + do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), validity.begin(), thrust::logical_or{}); @@ -296,15 +288,18 @@ struct reduce_functor { validity.begin(), validity.end(), thrust::identity{}, stream, mr); result->set_null_mask(std::move(null_mask)); result->set_null_count(null_count); + } else { + auto const op = + ArgMinMax(values.size(), *d_flattened_values_ptr); + do_reduction(count_iter, result_begin, op); } return result; } // Throw exception if the input values type: - // - Is not natively supported, or - // - Is not struct type, or - // - Is struct type but aggregation is not neither ARGMIN nor ARGMAX. + // - Is not natively supported, and + // - Is not struct type, or is struct type but aggregation is not neither ARGMIN nor ARGMAX. template std::enable_if_t() and (not std::is_same_v or From 96683ac1d77f92a1b764d8a6b20b2641de0dd075 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 21:19:55 -0600 Subject: [PATCH 07/48] Fix null order --- .../sort/group_single_pass_reduction_util.cuh | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 0f798fa6ba7..b292c74f37f 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -81,8 +82,10 @@ struct ArgMinMax const comp; - ArgMinMax(size_type const num_rows_, table_device_view const& table_) - : num_rows(num_rows_), comp(table_, table_) + ArgMinMax(size_type const num_rows_, + table_device_view const& table_, + null_order const* null_precedence) + : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) { } @@ -254,9 +257,17 @@ struct reduce_functor { if (values.is_empty()) { return result; } - auto const flattened_values = - structs::detail::flatten_nested_columns(table_view{{values}}, {}, {}); + // When finding ARGMIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite for ARGMAX. + auto const null_precedence = + (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::ARGMIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); // Perform segmented reduction to find ARGMIN/ARGMAX. auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { @@ -273,8 +284,8 @@ struct reduce_functor { auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (values.has_nulls()) { - auto const op = - ArgMinMax(values.size(), *d_flattened_values_ptr); + auto const op = ArgMinMax( + values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); // Generate bitmask for the output by segmented reduction of the input bitmask. @@ -289,8 +300,8 @@ struct reduce_functor { result->set_null_mask(std::move(null_mask)); result->set_null_count(null_count); } else { - auto const op = - ArgMinMax(values.size(), *d_flattened_values_ptr); + auto const op = ArgMinMax( + values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); } From b26cc9343cf856c0ff7bc0912b2d727af9d23215 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 21:28:58 -0600 Subject: [PATCH 08/48] Add unit tests --- cpp/tests/groupby/argmax_tests.cpp | 74 ++++++++++++++++++++++++- cpp/tests/groupby/argmin_tests.cpp | 74 ++++++++++++++++++++++++- cpp/tests/groupby/max_tests.cpp | 88 +++++++++++++++++++++++++++++- cpp/tests/groupby/min_tests.cpp | 88 +++++++++++++++++++++++++++++- 4 files changed, 318 insertions(+), 6 deletions(-) diff --git a/cpp/tests/groupby/argmax_tests.cpp b/cpp/tests/groupby/argmax_tests.cpp index 7cf693f7b08..0b06c184b75 100644 --- a/cpp/tests/groupby/argmax_tests.cpp +++ b/cpp/tests/groupby/argmax_tests.cpp @@ -32,7 +32,7 @@ struct groupby_argmax_test : public cudf::test::BaseFixture { }; using K = int32_t; -TYPED_TEST_CASE(groupby_argmax_test, cudf::test::FixedWidthTypes); +TYPED_TEST_SUITE(groupby_argmax_test, cudf::test::FixedWidthTypes); TYPED_TEST(groupby_argmax_test, basic) { @@ -182,6 +182,78 @@ TEST_F(groupby_dictionary_argmax_test, basic) force_use_sort_impl::YES); } +struct groupby_argmax_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_argmax_struct_test, basic) +{ + auto const keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_indices = fixed_width_column_wrapper{0, 4, 2}; + + auto agg = cudf::make_argmax_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + +TEST_F(groupby_argmax_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_indices = fixed_width_column_wrapper{0, 4, 2}; + + auto agg = cudf::make_argmax_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + +TEST_F(groupby_argmax_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = + fixed_width_column_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{{1, 2, 3, 4}, no_nulls()}; + auto const expect_indices = fixed_width_column_wrapper{{0, 4, 2, null}, null_at(3)}; + + auto agg = cudf::make_argmax_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/argmin_tests.cpp b/cpp/tests/groupby/argmin_tests.cpp index 915575546c9..67235a64066 100644 --- a/cpp/tests/groupby/argmin_tests.cpp +++ b/cpp/tests/groupby/argmin_tests.cpp @@ -32,7 +32,7 @@ struct groupby_argmin_test : public cudf::test::BaseFixture { }; using K = int32_t; -TYPED_TEST_CASE(groupby_argmin_test, cudf::test::FixedWidthTypes); +TYPED_TEST_SUITE(groupby_argmin_test, cudf::test::FixedWidthTypes); TYPED_TEST(groupby_argmin_test, basic) { @@ -183,5 +183,77 @@ TEST_F(groupby_dictionary_argmin_test, basic) force_use_sort_impl::YES); } +struct groupby_argmin_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_argmin_struct_test, basic) +{ + auto const keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_indices = fixed_width_column_wrapper{3, 5, 7}; + + auto agg = cudf::make_argmin_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + +TEST_F(groupby_argmin_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_indices = fixed_width_column_wrapper{3, 5, 7}; + + auto agg = cudf::make_argmin_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + +TEST_F(groupby_argmin_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = + fixed_width_column_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{{1, 2, 3, 4}, no_nulls()}; + auto const expect_indices = fixed_width_column_wrapper{{3, 1, 8, null}, null_at(3)}; + + auto agg = cudf::make_argmin_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_indices, std::move(agg)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index 491e6927304..8d15401aa09 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -33,7 +33,7 @@ struct groupby_max_test : public cudf::test::BaseFixture { }; using K = int32_t; -TYPED_TEST_CASE(groupby_max_test, cudf::test::FixedWidthTypesWithoutFixedPoint); +TYPED_TEST_SUITE(groupby_max_test, cudf::test::FixedWidthTypesWithoutFixedPoint); TYPED_TEST(groupby_max_test, basic) { @@ -255,7 +255,7 @@ template struct FixedPointTestBothReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(FixedPointTestBothReps, cudf::test::FixedPointTypes); TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxDecimalAsValue) { @@ -304,5 +304,89 @@ TYPED_TEST(FixedPointTestBothReps, GroupByHashMaxDecimalAsValue) } } +struct groupby_max_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_max_struct_test, basic) +{ + auto const keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"año", "zit", "₹1"}; + auto child2 = fixed_width_column_wrapper{1, 5, 3}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_max_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"año", "zit", "₹1"}; + auto child2 = fixed_width_column_wrapper{1, 5, 3}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_max_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = + fixed_width_column_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{{1, 2, 3, 4}, no_nulls()}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"año", "zit", "₹1", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 5, 7, null}; + return structs_column_wrapper{{child1, child2}, null_at(3)}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 4f8db1d750c..c2cfca83b29 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -33,7 +33,7 @@ struct groupby_min_test : public cudf::test::BaseFixture { }; using K = int32_t; -TYPED_TEST_CASE(groupby_min_test, cudf::test::FixedWidthTypesWithoutFixedPoint); +TYPED_TEST_SUITE(groupby_min_test, cudf::test::FixedWidthTypesWithoutFixedPoint); TYPED_TEST(groupby_min_test, basic) { @@ -255,7 +255,7 @@ template struct FixedPointTestBothReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(FixedPointTestBothReps, cudf::test::FixedPointTypes); TYPED_TEST(FixedPointTestBothReps, GroupBySortMinDecimalAsValue) { @@ -303,5 +303,89 @@ TYPED_TEST(FixedPointTestBothReps, GroupByHashMinDecimalAsValue) } } +struct groupby_min_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_min_struct_test, basic) +{ + auto const keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"aaa", "bat", "$1"}; + auto child2 = fixed_width_column_wrapper{4, 6, 8}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_min_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = fixed_width_column_wrapper{ + dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = fixed_width_column_wrapper{1, 2, 3}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"aaa", "bat", "$1"}; + auto child2 = fixed_width_column_wrapper{4, 6, 8}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_min_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = + fixed_width_column_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{{1, 2, 3, 4}, no_nulls()}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{"aaa", "bit", "€1", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{6, 8, 1, null}; + return structs_column_wrapper{{child1, child2}, null_at(3)}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf From 895cabb21fdd6799751db08638359735ebb2e9a1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 27 Oct 2021 22:18:21 -0600 Subject: [PATCH 09/48] Rename functor --- .../sort/group_single_pass_reduction_util.cuh | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index b292c74f37f..df779062cfd 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -46,7 +46,7 @@ namespace detail { * @tparam T Type of the underlying column. Must support '<' operator. */ template -struct ArgMinMax { +struct arg_minmax_fn { column_device_view const d_col; CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const { @@ -75,16 +75,16 @@ struct ArgMinMax { * '<' operator. */ template -struct ArgMinMax()>> { +struct arg_minmax_fn()>> { size_type const num_rows; row_lexicographic_comparator const comp; - ArgMinMax(size_type const num_rows_, - table_device_view const& table_, - null_order const* null_precedence) + arg_minmax_fn(size_type const num_rows_, + table_device_view const& table_, + null_order const* null_precedence) : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) { } @@ -190,7 +190,7 @@ struct reduce_functor { if (values.is_empty()) { return result; } // Perform segmented reduction. - auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), group_labels.data() + group_labels.size(), @@ -198,7 +198,7 @@ struct reduce_functor { thrust::make_discard_iterator(), out_iter, thrust::equal_to{}, - comp); + binop); }; auto const d_values_ptr = column_device_view::create(values, stream); @@ -207,10 +207,10 @@ struct reduce_functor { if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { auto const count_iter = thrust::make_counting_iterator(0); if (values.has_nulls()) { - using OpType = ArgMinMax; + using OpType = arg_minmax_fn; do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); } else { - using OpType = ArgMinMax; + using OpType = arg_minmax_fn; do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); } } else { @@ -270,7 +270,7 @@ struct reduce_functor { : rmm::device_uvector(0, stream); // Perform segmented reduction to find ARGMIN/ARGMAX. - auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& comp) { + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.data(), group_labels.data() + group_labels.size(), @@ -278,13 +278,13 @@ struct reduce_functor { thrust::make_discard_iterator(), out_iter, thrust::equal_to{}, - comp); + binop); }; auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (values.has_nulls()) { - auto const op = ArgMinMax( + auto const op = arg_minmax_fn( values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); @@ -300,7 +300,7 @@ struct reduce_functor { result->set_null_mask(std::move(null_mask)); result->set_null_count(null_count); } else { - auto const op = ArgMinMax( + auto const op = arg_minmax_fn( values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); } From bfc0585151d3d9dcd3bc638fe925f497f474b273 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 28 Oct 2021 09:03:36 -0600 Subject: [PATCH 10/48] Move `has_struct` condition check into `can_use_has_groupby` --- cpp/src/groupby/groupby.cu | 10 +--------- cpp/src/groupby/hash/groupby.cu | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 508353e7759..e8b4a8b1cbf 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -67,21 +67,13 @@ std::pair, std::vector> groupby::disp { using namespace cudf::structs::detail; - // Currently, structs are not supported in hash-based aggregates. - // Therefore, if any request contains structs then we must fallback to sort-based aggregates. - // TODO: Support structs in hash-based aggregates. - auto const has_struct = - std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return r.values.type().id() == type_id::STRUCT; - }); - // If sort groupby has been called once on this groupby object, then // always use sort groupby from now on. Because once keys are sorted, // all the aggs that can be done by hash groupby are efficiently done by // sort groupby as well. // Only use hash groupby if the keys aren't sorted and all requests can be // satisfied with a hash implementation - if (_keys_are_sorted == sorted::NO and not _helper and (not has_struct) and + if (_keys_are_sorted == sorted::NO and not _helper and detail::hash::can_use_hash_groupby(_keys, requests)) { // Optionally flatten nested key columns. auto flattened = flatten_nested_columns(_keys, {}, {}, column_nullability::FORCE); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index e7024c80a68..ef640256927 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -632,11 +632,22 @@ std::unique_ptr groupby_null_templated(table_view const& keys, */ bool can_use_hash_groupby(table_view const& keys, host_span requests) { - return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { - return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { - return is_hash_aggregation(a->kind); + auto const all_hash_aggregations = + std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { + return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { + return is_hash_aggregation(a->kind); + }); }); - }); + + // Currently, structs are not supported in any of hash-based aggregations. + // Therefore, if any request contains structs then we must fallback to sort-based aggregations. + // TODO: Support structs in hash-based aggregations. + auto const has_struct = + std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { + return r.values.type().id() == type_id::STRUCT; + }); + + return all_hash_aggregations && !has_struct; } // Hash-based groupby From cc5c8c416250531e9dc26a8d35839ffdc5ddf4b7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 29 Oct 2021 14:39:08 -0600 Subject: [PATCH 11/48] Rename structs and function --- .../sort/group_single_pass_reduction_util.cuh | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index df779062cfd..22a01fb6cb3 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -45,8 +45,8 @@ namespace detail { * * @tparam T Type of the underlying column. Must support '<' operator. */ -template -struct arg_minmax_fn { +template +struct element_arg_minmax_fn { column_device_view const d_col; CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const { @@ -75,16 +75,13 @@ struct arg_minmax_fn { * '<' operator. */ template -struct arg_minmax_fn()>> { +struct row_arg_minmax_fn { size_type const num_rows; row_lexicographic_comparator const comp; - arg_minmax_fn(size_type const num_rows_, - table_device_view const& table_, - null_order const* null_precedence) + row_arg_minmax_fn(size_type const num_rows_, + table_device_view const& table_, + null_order const* null_precedence) : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) { } @@ -153,7 +150,7 @@ struct null_replaced_value_accessor : value_accessor { template struct reduce_functor { template - static constexpr bool is_natively_supported() + static constexpr bool is_trivially_supported() { switch (K) { case aggregation::SUM: @@ -169,7 +166,7 @@ struct reduce_functor { } template - std::enable_if_t(), std::unique_ptr> operator()( + std::enable_if_t(), std::unique_ptr> operator()( column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -207,10 +204,10 @@ struct reduce_functor { if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { auto const count_iter = thrust::make_counting_iterator(0); if (values.has_nulls()) { - using OpType = arg_minmax_fn; + using OpType = element_arg_minmax_fn; do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); } else { - using OpType = arg_minmax_fn; + using OpType = element_arg_minmax_fn; do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); } } else { @@ -236,11 +233,12 @@ struct reduce_functor { } // This specialization handles the cases when the input values type: - // - Is not natively supported, and // - Is struct type, and // - Aggregation is either ARGMIN or ARGMAX. + // Since `is_trivially_supported` returns false for `struct_view`, we don't have to cover it in + // the SFINAE condition. template - std::enable_if_t() and std::is_same_v and + std::enable_if_t and (K == aggregation::ARGMIN or K == aggregation::ARGMAX), std::unique_ptr> operator()(column_view const& values, @@ -284,7 +282,7 @@ struct reduce_functor { auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (values.has_nulls()) { - auto const op = arg_minmax_fn( + auto const op = row_arg_minmax_fn( values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); @@ -300,7 +298,7 @@ struct reduce_functor { result->set_null_mask(std::move(null_mask)); result->set_null_count(null_count); } else { - auto const op = arg_minmax_fn( + auto const op = row_arg_minmax_fn( values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); do_reduction(count_iter, result_begin, op); } @@ -309,10 +307,10 @@ struct reduce_functor { } // Throw exception if the input values type: - // - Is not natively supported, and + // - Is not trivially supported, and // - Is not struct type, or is struct type but aggregation is not neither ARGMIN nor ARGMAX. template - std::enable_if_t() and + std::enable_if_t() and (not std::is_same_v or (K != aggregation::ARGMIN or K != aggregation::ARGMAX)), std::unique_ptr> From cd7f7a4ba4fa48410a23cee9e2be01fbf009c537 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Nov 2021 11:12:48 -0600 Subject: [PATCH 12/48] Fix SFINAE condition, and extract a struct functor --- .../sort/group_single_pass_reduction_util.cuh | 37 +----------- cpp/src/groupby/sort/group_util.cuh | 60 +++++++++++++++++++ 2 files changed, 63 insertions(+), 34 deletions(-) create mode 100644 cpp/src/groupby/sort/group_util.cuh diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 22a01fb6cb3..3e1f994cf7b 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -68,39 +70,6 @@ struct element_arg_minmax_fn { } }; -/** - * @brief Binary operator ArgMin/ArgMax with index values into the input table. - * - * @tparam T Type of the underlying data. This is the fallback for the cases when T does not support - * '<' operator. - */ -template -struct row_arg_minmax_fn { - size_type const num_rows; - row_lexicographic_comparator const comp; - - row_arg_minmax_fn(size_type const num_rows_, - table_device_view const& table_, - null_order const* null_precedence) - : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) - { - } - - CUDA_DEVICE_CALLABLE auto operator()(size_type lhs_idx, size_type rhs_idx) const - { - // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and - // github.com/NVIDIA/thrust/issues/1525 - // where invalid random values may be passed here by thrust::reduce_by_key - if (lhs_idx < 0 || lhs_idx >= num_rows) { return rhs_idx; } - if (rhs_idx < 0 || rhs_idx >= num_rows) { return lhs_idx; } - - // Return `lhs_idx` iff: - // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or - // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. - return comp(lhs_idx, rhs_idx) == arg_min ? lhs_idx : rhs_idx; - } -}; - /** * @brief Value accessor for column which supports dictionary column too. * @@ -312,7 +281,7 @@ struct reduce_functor { template std::enable_if_t() and (not std::is_same_v or - (K != aggregation::ARGMIN or K != aggregation::ARGMAX)), + (K != aggregation::ARGMIN and K != aggregation::ARGMAX)), std::unique_ptr> operator()(Args&&... args) { diff --git a/cpp/src/groupby/sort/group_util.cuh b/cpp/src/groupby/sort/group_util.cuh new file mode 100644 index 00000000000..43de1b4a9d8 --- /dev/null +++ b/cpp/src/groupby/sort/group_util.cuh @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf { +namespace groupby { +namespace detail { + +/** + * @brief Binary operator ArgMin/ArgMax with index values into the input table. + * + * @tparam T Type of the underlying data. This is the fallback for the cases when T does not support + * '<' operator. + */ +template +struct row_arg_minmax_fn { + size_type const num_rows; + row_lexicographic_comparator const comp; + + row_arg_minmax_fn(size_type const num_rows_, + table_device_view const& table_, + null_order const* null_precedence) + : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) + { + } + + CUDA_DEVICE_CALLABLE auto operator()(size_type lhs_idx, size_type rhs_idx) const + { + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs_idx < 0 || lhs_idx >= num_rows) { return rhs_idx; } + if (rhs_idx < 0 || rhs_idx >= num_rows) { return lhs_idx; } + + // Return `lhs_idx` iff: + // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or + // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. + return comp(lhs_idx, rhs_idx) == arg_min ? lhs_idx : rhs_idx; + } +}; + +} // namespace detail +} // namespace groupby +} // namespace cudf From f7d1b3e027aefe80825f5662bd26cf3d186d500a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Nov 2021 11:13:02 -0600 Subject: [PATCH 13/48] Implement groupby scan for struct min/max --- cpp/src/groupby/sort/group_scan_util.cuh | 153 ++++++++++++++++++----- 1 file changed, 119 insertions(+), 34 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index ef9df937fc5..dba066fc55c 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -16,12 +16,18 @@ #pragma once +#include + #include #include #include +#include #include +#include #include #include +#include +#include #include #include #include @@ -39,7 +45,7 @@ namespace detail { template struct scan_functor { template - static constexpr bool is_supported() + static constexpr bool is_trivially_supported() { if (K == aggregation::SUM) return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); @@ -50,7 +56,7 @@ struct scan_functor { } template - std::enable_if_t() and not std::is_same_v, + std::enable_if_t() and not std::is_same_v, std::unique_ptr> operator()(column_view const& values, size_type num_groups, @@ -78,34 +84,33 @@ struct scan_functor { auto result_view = mutable_column_device_view::create(result->mutable_view(), stream); auto values_view = column_device_view::create(values, stream); - if (values.has_nulls()) { - auto input = thrust::make_transform_iterator( - make_null_replacement_iterator(*values_view, OpType::template identity()), - thrust::identity{}); + // Perform segmented scan. + auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { thrust::inclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), - input, - result_view->begin(), + inp_iter, + out_iter, thrust::equal_to{}, - OpType{}); + binop); + }; + + if (values.has_nulls()) { + auto input = thrust::make_transform_iterator( + make_null_replacement_iterator(*values_view, OpType::template identity()), + thrust::identity{}); + do_scan(input, result_view->begin(), OpType{}); result->set_null_mask(cudf::detail::copy_bitmask(values, stream)); } else { auto input = thrust::make_transform_iterator(values_view->begin(), thrust::identity{}); - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - input, - result_view->begin(), - thrust::equal_to{}, - OpType{}); + do_scan(input, result_view->begin(), OpType{}); } return result; } template - std::enable_if_t() and std::is_same_v, + std::enable_if_t() and std::is_same_v, std::unique_ptr> operator()(column_view const& values, size_type num_groups, @@ -115,33 +120,30 @@ struct scan_functor { { using OpType = cudf::detail::corresponding_operator_t; - if (values.is_empty()) { - return cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); - } + if (values.is_empty()) { return cudf::make_empty_column(cudf::type_id::STRING); } // create an empty output vector we can fill with string_view instances auto results_vector = rmm::device_uvector(values.size(), stream); auto values_view = column_device_view::create(values, stream); - if (values.has_nulls()) { - auto input = make_null_replacement_iterator( - *values_view, OpType::template identity(), values.has_nulls()); + // Perform segmented scan. + auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { thrust::inclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), - input, - results_vector.begin(), + inp_iter, + out_iter, thrust::equal_to{}, - OpType{}); + binop); + }; + + if (values.has_nulls()) { + auto input = make_null_replacement_iterator( + *values_view, OpType::template identity(), values.has_nulls()); + do_scan(input, results_vector.begin(), OpType{}); } else { - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - values_view->begin(), - results_vector.begin(), - thrust::equal_to{}, - OpType{}); + do_scan(values_view->begin(), results_vector.begin(), OpType{}); } // turn the string_view vector into a strings column @@ -151,8 +153,91 @@ struct scan_functor { return results; } + template + std::enable_if_t and + (K == aggregation::MIN or K == aggregation::MAX), + std::unique_ptr> + operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + if (values.is_empty()) { return cudf::empty_like(values); } + + // When finding MIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite when finding MAX. + auto const null_precedence = (K == aggregation::MIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::MIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); + + // Create a gather map contaning indices of the prefix min/max elements. + auto gather_map = rmm::device_uvector(values.size(), stream); + auto const map_begin = gather_map.begin(); + + // Perform segmented scan. + auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + inp_iter, + out_iter, + thrust::equal_to{}, + binop); + }; + + // Find the indices of the prefix min/max elements within each group. + auto const count_iter = thrust::make_counting_iterator(0); + if (values.has_nulls()) { + auto const op = row_arg_minmax_fn( + values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); + do_scan(count_iter, map_begin, op); + } else { + auto const op = row_arg_minmax_fn( + values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); + do_scan(count_iter, map_begin, op); + } + + auto gather_map_view = + column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); + + // Gather the children elements of the prefix min/max struct elements first. + auto scanned_children = + cudf::detail::gather( + table_view(std::vector{values.child_begin(), values.child_end()}), + gather_map_view, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // After gathering the children elements, we need to push down nulls from the root structs + // column to them (so we will have null row in => null row out). + if (values.has_nulls()) { + for (std::unique_ptr& child : scanned_children) { + structs::detail::superimpose_parent_nulls( + values.null_mask(), values.null_count(), *child, stream, mr); + } + } + + return make_structs_column(values.size(), + std::move(scanned_children), + values.null_count(), + cudf::detail::copy_bitmask(values, stream)); + } + template - std::enable_if_t(), std::unique_ptr> operator()(Args&&... args) + std::enable_if_t() and + (not std::is_same_v or + (K != aggregation::MIN and K != aggregation::MAX)), + std::unique_ptr> + operator()(Args&&... args) { CUDF_FAIL("Unsupported groupby scan type-agg combination"); } From 5d77d4f140fabefee7da190f1f717a01191b68c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Nov 2021 14:09:46 -0600 Subject: [PATCH 14/48] Implement unit tests --- cpp/tests/groupby/max_scan_tests.cpp | 89 +++++++++++++++++++++++++++- cpp/tests/groupby/min_scan_tests.cpp | 89 +++++++++++++++++++++++++++- 2 files changed, 174 insertions(+), 4 deletions(-) diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp index 5108f91b31c..bb2f87fd424 100644 --- a/cpp/tests/groupby/max_scan_tests.cpp +++ b/cpp/tests/groupby/max_scan_tests.cpp @@ -39,7 +39,7 @@ struct groupby_max_scan_test : public cudf::test::BaseFixture { using result_wrapper = fixed_width_column_wrapper; }; -TYPED_TEST_CASE(groupby_max_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint); +TYPED_TEST_SUITE(groupby_max_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint); TYPED_TEST(groupby_max_scan_test, basic) { @@ -148,7 +148,7 @@ template struct FixedPointTestBothReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(FixedPointTestBothReps, cudf::test::FixedPointTypes); TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxScanDecimalAsValue) { @@ -173,5 +173,90 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxScanDecimalAsValue) } } +struct groupby_max_scan_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_max_scan_struct_test, basic) +{ + auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + auto const expect_vals = [] { + auto child1 = + strings_column_wrapper{"año", "año", "año", "bit", "zit", "zit", "zit", "₹1", "₹1", "₹1"}; + auto child2 = fixed_width_column_wrapper{1, 1, 1, 2, 5, 5, 5, 3, 3, 3}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_max_scan_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = + key_wrapper{dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = key_wrapper{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + auto const expect_vals = [] { + auto child1 = + strings_column_wrapper{"año", "año", "año", "bit", "zit", "zit", "zit", "₹1", "₹1", "₹1"}; + auto child2 = fixed_width_column_wrapper{1, 1, 1, 2, 5, 5, 5, 3, 3, 3}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_max_scan_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = key_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = key_wrapper{{1, 1, 1, 2, 2, 2, 2, 3, 3, 4}, no_nulls()}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{ + "año", "año", "" /*NULL*/, "bit", "zit", "" /*NULL*/, "zit", "₹1", "₹1", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 9, null, 8, 5, null, 5, 7, 7, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({2, 5, 9})}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/min_scan_tests.cpp b/cpp/tests/groupby/min_scan_tests.cpp index 59b6382616f..06c0f5ceb3b 100644 --- a/cpp/tests/groupby/min_scan_tests.cpp +++ b/cpp/tests/groupby/min_scan_tests.cpp @@ -38,7 +38,7 @@ struct groupby_min_scan_test : public cudf::test::BaseFixture { using result_wrapper = fixed_width_column_wrapper; }; -TYPED_TEST_CASE(groupby_min_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint); +TYPED_TEST_SUITE(groupby_min_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint); TYPED_TEST(groupby_min_scan_test, basic) { @@ -146,7 +146,7 @@ template struct FixedPointTestBothReps : public cudf::test::BaseFixture { }; -TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes); +TYPED_TEST_SUITE(FixedPointTestBothReps, cudf::test::FixedPointTypes); TYPED_TEST(FixedPointTestBothReps, GroupBySortMinScanDecimalAsValue) { @@ -172,5 +172,90 @@ TYPED_TEST(FixedPointTestBothReps, GroupBySortMinScanDecimalAsValue) } } +struct groupby_min_scan_struct_test : public cudf::test::BaseFixture { +}; + +TEST_F(groupby_min_scan_struct_test, basic) +{ + auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; + auto const vals = [] { + auto child1 = + strings_column_wrapper{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + auto const expect_vals = [] { + auto child1 = + strings_column_wrapper{"año", "aaa", "aaa", "bit", "bit", "bat", "bat", "₹1", "$1", "$1"}; + auto child2 = fixed_width_column_wrapper{1, 4, 4, 2, 2, 6, 6, 3, 8, 8}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_min_scan_struct_test, slice_input) +{ + constexpr int32_t dont_care{1}; + auto const keys_original = + key_wrapper{dont_care, dont_care, 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, dont_care}; + auto const vals_original = [] { + auto child1 = strings_column_wrapper{"dont_care", + "dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "dont_care"}; + auto child2 = key_wrapper{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto const keys = cudf::slice(keys_original, {2, 12})[0]; + auto const vals = cudf::slice(vals_original, {2, 12})[0]; + auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3}; + auto const expect_vals = [] { + auto child1 = + strings_column_wrapper{"año", "aaa", "aaa", "bit", "bit", "bat", "bat", "₹1", "$1", "$1"}; + auto child2 = fixed_width_column_wrapper{1, 4, 4, 2, 2, 6, 6, 3, 8, 8}; + return structs_column_wrapper{{child1, child2}}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + +TEST_F(groupby_min_scan_struct_test, null_keys_and_values) +{ + constexpr int32_t null{0}; + auto const keys = key_wrapper{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)}; + auto const vals = [] { + auto child1 = strings_column_wrapper{ + "año", "bit", "₹1", "aaa", "zit", "" /*NULL*/, "" /*NULL*/, "$1", "€1", "wut", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 8, 7, 6, 5, null, null, 2, 1, 0, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({5, 6, 10})}; + }(); + + auto const expect_keys = key_wrapper{{1, 1, 1, 2, 2, 2, 2, 3, 3, 4}, no_nulls()}; + auto const expect_vals = [] { + auto child1 = strings_column_wrapper{ + "año", "aaa", "" /*NULL*/, "bit", "bit", "" /*NULL*/, "bit", "₹1", "€1", "" /*NULL*/}; + auto child2 = fixed_width_column_wrapper{9, 6, null, 8, 8, null, 8, 7, 1, null}; + return structs_column_wrapper{{child1, child2}, nulls_at({2, 5, 9})}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf From bce93e40934b0d7dc654040a606690eb524e54d4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Nov 2021 14:31:22 -0600 Subject: [PATCH 15/48] Rewrite SFINAE style --- cpp/src/groupby/sort/group_scan_util.cuh | 109 +++++++++++------- .../sort/group_single_pass_reduction_util.cuh | 104 +++++++++-------- 2 files changed, 124 insertions(+), 89 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index dba066fc55c..c899052b058 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -42,27 +42,55 @@ namespace cudf { namespace groupby { namespace detail { +// Error case when no other overload or specialization is available +template +struct scan_functor_impl { + template + std::unique_ptr operator()(Args&&...) + { + CUDF_FAIL("Unsupported groupby scan type-agg combination."); + } +}; + template struct scan_functor { template - static constexpr bool is_trivially_supported() + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - if (K == aggregation::SUM) - return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); - else if (K == aggregation::MIN or K == aggregation::MAX) - return !cudf::is_dictionary() and is_relationally_comparable(); - else - return false; + return scan_functor_impl{}(values, num_groups, group_labels, stream, mr); } +}; - template - std::enable_if_t() and not std::is_same_v, - std::unique_ptr> - operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +/** + * @brief Check if the given aggregation K with data type T is supported in groupby scan. + */ +template +static constexpr bool is_scan_supported() +{ + if (K == aggregation::SUM) + return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); + else if (K == aggregation::MIN or K == aggregation::MAX) + return not cudf::is_dictionary() and + (is_relationally_comparable() or std::is_same_v); + else + return false; +} + +template +struct scan_functor_impl< + K, + T, + std::enable_if_t() and not std::is_same_v and + not std::is_same_v>> { + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using DeviceType = device_storage_type_t; using OpType = cudf::detail::corresponding_operator_t; @@ -108,15 +136,18 @@ struct scan_functor { } return result; } +}; - template - std::enable_if_t() and std::is_same_v, - std::unique_ptr> - operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct scan_functor_impl< + K, + T, + std::enable_if_t() and std::is_same_v>> { + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using OpType = cudf::detail::corresponding_operator_t; @@ -152,16 +183,18 @@ struct scan_functor { results->set_null_mask(cudf::detail::copy_bitmask(values, stream), values.null_count()); return results; } +}; - template - std::enable_if_t and - (K == aggregation::MIN or K == aggregation::MAX), - std::unique_ptr> - operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct scan_functor_impl< + K, + T, + std::enable_if_t() and std::is_same_v>> { + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (values.is_empty()) { return cudf::empty_like(values); } @@ -218,7 +251,7 @@ struct scan_functor { ->release(); // After gathering the children elements, we need to push down nulls from the root structs - // column to them (so we will have null row in => null row out). + // column to them. if (values.has_nulls()) { for (std::unique_ptr& child : scanned_children) { structs::detail::superimpose_parent_nulls( @@ -231,16 +264,6 @@ struct scan_functor { values.null_count(), cudf::detail::copy_bitmask(values, stream)); } - - template - std::enable_if_t() and - (not std::is_same_v or - (K != aggregation::MIN and K != aggregation::MAX)), - std::unique_ptr> - operator()(Args&&... args) - { - CUDF_FAIL("Unsupported groupby scan type-agg combination"); - } }; } // namespace detail diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 3e1f994cf7b..e3dbfe23e57 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -116,31 +116,59 @@ struct null_replaced_value_accessor : value_accessor { } }; +// Error case when no other overload or specialization is available +template +struct reduce_functor_impl { + template + std::unique_ptr operator()(Args&&...) + { + CUDF_FAIL("Unsupported groupby reduction type-agg combination."); + } +}; + template struct reduce_functor { template - static constexpr bool is_trivially_supported() + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - switch (K) { - case aggregation::SUM: - return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); - case aggregation::PRODUCT: return cudf::detail::is_product_supported(); - case aggregation::MIN: - case aggregation::MAX: - return cudf::is_fixed_width() and is_relationally_comparable(); - case aggregation::ARGMIN: - case aggregation::ARGMAX: return is_relationally_comparable(); - default: return false; - } + return reduce_functor_impl{}(values, num_groups, group_labels, stream, mr); } +}; + +/** + * @brief Check if the given aggregation K with data type T is supported in groupby reduction. + */ +template +static constexpr bool is_redution_supported() +{ + switch (K) { + case aggregation::SUM: + return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); + case aggregation::PRODUCT: return cudf::detail::is_product_supported(); + case aggregation::MIN: + case aggregation::MAX: return cudf::is_fixed_width() and is_relationally_comparable(); + case aggregation::ARGMIN: + case aggregation::ARGMAX: + return is_relationally_comparable() or std::is_same_v; + default: return false; + } +} + +template +struct reduce_functor_impl< + K, + T, + std::enable_if_t() and not std::is_same_v>> { + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) - template - std::enable_if_t(), std::unique_ptr> operator()( - column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) { using DeviceType = device_storage_type_t; using ResultType = cudf::detail::target_type_t; @@ -200,21 +228,18 @@ struct reduce_functor { } return result; } +}; - // This specialization handles the cases when the input values type: - // - Is struct type, and - // - Aggregation is either ARGMIN or ARGMAX. - // Since `is_trivially_supported` returns false for `struct_view`, we don't have to cover it in - // the SFINAE condition. - template - std::enable_if_t and - (K == aggregation::ARGMIN or K == aggregation::ARGMAX), - std::unique_ptr> - operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +template +struct reduce_functor_impl< + K, + T, + std::enable_if_t() and std::is_same_v>> { + std::unique_ptr operator()(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // This is be expected to be size_type. using ResultType = cudf::detail::target_type_t; @@ -274,19 +299,6 @@ struct reduce_functor { return result; } - - // Throw exception if the input values type: - // - Is not trivially supported, and - // - Is not struct type, or is struct type but aggregation is not neither ARGMIN nor ARGMAX. - template - std::enable_if_t() and - (not std::is_same_v or - (K != aggregation::ARGMIN and K != aggregation::ARGMAX)), - std::unique_ptr> - operator()(Args&&... args) - { - CUDF_FAIL("Unsupported type-agg combination"); - } }; } // namespace detail From b1b916f2239fdd081255eccd334169cb74b382b6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 1 Nov 2021 16:21:47 -0600 Subject: [PATCH 16/48] Add missing `mr` parameter --- cpp/src/groupby/sort/group_scan_util.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index c899052b058..a1f4b6c9843 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -262,7 +262,7 @@ struct scan_functor_impl< return make_structs_column(values.size(), std::move(scanned_children), values.null_count(), - cudf::detail::copy_bitmask(values, stream)); + cudf::detail::copy_bitmask(values, stream, mr)); } }; From 75e201f5da16ac3907a9d161f8fc134f81c3736c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Nov 2021 09:24:52 -0600 Subject: [PATCH 17/48] Refactor `row_arg_minmax` --- cpp/src/groupby/sort/group_scan_util.cuh | 16 ++++--- .../sort/group_single_pass_reduction_util.cuh | 45 ++++++++++--------- cpp/src/groupby/sort/group_util.cuh | 9 ++-- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index a1f4b6c9843..743ef97d505 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -227,13 +227,17 @@ struct scan_functor_impl< // Find the indices of the prefix min/max elements within each group. auto const count_iter = thrust::make_counting_iterator(0); if (values.has_nulls()) { - auto const op = row_arg_minmax_fn( - values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); - do_scan(count_iter, map_begin, op); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); } else { - auto const op = row_arg_minmax_fn( - values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); - do_scan(count_iter, map_begin, op); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); } auto gather_map_view = diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index e3dbfe23e57..9e8451f7794 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -47,9 +47,12 @@ namespace detail { * * @tparam T Type of the underlying column. Must support '<' operator. */ -template +template struct element_arg_minmax_fn { column_device_view const d_col; + bool const has_nulls; + bool const arg_min; + CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const { // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and @@ -200,19 +203,15 @@ struct reduce_functor_impl< if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) { auto const count_iter = thrust::make_counting_iterator(0); - if (values.has_nulls()) { - using OpType = element_arg_minmax_fn; - do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); - } else { - using OpType = element_arg_minmax_fn; - do_reduction(count_iter, result_begin, OpType{*d_values_ptr}); - } + auto const binop = + element_arg_minmax_fn{*d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN}; + do_reduction(count_iter, result_begin, binop); } else { - using OpType = cudf::detail::corresponding_operator_t; - auto init = OpType::template identity(); - auto begin = cudf::detail::make_counting_transform_iterator( + using OpType = cudf::detail::corresponding_operator_t; + auto init = OpType::template identity(); + auto inp_values = cudf::detail::make_counting_transform_iterator( 0, null_replaced_value_accessor{*d_values_ptr, init, values.has_nulls()}); - do_reduction(begin, result_begin, OpType{}); + do_reduction(inp_values, result_begin, OpType{}); } if (values.has_nulls()) { @@ -223,8 +222,7 @@ struct reduce_functor_impl< auto [null_mask, null_count] = cudf::detail::valid_if( validity.begin(), validity.end(), thrust::identity{}, stream, mr); - result->set_null_mask(std::move(null_mask)); - result->set_null_count(null_count); + result->set_null_mask(std::move(null_mask), null_count); } return result; } @@ -276,9 +274,11 @@ struct reduce_functor_impl< auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (values.has_nulls()) { - auto const op = row_arg_minmax_fn( - values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); - do_reduction(count_iter, result_begin, op); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); // Generate bitmask for the output by segmented reduction of the input bitmask. auto const d_values_ptr = column_device_view::create(values, stream); @@ -289,12 +289,13 @@ struct reduce_functor_impl< auto [null_mask, null_count] = cudf::detail::valid_if( validity.begin(), validity.end(), thrust::identity{}, stream, mr); - result->set_null_mask(std::move(null_mask)); - result->set_null_count(null_count); + result->set_null_mask(std::move(null_mask), null_count); } else { - auto const op = row_arg_minmax_fn( - values.size(), *d_flattened_values_ptr, flattened_null_precedences.data()); - do_reduction(count_iter, result_begin, op); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); } return result; diff --git a/cpp/src/groupby/sort/group_util.cuh b/cpp/src/groupby/sort/group_util.cuh index 43de1b4a9d8..6475585a97e 100644 --- a/cpp/src/groupby/sort/group_util.cuh +++ b/cpp/src/groupby/sort/group_util.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,15 +28,16 @@ namespace detail { * @tparam T Type of the underlying data. This is the fallback for the cases when T does not support * '<' operator. */ -template +template struct row_arg_minmax_fn { size_type const num_rows; row_lexicographic_comparator const comp; + bool const arg_min; row_arg_minmax_fn(size_type const num_rows_, table_device_view const& table_, - null_order const* null_precedence) - : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence) + null_order const* null_precedence_, bool const arg_min_) + : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence_), arg_min(arg_min_) { } From 08a60f8f8b39f2d37c4b7313e32208e8c2db0f94 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Nov 2021 13:00:39 -0600 Subject: [PATCH 18/48] Adopt "dispatch to static invoke" pattern --- cpp/src/groupby/sort/group_scan_util.cuh | 34 +++++++++---------- .../sort/group_single_pass_reduction_util.cuh | 24 ++++++------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 743ef97d505..570f349642f 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -46,7 +46,7 @@ namespace detail { template struct scan_functor_impl { template - std::unique_ptr operator()(Args&&...) + static std::unique_ptr invoke(Args&&...) { CUDF_FAIL("Unsupported groupby scan type-agg combination."); } @@ -61,7 +61,7 @@ struct scan_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return scan_functor_impl{}(values, num_groups, group_labels, stream, mr); + return scan_functor_impl::invoke(values, num_groups, group_labels, stream, mr); } }; @@ -86,11 +86,11 @@ struct scan_functor_impl< T, std::enable_if_t() and not std::is_same_v and not std::is_same_v>> { - std::unique_ptr operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using DeviceType = device_storage_type_t; using OpType = cudf::detail::corresponding_operator_t; @@ -143,11 +143,11 @@ struct scan_functor_impl< K, T, std::enable_if_t() and std::is_same_v>> { - std::unique_ptr operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using OpType = cudf::detail::corresponding_operator_t; @@ -190,11 +190,11 @@ struct scan_functor_impl< K, T, std::enable_if_t() and std::is_same_v>> { - std::unique_ptr operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { if (values.is_empty()) { return cudf::empty_like(values); } diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 9e8451f7794..dd2098c81ec 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -123,7 +123,7 @@ struct null_replaced_value_accessor : value_accessor { template struct reduce_functor_impl { template - std::unique_ptr operator()(Args&&...) + static std::unique_ptr invoke(Args&&...) { CUDF_FAIL("Unsupported groupby reduction type-agg combination."); } @@ -138,7 +138,7 @@ struct reduce_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return reduce_functor_impl{}(values, num_groups, group_labels, stream, mr); + return reduce_functor_impl::invoke(values, num_groups, group_labels, stream, mr); } }; @@ -166,11 +166,11 @@ struct reduce_functor_impl< K, T, std::enable_if_t() and not std::is_same_v>> { - std::unique_ptr operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using DeviceType = device_storage_type_t; @@ -233,11 +233,11 @@ struct reduce_functor_impl< K, T, std::enable_if_t() and std::is_same_v>> { - std::unique_ptr operator()(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // This is be expected to be size_type. using ResultType = cudf::detail::target_type_t; From 3a0c580bdb7ae9339b9316f95ec6d49b624dbd12 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Nov 2021 13:19:58 -0600 Subject: [PATCH 19/48] Rename functors to better expressive names --- cpp/src/groupby/sort/group_argmax.cu | 2 +- cpp/src/groupby/sort/group_argmin.cu | 2 +- cpp/src/groupby/sort/group_max.cu | 9 +++++++-- cpp/src/groupby/sort/group_max_scan.cu | 2 +- cpp/src/groupby/sort/group_min.cu | 9 +++++++-- cpp/src/groupby/sort/group_min_scan.cu | 9 +++++++-- cpp/src/groupby/sort/group_product.cu | 2 +- cpp/src/groupby/sort/group_scan_util.cuh | 20 +++++++++---------- .../sort/group_single_pass_reduction_util.cuh | 20 +++++++++---------- cpp/src/groupby/sort/group_sum.cu | 9 +++++++-- cpp/src/groupby/sort/group_sum_scan.cu | 9 +++++++-- 11 files changed, 59 insertions(+), 34 deletions(-) diff --git a/cpp/src/groupby/sort/group_argmax.cu b/cpp/src/groupby/sort/group_argmax.cu index 6ce23ffc35b..466171ec80b 100644 --- a/cpp/src/groupby/sort/group_argmax.cu +++ b/cpp/src/groupby/sort/group_argmax.cu @@ -34,7 +34,7 @@ std::unique_ptr group_argmax(column_view const& values, rmm::mr::device_memory_resource* mr) { auto indices = type_dispatcher(values.type(), - reduce_functor{}, + group_reduction_dispatcher{}, values, num_groups, group_labels, diff --git a/cpp/src/groupby/sort/group_argmin.cu b/cpp/src/groupby/sort/group_argmin.cu index ab91c2c0d29..4f7b2b713e6 100644 --- a/cpp/src/groupby/sort/group_argmin.cu +++ b/cpp/src/groupby/sort/group_argmin.cu @@ -34,7 +34,7 @@ std::unique_ptr group_argmin(column_view const& values, rmm::mr::device_memory_resource* mr) { auto indices = type_dispatcher(values.type(), - reduce_functor{}, + group_reduction_dispatcher{}, values, num_groups, group_labels, diff --git a/cpp/src/groupby/sort/group_max.cu b/cpp/src/groupby/sort/group_max.cu index 7dd0e43ad28..5da15266233 100644 --- a/cpp/src/groupby/sort/group_max.cu +++ b/cpp/src/groupby/sort/group_max.cu @@ -30,8 +30,13 @@ std::unique_ptr group_max(column_view const& values, auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() : values.type(); - return type_dispatcher( - values_type, reduce_functor{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values_type, + group_reduction_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_max_scan.cu b/cpp/src/groupby/sort/group_max_scan.cu index 303d606be9d..15d13f3f4b9 100644 --- a/cpp/src/groupby/sort/group_max_scan.cu +++ b/cpp/src/groupby/sort/group_max_scan.cu @@ -28,7 +28,7 @@ std::unique_ptr max_scan(column_view const& values, rmm::mr::device_memory_resource* mr) { return type_dispatcher( - values.type(), scan_functor{}, values, num_groups, group_labels, stream, mr); + values.type(), group_scan_dispatcher{}, values, num_groups, group_labels, stream, mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_min.cu b/cpp/src/groupby/sort/group_min.cu index 4124ec0f6f6..c42a0b94de0 100644 --- a/cpp/src/groupby/sort/group_min.cu +++ b/cpp/src/groupby/sort/group_min.cu @@ -30,8 +30,13 @@ std::unique_ptr group_min(column_view const& values, auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() : values.type(); - return type_dispatcher( - values_type, reduce_functor{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values_type, + group_reduction_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_min_scan.cu b/cpp/src/groupby/sort/group_min_scan.cu index 4a692cdf0bd..daaeb6bb6f7 100644 --- a/cpp/src/groupby/sort/group_min_scan.cu +++ b/cpp/src/groupby/sort/group_min_scan.cu @@ -27,8 +27,13 @@ std::unique_ptr min_scan(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return type_dispatcher( - values.type(), scan_functor{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values.type(), + group_scan_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_product.cu b/cpp/src/groupby/sort/group_product.cu index e9cf8611b58..74f5cbed041 100644 --- a/cpp/src/groupby/sort/group_product.cu +++ b/cpp/src/groupby/sort/group_product.cu @@ -33,7 +33,7 @@ std::unique_ptr group_product(column_view const& values, ? dictionary_column_view(values).keys().type() : values.type(); return type_dispatcher(values_type, - reduce_functor{}, + group_reduction_dispatcher{}, values, num_groups, group_labels, diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 570f349642f..3b61cf5725b 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -44,7 +44,7 @@ namespace groupby { namespace detail { // Error case when no other overload or specialization is available template -struct scan_functor_impl { +struct group_scan_functor { template static std::unique_ptr invoke(Args&&...) { @@ -53,7 +53,7 @@ struct scan_functor_impl { }; template -struct scan_functor { +struct group_scan_dispatcher { template std::unique_ptr operator()(column_view const& values, size_type num_groups, @@ -61,7 +61,7 @@ struct scan_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return scan_functor_impl::invoke(values, num_groups, group_labels, stream, mr); + return group_scan_functor::invoke(values, num_groups, group_labels, stream, mr); } }; @@ -69,7 +69,7 @@ struct scan_functor { * @brief Check if the given aggregation K with data type T is supported in groupby scan. */ template -static constexpr bool is_scan_supported() +static constexpr bool is_group_scan_supported() { if (K == aggregation::SUM) return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); @@ -81,10 +81,10 @@ static constexpr bool is_scan_supported() } template -struct scan_functor_impl< +struct group_scan_functor< K, T, - std::enable_if_t() and not std::is_same_v and + std::enable_if_t() and not std::is_same_v and not std::is_same_v>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, @@ -139,10 +139,10 @@ struct scan_functor_impl< }; template -struct scan_functor_impl< +struct group_scan_functor< K, T, - std::enable_if_t() and std::is_same_v>> { + std::enable_if_t() and std::is_same_v>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -186,10 +186,10 @@ struct scan_functor_impl< }; template -struct scan_functor_impl< +struct group_scan_functor< K, T, - std::enable_if_t() and std::is_same_v>> { + std::enable_if_t() and std::is_same_v>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index dd2098c81ec..1c5e23cd5c8 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -121,7 +121,7 @@ struct null_replaced_value_accessor : value_accessor { // Error case when no other overload or specialization is available template -struct reduce_functor_impl { +struct group_reduction_functor { template static std::unique_ptr invoke(Args&&...) { @@ -130,7 +130,7 @@ struct reduce_functor_impl { }; template -struct reduce_functor { +struct group_reduction_dispatcher { template std::unique_ptr operator()(column_view const& values, size_type num_groups, @@ -138,7 +138,7 @@ struct reduce_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return reduce_functor_impl::invoke(values, num_groups, group_labels, stream, mr); + return group_reduction_functor::invoke(values, num_groups, group_labels, stream, mr); } }; @@ -146,7 +146,7 @@ struct reduce_functor { * @brief Check if the given aggregation K with data type T is supported in groupby reduction. */ template -static constexpr bool is_redution_supported() +static constexpr bool is_group_redution_supported() { switch (K) { case aggregation::SUM: @@ -162,10 +162,10 @@ static constexpr bool is_redution_supported() } template -struct reduce_functor_impl< - K, - T, - std::enable_if_t() and not std::is_same_v>> { +struct group_reduction_functor() and + not std::is_same_v>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -229,10 +229,10 @@ struct reduce_functor_impl< }; template -struct reduce_functor_impl< +struct group_reduction_functor< K, T, - std::enable_if_t() and std::is_same_v>> { + std::enable_if_t() and std::is_same_v>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, diff --git a/cpp/src/groupby/sort/group_sum.cu b/cpp/src/groupby/sort/group_sum.cu index e9e6e985c54..e3c2ce7c864 100644 --- a/cpp/src/groupby/sort/group_sum.cu +++ b/cpp/src/groupby/sort/group_sum.cu @@ -32,8 +32,13 @@ std::unique_ptr group_sum(column_view const& values, auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() : values.type(); - return type_dispatcher( - values_type, reduce_functor{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values_type, + group_reduction_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail diff --git a/cpp/src/groupby/sort/group_sum_scan.cu b/cpp/src/groupby/sort/group_sum_scan.cu index ae9b1c321d4..632fde3b9d5 100644 --- a/cpp/src/groupby/sort/group_sum_scan.cu +++ b/cpp/src/groupby/sort/group_sum_scan.cu @@ -27,8 +27,13 @@ std::unique_ptr sum_scan(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return type_dispatcher( - values.type(), scan_functor{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values.type(), + group_scan_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail From f4c53c274c721ebfb5eb937cfb93901b3bdef278 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Nov 2021 14:26:34 -0600 Subject: [PATCH 20/48] Fix formatting style --- cpp/src/groupby/sort/group_util.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_util.cuh b/cpp/src/groupby/sort/group_util.cuh index 6475585a97e..08803ba7916 100644 --- a/cpp/src/groupby/sort/group_util.cuh +++ b/cpp/src/groupby/sort/group_util.cuh @@ -36,7 +36,8 @@ struct row_arg_minmax_fn { row_arg_minmax_fn(size_type const num_rows_, table_device_view const& table_, - null_order const* null_precedence_, bool const arg_min_) + null_order const* null_precedence_, + bool const arg_min_) : num_rows(num_rows_), comp(table_, table_, nullptr, null_precedence_), arg_min(arg_min_) { } From b1a3628ede3fc1943c02f31dc32e0f768b76332d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Nov 2021 14:30:40 -0600 Subject: [PATCH 21/48] Fix formatting style --- cpp/src/groupby/sort/group_max_scan.cu | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/group_max_scan.cu b/cpp/src/groupby/sort/group_max_scan.cu index 15d13f3f4b9..1551dc00a04 100644 --- a/cpp/src/groupby/sort/group_max_scan.cu +++ b/cpp/src/groupby/sort/group_max_scan.cu @@ -27,8 +27,13 @@ std::unique_ptr max_scan(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return type_dispatcher( - values.type(), group_scan_dispatcher{}, values, num_groups, group_labels, stream, mr); + return type_dispatcher(values.type(), + group_scan_dispatcher{}, + values, + num_groups, + group_labels, + stream, + mr); } } // namespace detail From d868d6628c4b5bc6c70926eec8dc344dc8485fd4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Nov 2021 09:28:17 -0600 Subject: [PATCH 22/48] Remove redundant template argument --- cpp/src/groupby/sort/group_util.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_util.cuh b/cpp/src/groupby/sort/group_util.cuh index 08803ba7916..98fcb199fb7 100644 --- a/cpp/src/groupby/sort/group_util.cuh +++ b/cpp/src/groupby/sort/group_util.cuh @@ -28,7 +28,7 @@ namespace detail { * @tparam T Type of the underlying data. This is the fallback for the cases when T does not support * '<' operator. */ -template +template struct row_arg_minmax_fn { size_type const num_rows; row_lexicographic_comparator const comp; From 94eed997c05236e049ca4d6114f91cdc63c3ad52 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Nov 2021 09:29:05 -0600 Subject: [PATCH 23/48] Rewrite SFINAE into specialization --- cpp/src/groupby/sort/group_scan_util.cuh | 40 ++++++++----------- .../sort/group_single_pass_reduction_util.cuh | 29 ++++++-------- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 3b61cf5725b..013ea924cce 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -81,11 +81,7 @@ static constexpr bool is_group_scan_supported() } template -struct group_scan_functor< - K, - T, - std::enable_if_t() and not std::is_same_v and - not std::is_same_v>> { +struct group_scan_functor()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -138,11 +134,10 @@ struct group_scan_functor< } }; -template -struct group_scan_functor< - K, - T, - std::enable_if_t() and std::is_same_v>> { +template +struct group_scan_functor()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -185,11 +180,10 @@ struct group_scan_functor< } }; -template -struct group_scan_functor< - K, - T, - std::enable_if_t() and std::is_same_v>> { +template +struct group_scan_functor()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -227,16 +221,16 @@ struct group_scan_functor< // Find the indices of the prefix min/max elements within each group. auto const count_iter = thrust::make_counting_iterator(0); if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); do_scan(count_iter, map_begin, binop); } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); do_scan(count_iter, map_begin, binop); } diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 1c5e23cd5c8..73ca658ce81 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -162,10 +162,7 @@ static constexpr bool is_group_redution_supported() } template -struct group_reduction_functor() and - not std::is_same_v>> { +struct group_reduction_functor()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -228,11 +225,11 @@ struct group_reduction_functor +template struct group_reduction_functor< K, - T, - std::enable_if_t() and std::is_same_v>> { + cudf::struct_view, + std::enable_if_t()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, @@ -240,7 +237,7 @@ struct group_reduction_functor< rmm::mr::device_memory_resource* mr) { // This is be expected to be size_type. - using ResultType = cudf::detail::target_type_t; + using ResultType = cudf::detail::target_type_t; auto result = make_fixed_width_column( data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); @@ -274,10 +271,10 @@ struct group_reduction_functor< auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); do_reduction(count_iter, result_begin, binop); // Generate bitmask for the output by segmented reduction of the input bitmask. @@ -291,10 +288,10 @@ struct group_reduction_functor< validity.begin(), validity.end(), thrust::identity{}, stream, mr); result->set_null_mask(std::move(null_mask), null_count); } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); do_reduction(count_iter, result_begin, binop); } From cb6fb5f2dfcc0c0378f051039148588d0181c1e5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 10:12:04 -0600 Subject: [PATCH 24/48] Attempt to patch thrust --- cpp/cmake/thrust.patch | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch index 2f9201d8ab4..772b63d534d 100644 --- a/cpp/cmake/thrust.patch +++ b/cpp/cmake/thrust.patch @@ -1,3 +1,76 @@ +diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h +index c9178628..b287b0d4 100644 +--- a/thrust/system/cuda/detail/scan_by_key.h ++++ b/thrust/system/cuda/detail/scan_by_key.h +@@ -340,7 +340,7 @@ namespace __scan_by_key { + size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) + { + // Zip values and segment_flags +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + // Set segment_flags for first out-of-bounds item, zero for others +@@ -358,7 +358,7 @@ namespace __scan_by_key { + size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) + { + // Zip values and segment_flags +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + values[ITEM] = scan_items[ITEM].value; +@@ -616,7 +616,7 @@ namespace __scan_by_key { + operator()(T (&items)[ITEMS_PER_THREAD], + Size (&flags)[ITEMS_PER_THREAD]) + { +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + items[ITEM] = flags[ITEM] ? init : scan_op(init, items[ITEM]); + +diff --git a/thrust/system/cuda/detail/reduce_by_key.h b/thrust/system/cuda/detail/reduce_by_key.h +index ba66f6d8..bb3bff56 100644 +--- a/thrust/system/cuda/detail/reduce_by_key.h ++++ b/thrust/system/cuda/detail/reduce_by_key.h +@@ -369,7 +369,7 @@ namespace __reduce_by_key { + size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) + { + // Zip values and segment_flags +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + // Set segment_flags for first out-of-bounds item, zero for others +@@ -389,7 +389,7 @@ namespace __reduce_by_key { + key_value_pair_t (&scatter_items)[ITEMS_PER_THREAD]) + { + // Zip values and segment_flags +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + scatter_items[ITEM].key = keys[ITEM]; +@@ -410,7 +410,7 @@ namespace __reduce_by_key { + size_type (&segment_indices)[ITEMS_PER_THREAD]) + { + // Scatter flagged keys and values +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (segment_flags[ITEM]) +@@ -440,7 +440,7 @@ namespace __reduce_by_key { + sync_threadblock(); + + // Compact and scatter keys +-#pragma unroll ++#pragma unroll 1 + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (segment_flags[ITEM]) + diff --git a/thrust/system/cuda/detail/sort.h b/thrust/system/cuda/detail/sort.h index 1ffeef0..5e80800 100644 --- a/thrust/system/cuda/detail/sort.h From e6885d4767449c02d3a51344767265779620c24d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 12:07:24 -0600 Subject: [PATCH 25/48] Revert "Attempt to patch thrust" This reverts commit cb6fb5f2dfcc0c0378f051039148588d0181c1e5. --- cpp/cmake/thrust.patch | 73 ------------------------------------------ 1 file changed, 73 deletions(-) diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch index 772b63d534d..2f9201d8ab4 100644 --- a/cpp/cmake/thrust.patch +++ b/cpp/cmake/thrust.patch @@ -1,76 +1,3 @@ -diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h -index c9178628..b287b0d4 100644 ---- a/thrust/system/cuda/detail/scan_by_key.h -+++ b/thrust/system/cuda/detail/scan_by_key.h -@@ -340,7 +340,7 @@ namespace __scan_by_key { - size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) - { - // Zip values and segment_flags --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Set segment_flags for first out-of-bounds item, zero for others -@@ -358,7 +358,7 @@ namespace __scan_by_key { - size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) - { - // Zip values and segment_flags --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - values[ITEM] = scan_items[ITEM].value; -@@ -616,7 +616,7 @@ namespace __scan_by_key { - operator()(T (&items)[ITEMS_PER_THREAD], - Size (&flags)[ITEMS_PER_THREAD]) - { --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - items[ITEM] = flags[ITEM] ? init : scan_op(init, items[ITEM]); - -diff --git a/thrust/system/cuda/detail/reduce_by_key.h b/thrust/system/cuda/detail/reduce_by_key.h -index ba66f6d8..bb3bff56 100644 ---- a/thrust/system/cuda/detail/reduce_by_key.h -+++ b/thrust/system/cuda/detail/reduce_by_key.h -@@ -369,7 +369,7 @@ namespace __reduce_by_key { - size_value_pair_t (&scan_items)[ITEMS_PER_THREAD]) - { - // Zip values and segment_flags --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Set segment_flags for first out-of-bounds item, zero for others -@@ -389,7 +389,7 @@ namespace __reduce_by_key { - key_value_pair_t (&scatter_items)[ITEMS_PER_THREAD]) - { - // Zip values and segment_flags --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - scatter_items[ITEM].key = keys[ITEM]; -@@ -410,7 +410,7 @@ namespace __reduce_by_key { - size_type (&segment_indices)[ITEMS_PER_THREAD]) - { - // Scatter flagged keys and values --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - if (segment_flags[ITEM]) -@@ -440,7 +440,7 @@ namespace __reduce_by_key { - sync_threadblock(); - - // Compact and scatter keys --#pragma unroll -+#pragma unroll 1 - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - if (segment_flags[ITEM]) - diff --git a/thrust/system/cuda/detail/sort.h b/thrust/system/cuda/detail/sort.h index 1ffeef0..5e80800 100644 --- a/thrust/system/cuda/detail/sort.h From d4d46447583b860a9f181d850240bfea7f1bece4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 13:29:54 -0600 Subject: [PATCH 26/48] Add declaration for new internal APIs --- cpp/src/groupby/sort/group_reductions.hpp | 20 ++++++++++++++++++++ cpp/src/groupby/sort/group_scan.hpp | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 75708c7b01c..268c088fe87 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -173,6 +173,26 @@ std::unique_ptr group_argmin(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to calculate group-wise indices of minimum/maximum values, specialized for + * STRUCT type. + * + * @param K The aggregation kind, must be `aggregation::ARGMIN` or `aggregation::ARGMAX` + * @param values Grouped values to get minimum value's index from + * @param num_groups Number of groups + * @param group_labels ID of group that the corresponding value belongs to + * @param key_sort_order Indices indicating sort order of groupby keys + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr group_argminmax_struct(aggregation::Kind K, + column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + column_view const& key_sort_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to calculate number of non-null values in each group of * @p values diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index 82ef0e25380..2be779b6f3b 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -72,6 +72,24 @@ std::unique_ptr max_scan(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Internal API to calculate groupwise cumulative minimum/maximum value, specialized for + * STRUCT type. + * + * @param K The aggregation kind, must be `aggregation::MIN` or `aggregation::MAX` + * @param values Grouped values to get maximum from + * @param num_groups Number of groups + * @param group_labels ID of group that the corresponding value belongs to + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr minmax_scan_struct(aggregation::Kind K, + column_view const& values, + size_type num_groups, + device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to calculate cumulative number of values in each group * From ad7998fcdc4ee96d8ecc43cdb07f0e77071a4319 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 13:30:18 -0600 Subject: [PATCH 27/48] Call the specialized functions for struct type values --- cpp/src/groupby/sort/aggregate.cpp | 48 ++++++++++++++++++++---------- cpp/src/groupby/sort/scan.cpp | 34 ++++++++++++++------- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 83c6c1bca57..7bce5a3e0fa 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -117,14 +117,22 @@ void aggregate_result_functor::operator()(aggregation const { if (cache.has_result(values, agg)) return; - cache.add_result(values, - agg, - detail::group_argmax(get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr)); + auto result = values.type().id() == type_id::STRUCT + ? detail::group_argminmax_struct(aggregation::ARGMAX, + get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr) + : detail::group_argmax(get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr); + + cache.add_result(values, agg, std::move(result)); }; template <> @@ -132,14 +140,22 @@ void aggregate_result_functor::operator()(aggregation const { if (cache.has_result(values, agg)) return; - cache.add_result(values, - agg, - detail::group_argmin(get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr)); + auto result = values.type().id() == type_id::STRUCT + ? detail::group_argminmax_struct(aggregation::ARGMIN, + get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr) + : detail::group_argmin(get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr); + + cache.add_result(values, agg, std::move(result)); }; template <> diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index b22f82ce7e4..eed6bd52faf 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -81,11 +81,18 @@ void scan_result_functor::operator()(aggregation const& agg) { if (cache.has_result(values, agg)) return; - cache.add_result( - values, - agg, - detail::min_scan( - get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); + auto result = + values.type().id() == type_id::STRUCT + ? detail::minmax_scan_struct(aggregation::MIN, + get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + stream, + mr) + : detail::min_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); + + cache.add_result(values, agg, std::move(result)); } template <> @@ -93,11 +100,18 @@ void scan_result_functor::operator()(aggregation const& agg) { if (cache.has_result(values, agg)) return; - cache.add_result( - values, - agg, - detail::max_scan( - get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); + auto result = + values.type().id() == type_id::STRUCT + ? detail::minmax_scan_struct(aggregation::MAX, + get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + stream, + mr) + : detail::max_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); + + cache.add_result(values, agg, std::move(result)); } template <> From 0c2b0b4f03ad14e47e22dc2c0788e65918bd2c33 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 13:30:28 -0600 Subject: [PATCH 28/48] Add new .cu files --- cpp/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e06d02b0bcb..c70d1cbe509 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -223,6 +223,7 @@ add_library(cudf src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu + src/groupby/sort/group_argminmax_struct.cu src/groupby/sort/group_collect.cu src/groupby/sort/group_correlation.cu src/groupby/sort/group_count.cu @@ -241,6 +242,7 @@ add_library(cudf src/groupby/sort/group_count_scan.cu src/groupby/sort/group_max_scan.cu src/groupby/sort/group_min_scan.cu + src/groupby/sort/group_minmax_scan_struct.cu src/groupby/sort/group_rank_scan.cu src/groupby/sort/group_replace_nulls.cu src/groupby/sort/group_sum_scan.cu From f063e266988541a220b2d2e7615c4a7a30a13766 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 13:57:03 -0600 Subject: [PATCH 29/48] Remove `struct_view` specialization --- cpp/src/groupby/sort/group_scan_util.cuh | 87 +------------------ .../sort/group_single_pass_reduction_util.cuh | 77 +--------------- 2 files changed, 2 insertions(+), 162 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 013ea924cce..f0d27830bfb 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -74,8 +74,7 @@ static constexpr bool is_group_scan_supported() if (K == aggregation::SUM) return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); else if (K == aggregation::MIN or K == aggregation::MAX) - return not cudf::is_dictionary() and - (is_relationally_comparable() or std::is_same_v); + return not cudf::is_dictionary() and is_relationally_comparable(); else return false; } @@ -180,90 +179,6 @@ struct group_scan_functor -struct group_scan_functor()>> { - static std::unique_ptr invoke(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - if (values.is_empty()) { return cudf::empty_like(values); } - - // When finding MIN, we need to consider nulls as larger than non-null elements. - // Thing is opposite when finding MAX. - auto const null_precedence = (K == aggregation::MIN) ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = structs::detail::flatten_nested_columns( - table_view{{values}}, {}, std::vector{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::MIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); - - // Create a gather map contaning indices of the prefix min/max elements. - auto gather_map = rmm::device_uvector(values.size(), stream); - auto const map_begin = gather_map.begin(); - - // Perform segmented scan. - auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - inp_iter, - out_iter, - thrust::equal_to{}, - binop); - }; - - // Find the indices of the prefix min/max elements within each group. - auto const count_iter = thrust::make_counting_iterator(0); - if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); - do_scan(count_iter, map_begin, binop); - } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); - do_scan(count_iter, map_begin, binop); - } - - auto gather_map_view = - column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); - - // Gather the children elements of the prefix min/max struct elements first. - auto scanned_children = - cudf::detail::gather( - table_view(std::vector{values.child_begin(), values.child_end()}), - gather_map_view, - cudf::out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release(); - - // After gathering the children elements, we need to push down nulls from the root structs - // column to them. - if (values.has_nulls()) { - for (std::unique_ptr& child : scanned_children) { - structs::detail::superimpose_parent_nulls( - values.null_mask(), values.null_count(), *child, stream, mr); - } - } - - return make_structs_column(values.size(), - std::move(scanned_children), - values.null_count(), - cudf::detail::copy_bitmask(values, stream, mr)); - } -}; - } // namespace detail } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 73ca658ce81..8d6abaf3d69 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -155,8 +155,7 @@ static constexpr bool is_group_redution_supported() case aggregation::MIN: case aggregation::MAX: return cudf::is_fixed_width() and is_relationally_comparable(); case aggregation::ARGMIN: - case aggregation::ARGMAX: - return is_relationally_comparable() or std::is_same_v; + case aggregation::ARGMAX: return is_relationally_comparable(); default: return false; } } @@ -225,80 +224,6 @@ struct group_reduction_functor -struct group_reduction_functor< - K, - cudf::struct_view, - std::enable_if_t()>> { - static std::unique_ptr invoke(column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - // This is be expected to be size_type. - using ResultType = cudf::detail::target_type_t; - - auto result = make_fixed_width_column( - data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); - - if (values.is_empty()) { return result; } - - // When finding ARGMIN, we need to consider nulls as larger than non-null elements. - // Thing is opposite for ARGMAX. - auto const null_precedence = - (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = structs::detail::flatten_nested_columns( - table_view{{values}}, {}, std::vector{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::ARGMIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); - - // Perform segmented reduction to find ARGMIN/ARGMAX. - auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.data(), - group_labels.data() + group_labels.size(), - inp_iter, - thrust::make_discard_iterator(), - out_iter, - thrust::equal_to{}, - binop); - }; - - auto const count_iter = thrust::make_counting_iterator(0); - auto const result_begin = result->mutable_view().template begin(); - if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); - do_reduction(count_iter, result_begin, binop); - - // Generate bitmask for the output by segmented reduction of the input bitmask. - auto const d_values_ptr = column_device_view::create(values, stream); - auto validity = rmm::device_uvector(num_groups, stream); - do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), - validity.begin(), - thrust::logical_or{}); - - auto [null_mask, null_count] = cudf::detail::valid_if( - validity.begin(), validity.end(), thrust::identity{}, stream, mr); - result->set_null_mask(std::move(null_mask), null_count); - } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); - do_reduction(count_iter, result_begin, binop); - } - - return result; - } -}; - } // namespace detail } // namespace groupby } // namespace cudf From 9456ea332910bf826e701507f19bafb9ef6d31c9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 13:59:15 -0600 Subject: [PATCH 30/48] Implement `struct_view` specialization --- .../groupby/sort/group_argminmax_struct.cu | 110 ++++++++++++++++ .../groupby/sort/group_minmax_scan_struct.cu | 122 ++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 cpp/src/groupby/sort/group_argminmax_struct.cu create mode 100644 cpp/src/groupby/sort/group_minmax_scan_struct.cu diff --git a/cpp/src/groupby/sort/group_argminmax_struct.cu b/cpp/src/groupby/sort/group_argminmax_struct.cu new file mode 100644 index 00000000000..145f45c5890 --- /dev/null +++ b/cpp/src/groupby/sort/group_argminmax_struct.cu @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace cudf { +namespace groupby { +namespace detail { +std::unique_ptr group_argminmax_struct(aggregation::Kind K, + column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + column_view const& key_sort_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(K == aggregation::ARGMIN || aggregation::ARGMAX, + "Only groupby ARGMIN/ARGMAX are supported for STRUCT type."); + + auto result = make_fixed_width_column( + data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); + + if (values.is_empty()) { return result; } + + // When finding ARGMIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite for ARGMAX. + auto const null_precedence = (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::ARGMIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); + + // Perform segmented reduction to find ARGMIN/ARGMAX. + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.data(), + group_labels.data() + group_labels.size(), + inp_iter, + thrust::make_discard_iterator(), + out_iter, + thrust::equal_to{}, + binop); + }; + + auto const count_iter = thrust::make_counting_iterator(0); + auto const result_begin = result->mutable_view().template begin(); + if (values.has_nulls()) { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); + + // Generate bitmask for the output by segmented reduction of the input bitmask. + auto const d_values_ptr = column_device_view::create(values, stream); + auto validity = rmm::device_uvector(num_groups, stream); + do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), + validity.begin(), + thrust::logical_or{}); + + auto [null_mask, null_count] = cudf::detail::valid_if( + validity.begin(), validity.end(), thrust::identity{}, stream, mr); + result->set_null_mask(std::move(null_mask), null_count); + } else { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); + } + + return result; +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_minmax_scan_struct.cu b/cpp/src/groupby/sort/group_minmax_scan_struct.cu new file mode 100644 index 00000000000..e92fce36f52 --- /dev/null +++ b/cpp/src/groupby/sort/group_minmax_scan_struct.cu @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +namespace cudf { +namespace groupby { +namespace detail { +std::unique_ptr minmax_scan_struct(aggregation::Kind K, + column_view const& values, + size_type num_groups, + device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (values.is_empty()) { return cudf::empty_like(values); } + + // When finding MIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite when finding MAX. + auto const null_precedence = (K == aggregation::MIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::MIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); + + // Create a gather map contaning indices of the prefix min/max elements. + auto gather_map = rmm::device_uvector(values.size(), stream); + auto const map_begin = gather_map.begin(); + + // Perform segmented scan. + auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + inp_iter, + out_iter, + thrust::equal_to{}, + binop); + }; + + // Find the indices of the prefix min/max elements within each group. + auto const count_iter = thrust::make_counting_iterator(0); + if (values.has_nulls()) { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); + } else { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); + } + + auto gather_map_view = + column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); + + // Gather the children elements of the prefix min/max struct elements first. + auto scanned_children = + cudf::detail::gather( + table_view(std::vector{values.child_begin(), values.child_end()}), + gather_map_view, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // After gathering the children elements, we need to push down nulls from the root structs + // column to them. + if (values.has_nulls()) { + for (std::unique_ptr& child : scanned_children) { + structs::detail::superimpose_parent_nulls( + values.null_mask(), values.null_count(), *child, stream, mr); + } + } + + return make_structs_column(values.size(), + std::move(scanned_children), + values.null_count(), + cudf::detail::copy_bitmask(values, stream, mr)); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf From 43be509575703c235a39372a3dbf2f0cd05ad102 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 14:07:10 -0600 Subject: [PATCH 31/48] Fix output order --- cpp/src/groupby/sort/group_argminmax_struct.cu | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/src/groupby/sort/group_argminmax_struct.cu b/cpp/src/groupby/sort/group_argminmax_struct.cu index 145f45c5890..4e97bd02804 100644 --- a/cpp/src/groupby/sort/group_argminmax_struct.cu +++ b/cpp/src/groupby/sort/group_argminmax_struct.cu @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -102,6 +103,14 @@ std::unique_ptr group_argminmax_struct(aggregation::Kind K, do_reduction(count_iter, result_begin, binop); } + // result now stores the indices of minimum elements in the sorted values. + // We need the indices of minimum elements in the original unsorted values. + thrust::gather(rmm::exec_policy(stream), + result_begin, + result_begin + num_groups, + key_sort_order.template begin(), + result_begin); + return result; } From 7cee90c2aef86def92c330848142f7c4ddccb322 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 14:13:29 -0600 Subject: [PATCH 32/48] Fix EXPECT conditions --- cpp/src/groupby/sort/group_argminmax_struct.cu | 2 +- cpp/src/groupby/sort/group_minmax_scan_struct.cu | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_argminmax_struct.cu b/cpp/src/groupby/sort/group_argminmax_struct.cu index 4e97bd02804..96106770544 100644 --- a/cpp/src/groupby/sort/group_argminmax_struct.cu +++ b/cpp/src/groupby/sort/group_argminmax_struct.cu @@ -45,7 +45,7 @@ std::unique_ptr group_argminmax_struct(aggregation::Kind K, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(K == aggregation::ARGMIN || aggregation::ARGMAX, + CUDF_EXPECTS(K == aggregation::ARGMIN || K == aggregation::ARGMAX, "Only groupby ARGMIN/ARGMAX are supported for STRUCT type."); auto result = make_fixed_width_column( diff --git a/cpp/src/groupby/sort/group_minmax_scan_struct.cu b/cpp/src/groupby/sort/group_minmax_scan_struct.cu index e92fce36f52..2d3c26d19f7 100644 --- a/cpp/src/groupby/sort/group_minmax_scan_struct.cu +++ b/cpp/src/groupby/sort/group_minmax_scan_struct.cu @@ -44,6 +44,9 @@ std::unique_ptr minmax_scan_struct(aggregation::Kind K, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(K == aggregation::MIN || K == aggregation::MAX, + "Only groupby MIN/MAX scan are supported for STRUCT type."); + if (values.is_empty()) { return cudf::empty_like(values); } // When finding MIN, we need to consider nulls as larger than non-null elements. From fee55e33eee9434dfc42f5ef80d4b916038e08c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 4 Nov 2021 21:43:46 -0600 Subject: [PATCH 33/48] Refactor `row_operators.cuh` --- cpp/include/cudf/table/row_operators.cuh | 53 +++++++++++++++++------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index c719c564a87..566c6fdbea2 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -268,8 +268,8 @@ class element_relational_comparator { * @param null_precedence Indicates how null values are ordered with other * values */ - __host__ __device__ element_relational_comparator(column_device_view lhs, - column_device_view rhs, + __host__ __device__ element_relational_comparator(column_device_view const& lhs, + column_device_view const& rhs, null_order null_precedence) : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} { @@ -315,6 +315,28 @@ class element_relational_comparator { null_order null_precedence; }; +/** + * @brief Performs a relational comparison between two elements in two columns at the + * corresponding given indices. + * + * This function is explicitly prevented from inlining because it is very heavy-weight due + * type-dispatching. + * + * @tparam has_nulls Indicates the potential for null values in either column. + */ +template +__attribute__((noinline)) __device__ weak_ordering +compare_column_elements(data_type type, + column_device_view const& lhs, + column_device_view const& rhs, + null_order null_precedence, + size_type lhs_idx, + size_type rhs_idx) +{ + auto const comp = element_relational_comparator{lhs, rhs, null_precedence}; + return cudf::type_dispatcher(type, comp, lhs_idx, rhs_idx); +} + /** * @brief Computes whether one row is lexicographically *less* than another row. * @@ -350,8 +372,8 @@ class row_lexicographic_comparator { * it is nullptr, then null precedence would be `null_order::BEFORE` for all * columns. */ - row_lexicographic_comparator(table_device_view lhs, - table_device_view rhs, + row_lexicographic_comparator(table_device_view const& lhs, + table_device_view const& rhs, order const* column_order = nullptr, null_order const* null_precedence = nullptr) : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} @@ -373,20 +395,19 @@ class row_lexicographic_comparator { __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept { for (size_type i = 0; i < _lhs.num_columns(); ++i) { - bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - - weak_ordering state{weak_ordering::EQUIVALENT}; - null_order null_precedence = + auto const ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); + auto const null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - auto comparator = - element_relational_comparator{_lhs.column(i), _rhs.column(i), null_precedence}; - - state = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); - - if (state == weak_ordering::EQUIVALENT) { continue; } - - return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); + auto const state = compare_column_elements(_lhs.column(i).type(), + _lhs.column(i), + _rhs.column(i), + null_precedence, + lhs_index, + rhs_index); + if (state != weak_ordering::EQUIVALENT) { + return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); + } } return false; } From 2fde89a49ffa0e52503178091e46e7b06899ea10 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 14:56:48 -0600 Subject: [PATCH 34/48] Fix function name typo --- cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 8d6abaf3d69..00d3605ffdf 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -146,7 +146,7 @@ struct group_reduction_dispatcher { * @brief Check if the given aggregation K with data type T is supported in groupby reduction. */ template -static constexpr bool is_group_redution_supported() +static constexpr bool is_group_reduction_supported() { switch (K) { case aggregation::SUM: @@ -161,7 +161,7 @@ static constexpr bool is_group_redution_supported() } template -struct group_reduction_functor()>> { +struct group_reduction_functor()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, From ac9c6035659067f1a988d485c4603ecf7664bbe1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 14:58:40 -0600 Subject: [PATCH 35/48] Remove redundant header --- cpp/src/groupby/sort/group_scan_util.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index f0d27830bfb..b8988c39077 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -16,8 +16,6 @@ #pragma once -#include - #include #include #include From f5d27ae3019bceac1930ef32e1d7cd61a319b1df Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 14:58:45 -0600 Subject: [PATCH 36/48] Revert "Refactor `row_operators.cuh`" This reverts commit fee55e33eee9434dfc42f5ef80d4b916038e08c3. --- cpp/include/cudf/table/row_operators.cuh | 53 +++++++----------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 566c6fdbea2..c719c564a87 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -268,8 +268,8 @@ class element_relational_comparator { * @param null_precedence Indicates how null values are ordered with other * values */ - __host__ __device__ element_relational_comparator(column_device_view const& lhs, - column_device_view const& rhs, + __host__ __device__ element_relational_comparator(column_device_view lhs, + column_device_view rhs, null_order null_precedence) : lhs{lhs}, rhs{rhs}, null_precedence{null_precedence} { @@ -315,28 +315,6 @@ class element_relational_comparator { null_order null_precedence; }; -/** - * @brief Performs a relational comparison between two elements in two columns at the - * corresponding given indices. - * - * This function is explicitly prevented from inlining because it is very heavy-weight due - * type-dispatching. - * - * @tparam has_nulls Indicates the potential for null values in either column. - */ -template -__attribute__((noinline)) __device__ weak_ordering -compare_column_elements(data_type type, - column_device_view const& lhs, - column_device_view const& rhs, - null_order null_precedence, - size_type lhs_idx, - size_type rhs_idx) -{ - auto const comp = element_relational_comparator{lhs, rhs, null_precedence}; - return cudf::type_dispatcher(type, comp, lhs_idx, rhs_idx); -} - /** * @brief Computes whether one row is lexicographically *less* than another row. * @@ -372,8 +350,8 @@ class row_lexicographic_comparator { * it is nullptr, then null precedence would be `null_order::BEFORE` for all * columns. */ - row_lexicographic_comparator(table_device_view const& lhs, - table_device_view const& rhs, + row_lexicographic_comparator(table_device_view lhs, + table_device_view rhs, order const* column_order = nullptr, null_order const* null_precedence = nullptr) : _lhs{lhs}, _rhs{rhs}, _column_order{column_order}, _null_precedence{null_precedence} @@ -395,19 +373,20 @@ class row_lexicographic_comparator { __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept { for (size_type i = 0; i < _lhs.num_columns(); ++i) { - auto const ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); - auto const null_precedence = + bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); + + weak_ordering state{weak_ordering::EQUIVALENT}; + null_order null_precedence = _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i]; - auto const state = compare_column_elements(_lhs.column(i).type(), - _lhs.column(i), - _rhs.column(i), - null_precedence, - lhs_index, - rhs_index); - if (state != weak_ordering::EQUIVALENT) { - return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); - } + auto comparator = + element_relational_comparator{_lhs.column(i), _rhs.column(i), null_precedence}; + + state = cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index); + + if (state == weak_ordering::EQUIVALENT) { continue; } + + return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER); } return false; } From 7a7c7060a9567adc2578ec7ba8bcb44685e1a17e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:08:26 -0600 Subject: [PATCH 37/48] Prevent functor code from inlining --- cpp/src/groupby/sort/group_util.cuh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_util.cuh b/cpp/src/groupby/sort/group_util.cuh index 98fcb199fb7..31ff29ed4c3 100644 --- a/cpp/src/groupby/sort/group_util.cuh +++ b/cpp/src/groupby/sort/group_util.cuh @@ -42,7 +42,11 @@ struct row_arg_minmax_fn { { } - CUDA_DEVICE_CALLABLE auto operator()(size_type lhs_idx, size_type rhs_idx) const + // This function is explicitly prevented from inlining, because it calls to + // `row_lexicographic_comparator::operator()` which is inlined and very heavy-weight. As a result, + // instantiating this functor will result in huge code, and objects of this functor used with + // `thrust::reduce_by_key` or `thrust::scan_by_key` will result in significant compile time. + __attribute__((noinline)) __device__ auto operator()(size_type lhs_idx, size_type rhs_idx) const { // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and // github.com/NVIDIA/thrust/issues/1525 From 2a6a106c426f477c657ee5e56d321ece3d1e2670 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:44:53 -0600 Subject: [PATCH 38/48] Revert "Remove redundant header" This reverts commit ac9c6035659067f1a988d485c4603ecf7664bbe1. --- cpp/src/groupby/sort/group_scan_util.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index b8988c39077..f0d27830bfb 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include From 88ef471e4c9ab87ba60536eb2421d7c2f09c82c4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:45:57 -0600 Subject: [PATCH 39/48] Revert "Remove `struct_view` specialization" This reverts commit f063e266988541a220b2d2e7615c4a7a30a13766. --- cpp/src/groupby/sort/group_scan_util.cuh | 87 ++++++++++++++++++- .../sort/group_single_pass_reduction_util.cuh | 77 +++++++++++++++- 2 files changed, 162 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index f0d27830bfb..013ea924cce 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -74,7 +74,8 @@ static constexpr bool is_group_scan_supported() if (K == aggregation::SUM) return cudf::is_numeric() || cudf::is_duration() || cudf::is_fixed_point(); else if (K == aggregation::MIN or K == aggregation::MAX) - return not cudf::is_dictionary() and is_relationally_comparable(); + return not cudf::is_dictionary() and + (is_relationally_comparable() or std::is_same_v); else return false; } @@ -179,6 +180,90 @@ struct group_scan_functor +struct group_scan_functor()>> { + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + if (values.is_empty()) { return cudf::empty_like(values); } + + // When finding MIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite when finding MAX. + auto const null_precedence = (K == aggregation::MIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::MIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); + + // Create a gather map contaning indices of the prefix min/max elements. + auto gather_map = rmm::device_uvector(values.size(), stream); + auto const map_begin = gather_map.begin(); + + // Perform segmented scan. + auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + inp_iter, + out_iter, + thrust::equal_to{}, + binop); + }; + + // Find the indices of the prefix min/max elements within each group. + auto const count_iter = thrust::make_counting_iterator(0); + if (values.has_nulls()) { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); + } else { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::MIN); + do_scan(count_iter, map_begin, binop); + } + + auto gather_map_view = + column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); + + // Gather the children elements of the prefix min/max struct elements first. + auto scanned_children = + cudf::detail::gather( + table_view(std::vector{values.child_begin(), values.child_end()}), + gather_map_view, + cudf::out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // After gathering the children elements, we need to push down nulls from the root structs + // column to them. + if (values.has_nulls()) { + for (std::unique_ptr& child : scanned_children) { + structs::detail::superimpose_parent_nulls( + values.null_mask(), values.null_count(), *child, stream, mr); + } + } + + return make_structs_column(values.size(), + std::move(scanned_children), + values.null_count(), + cudf::detail::copy_bitmask(values, stream, mr)); + } +}; + } // namespace detail } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 00d3605ffdf..0c43acdeb4c 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -155,7 +155,8 @@ static constexpr bool is_group_reduction_supported() case aggregation::MIN: case aggregation::MAX: return cudf::is_fixed_width() and is_relationally_comparable(); case aggregation::ARGMIN: - case aggregation::ARGMAX: return is_relationally_comparable(); + case aggregation::ARGMAX: + return is_relationally_comparable() or std::is_same_v; default: return false; } } @@ -224,6 +225,80 @@ struct group_reduction_functor +struct group_reduction_functor< + K, + cudf::struct_view, + std::enable_if_t()>> { + static std::unique_ptr invoke(column_view const& values, + size_type num_groups, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + // This is be expected to be size_type. + using ResultType = cudf::detail::target_type_t; + + auto result = make_fixed_width_column( + data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); + + if (values.is_empty()) { return result; } + + // When finding ARGMIN, we need to consider nulls as larger than non-null elements. + // Thing is opposite for ARGMAX. + auto const null_precedence = + (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; + auto const flattened_values = structs::detail::flatten_nested_columns( + table_view{{values}}, {}, std::vector{null_precedence}); + auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); + auto const flattened_null_precedences = + (K == aggregation::ARGMIN) + ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) + : rmm::device_uvector(0, stream); + + // Perform segmented reduction to find ARGMIN/ARGMAX. + auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.data(), + group_labels.data() + group_labels.size(), + inp_iter, + thrust::make_discard_iterator(), + out_iter, + thrust::equal_to{}, + binop); + }; + + auto const count_iter = thrust::make_counting_iterator(0); + auto const result_begin = result->mutable_view().template begin(); + if (values.has_nulls()) { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); + + // Generate bitmask for the output by segmented reduction of the input bitmask. + auto const d_values_ptr = column_device_view::create(values, stream); + auto validity = rmm::device_uvector(num_groups, stream); + do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), + validity.begin(), + thrust::logical_or{}); + + auto [null_mask, null_count] = cudf::detail::valid_if( + validity.begin(), validity.end(), thrust::identity{}, stream, mr); + result->set_null_mask(std::move(null_mask), null_count); + } else { + auto const binop = row_arg_minmax_fn(values.size(), + *d_flattened_values_ptr, + flattened_null_precedences.data(), + K == aggregation::ARGMIN); + do_reduction(count_iter, result_begin, binop); + } + + return result; + } +}; + } // namespace detail } // namespace groupby } // namespace cudf From 494af0143eb907a22e110e45d7750db409675aee Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:48:00 -0600 Subject: [PATCH 40/48] Revert "Add new .cu files" This reverts commit 0c2b0b4f03ad14e47e22dc2c0788e65918bd2c33. # Conflicts: # cpp/CMakeLists.txt --- cpp/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f2bee0ee0f7..2182f4e1848 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -244,7 +244,6 @@ add_library( src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu - src/groupby/sort/group_argminmax_struct.cu src/groupby/sort/group_collect.cu src/groupby/sort/group_correlation.cu src/groupby/sort/group_count.cu @@ -263,7 +262,6 @@ add_library( src/groupby/sort/group_count_scan.cu src/groupby/sort/group_max_scan.cu src/groupby/sort/group_min_scan.cu - src/groupby/sort/group_minmax_scan_struct.cu src/groupby/sort/group_rank_scan.cu src/groupby/sort/group_replace_nulls.cu src/groupby/sort/group_sum_scan.cu @@ -437,7 +435,7 @@ add_library( src/strings/split/split.cu src/strings/split/split_record.cu src/strings/strings_column_factories.cu - src/strings/strings_column_view.cpp + src/strings/strings_column_view.cu src/strings/strings_scalar_factories.cpp src/strings/strip.cu src/strings/substring.cu From 3833bbc3d98320dc95004b006b5aed4a239b89d1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:49:09 -0600 Subject: [PATCH 41/48] Revert "Call the specialized functions for struct type values" This reverts commit ad7998fcdc4ee96d8ecc43cdb07f0e77071a4319. --- cpp/src/groupby/sort/aggregate.cpp | 48 ++++++++++-------------------- cpp/src/groupby/sort/scan.cpp | 34 +++++++-------------- 2 files changed, 26 insertions(+), 56 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 7bce5a3e0fa..83c6c1bca57 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -117,22 +117,14 @@ void aggregate_result_functor::operator()(aggregation const { if (cache.has_result(values, agg)) return; - auto result = values.type().id() == type_id::STRUCT - ? detail::group_argminmax_struct(aggregation::ARGMAX, - get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr) - : detail::group_argmax(get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr); - - cache.add_result(values, agg, std::move(result)); + cache.add_result(values, + agg, + detail::group_argmax(get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr)); }; template <> @@ -140,22 +132,14 @@ void aggregate_result_functor::operator()(aggregation const { if (cache.has_result(values, agg)) return; - auto result = values.type().id() == type_id::STRUCT - ? detail::group_argminmax_struct(aggregation::ARGMIN, - get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr) - : detail::group_argmin(get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - helper.key_sort_order(stream), - stream, - mr); - - cache.add_result(values, agg, std::move(result)); + cache.add_result(values, + agg, + detail::group_argmin(get_grouped_values(), + helper.num_groups(stream), + helper.group_labels(stream), + helper.key_sort_order(stream), + stream, + mr)); }; template <> diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index eed6bd52faf..b22f82ce7e4 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -81,18 +81,11 @@ void scan_result_functor::operator()(aggregation const& agg) { if (cache.has_result(values, agg)) return; - auto result = - values.type().id() == type_id::STRUCT - ? detail::minmax_scan_struct(aggregation::MIN, - get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - stream, - mr) - : detail::min_scan( - get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); - - cache.add_result(values, agg, std::move(result)); + cache.add_result( + values, + agg, + detail::min_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> @@ -100,18 +93,11 @@ void scan_result_functor::operator()(aggregation const& agg) { if (cache.has_result(values, agg)) return; - auto result = - values.type().id() == type_id::STRUCT - ? detail::minmax_scan_struct(aggregation::MAX, - get_grouped_values(), - helper.num_groups(stream), - helper.group_labels(stream), - stream, - mr) - : detail::max_scan( - get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr); - - cache.add_result(values, agg, std::move(result)); + cache.add_result( + values, + agg, + detail::max_scan( + get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr)); } template <> From 61c774a50f1f95fda5c7cc8d196a0aeea5559054 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:49:17 -0600 Subject: [PATCH 42/48] Revert "Add declaration for new internal APIs" This reverts commit d4d46447583b860a9f181d850240bfea7f1bece4. --- cpp/src/groupby/sort/group_reductions.hpp | 20 -------------------- cpp/src/groupby/sort/group_scan.hpp | 18 ------------------ 2 files changed, 38 deletions(-) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 268c088fe87..75708c7b01c 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -173,26 +173,6 @@ std::unique_ptr group_argmin(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -/** - * @brief Internal API to calculate group-wise indices of minimum/maximum values, specialized for - * STRUCT type. - * - * @param K The aggregation kind, must be `aggregation::ARGMIN` or `aggregation::ARGMAX` - * @param values Grouped values to get minimum value's index from - * @param num_groups Number of groups - * @param group_labels ID of group that the corresponding value belongs to - * @param key_sort_order Indices indicating sort order of groupby keys - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory - */ -std::unique_ptr group_argminmax_struct(aggregation::Kind K, - column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - column_view const& key_sort_order, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - /** * @brief Internal API to calculate number of non-null values in each group of * @p values diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index 2be779b6f3b..82ef0e25380 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -72,24 +72,6 @@ std::unique_ptr max_scan(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -/** - * @brief Internal API to calculate groupwise cumulative minimum/maximum value, specialized for - * STRUCT type. - * - * @param K The aggregation kind, must be `aggregation::MIN` or `aggregation::MAX` - * @param values Grouped values to get maximum from - * @param num_groups Number of groups - * @param group_labels ID of group that the corresponding value belongs to - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - */ -std::unique_ptr minmax_scan_struct(aggregation::Kind K, - column_view const& values, - size_type num_groups, - device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - /** * @brief Internal API to calculate cumulative number of values in each group * From a5ab52dec7057e165e3d45cf2b1c33aec7602c71 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:50:53 -0600 Subject: [PATCH 43/48] Fix function name --- cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 0c43acdeb4c..4e0820af236 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -229,7 +229,7 @@ template struct group_reduction_functor< K, cudf::struct_view, - std::enable_if_t()>> { + std::enable_if_t()>> { static std::unique_ptr invoke(column_view const& values, size_type num_groups, cudf::device_span group_labels, From 731426ac6a93a6017bfdf73a05a22f434fcd7247 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:51:42 -0600 Subject: [PATCH 44/48] Fix CMakeList.txt --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2182f4e1848..1a0c853ef48 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -435,7 +435,7 @@ add_library( src/strings/split/split.cu src/strings/split/split_record.cu src/strings/strings_column_factories.cu - src/strings/strings_column_view.cu + src/strings/strings_column_view.cpp src/strings/strings_scalar_factories.cpp src/strings/strip.cu src/strings/substring.cu From 49259f3e0597540b77f28cf7472de6ba3a19ec66 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 5 Nov 2021 15:55:12 -0600 Subject: [PATCH 45/48] Remove files --- .../groupby/sort/group_argminmax_struct.cu | 119 ----------------- .../groupby/sort/group_minmax_scan_struct.cu | 125 ------------------ 2 files changed, 244 deletions(-) delete mode 100644 cpp/src/groupby/sort/group_argminmax_struct.cu delete mode 100644 cpp/src/groupby/sort/group_minmax_scan_struct.cu diff --git a/cpp/src/groupby/sort/group_argminmax_struct.cu b/cpp/src/groupby/sort/group_argminmax_struct.cu deleted file mode 100644 index 96106770544..00000000000 --- a/cpp/src/groupby/sort/group_argminmax_struct.cu +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace cudf { -namespace groupby { -namespace detail { -std::unique_ptr group_argminmax_struct(aggregation::Kind K, - column_view const& values, - size_type num_groups, - cudf::device_span group_labels, - column_view const& key_sort_order, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(K == aggregation::ARGMIN || K == aggregation::ARGMAX, - "Only groupby ARGMIN/ARGMAX are supported for STRUCT type."); - - auto result = make_fixed_width_column( - data_type{type_to_id()}, num_groups, mask_state::UNALLOCATED, stream, mr); - - if (values.is_empty()) { return result; } - - // When finding ARGMIN, we need to consider nulls as larger than non-null elements. - // Thing is opposite for ARGMAX. - auto const null_precedence = (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = structs::detail::flatten_nested_columns( - table_view{{values}}, {}, std::vector{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::ARGMIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); - - // Perform segmented reduction to find ARGMIN/ARGMAX. - auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.data(), - group_labels.data() + group_labels.size(), - inp_iter, - thrust::make_discard_iterator(), - out_iter, - thrust::equal_to{}, - binop); - }; - - auto const count_iter = thrust::make_counting_iterator(0); - auto const result_begin = result->mutable_view().template begin(); - if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); - do_reduction(count_iter, result_begin, binop); - - // Generate bitmask for the output by segmented reduction of the input bitmask. - auto const d_values_ptr = column_device_view::create(values, stream); - auto validity = rmm::device_uvector(num_groups, stream); - do_reduction(cudf::detail::make_validity_iterator(*d_values_ptr), - validity.begin(), - thrust::logical_or{}); - - auto [null_mask, null_count] = cudf::detail::valid_if( - validity.begin(), validity.end(), thrust::identity{}, stream, mr); - result->set_null_mask(std::move(null_mask), null_count); - } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::ARGMIN); - do_reduction(count_iter, result_begin, binop); - } - - // result now stores the indices of minimum elements in the sorted values. - // We need the indices of minimum elements in the original unsorted values. - thrust::gather(rmm::exec_policy(stream), - result_begin, - result_begin + num_groups, - key_sort_order.template begin(), - result_begin); - - return result; -} - -} // namespace detail -} // namespace groupby -} // namespace cudf diff --git a/cpp/src/groupby/sort/group_minmax_scan_struct.cu b/cpp/src/groupby/sort/group_minmax_scan_struct.cu deleted file mode 100644 index 2d3c26d19f7..00000000000 --- a/cpp/src/groupby/sort/group_minmax_scan_struct.cu +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace cudf { -namespace groupby { -namespace detail { -std::unique_ptr minmax_scan_struct(aggregation::Kind K, - column_view const& values, - size_type num_groups, - device_span group_labels, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(K == aggregation::MIN || K == aggregation::MAX, - "Only groupby MIN/MAX scan are supported for STRUCT type."); - - if (values.is_empty()) { return cudf::empty_like(values); } - - // When finding MIN, we need to consider nulls as larger than non-null elements. - // Thing is opposite when finding MAX. - auto const null_precedence = (K == aggregation::MIN) ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = structs::detail::flatten_nested_columns( - table_view{{values}}, {}, std::vector{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::MIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); - - // Create a gather map contaning indices of the prefix min/max elements. - auto gather_map = rmm::device_uvector(values.size(), stream); - auto const map_begin = gather_map.begin(); - - // Perform segmented scan. - auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - inp_iter, - out_iter, - thrust::equal_to{}, - binop); - }; - - // Find the indices of the prefix min/max elements within each group. - auto const count_iter = thrust::make_counting_iterator(0); - if (values.has_nulls()) { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); - do_scan(count_iter, map_begin, binop); - } else { - auto const binop = row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - flattened_null_precedences.data(), - K == aggregation::MIN); - do_scan(count_iter, map_begin, binop); - } - - auto gather_map_view = - column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); - - // Gather the children elements of the prefix min/max struct elements first. - auto scanned_children = - cudf::detail::gather( - table_view(std::vector{values.child_begin(), values.child_end()}), - gather_map_view, - cudf::out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release(); - - // After gathering the children elements, we need to push down nulls from the root structs - // column to them. - if (values.has_nulls()) { - for (std::unique_ptr& child : scanned_children) { - structs::detail::superimpose_parent_nulls( - values.null_mask(), values.null_count(), *child, stream, mr); - } - } - - return make_structs_column(values.size(), - std::move(scanned_children), - values.null_count(), - cudf::detail::copy_bitmask(values, stream, mr)); -} - -} // namespace detail -} // namespace groupby -} // namespace cudf From cbf386fbe69b430906fffd3716769ed1d7dba3c7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 8 Nov 2021 08:01:11 -0700 Subject: [PATCH 46/48] Add groupby struct benchmark --- cpp/benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 193ac1f006c..fa1e61e26fd 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -157,7 +157,7 @@ ConfigureBench(FILL_BENCH filling/repeat_benchmark.cpp) # * groupby benchmark ----------------------------------------------------------------------------- ConfigureBench( GROUPBY_BENCH groupby/group_sum_benchmark.cu groupby/group_nth_benchmark.cu - groupby/group_shift_benchmark.cu + groupby/group_shift_benchmark.cu groupby/group_struct_benchmark.cu ) # ################################################################################################## From 8b3f72ed584c768d41ad193e351e8ed52bac4d09 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 8 Nov 2021 09:38:38 -0700 Subject: [PATCH 47/48] Implement benchmark --- .../groupby/group_struct_benchmark.cu | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 cpp/benchmarks/groupby/group_struct_benchmark.cu diff --git a/cpp/benchmarks/groupby/group_struct_benchmark.cu b/cpp/benchmarks/groupby/group_struct_benchmark.cu new file mode 100644 index 00000000000..8192b23fd78 --- /dev/null +++ b/cpp/benchmarks/groupby/group_struct_benchmark.cu @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +static constexpr cudf::size_type num_struct_members = 8; +static constexpr cudf::size_type max_int = 100; +static constexpr cudf::size_type max_str_length = 32; + +static auto create_data_table(cudf::size_type n_rows) +{ + data_profile table_profile; + table_profile.set_distribution_params(cudf::type_id::INT32, distribution_id::UNIFORM, 0, max_int); + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + + // The first two struct members are int32 and string. + // The first column is also used as keys in groupby. + auto col_ids = std::vector{cudf::type_id::INT32, cudf::type_id::STRING}; + + // The subsequent struct members are int32 and string again. + for (cudf::size_type i = 3; i <= num_struct_members; ++i) { + if (i % 2) { + col_ids.push_back(cudf::type_id::INT32); + } else { + col_ids.push_back(cudf::type_id::STRING); + } + } + + return create_random_table(col_ids, num_struct_members, row_count{n_rows}, table_profile); +} + +// Max aggregation technically has the same performance as min. +void BM_min_aggregation(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto data_cols = create_data_table(n_rows)->release(); + + // Extract keys (integers column) and values (structs column). + auto const keys_view = data_cols.front()->view(); + auto const values = + cudf::make_structs_column(keys_view.size(), std::move(data_cols), 0, rmm::device_buffer()); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view})); + auto requests = std::vector(); + requests.emplace_back(cudf::groupby::aggregation_request()); + requests.front().values = values->view(); + requests.front().aggregations.push_back(cudf::make_min_aggregation()); + + for (auto _ : state) { + [[maybe_unused]] auto const timer = cuda_event_timer(state, true); + [[maybe_unused]] auto const result = gb_obj.aggregate(requests); + } +} + +// Max aggregation technically has the same performance as min. +void BM_min_scan(benchmark::State& state) +{ + auto const n_rows = static_cast(state.range(0)); + auto data_cols = create_data_table(n_rows)->release(); + + // Extract keys (integers column) and values (structs column). + auto const keys_view = data_cols.front()->view(); + auto const values = + cudf::make_structs_column(keys_view.size(), std::move(data_cols), 0, rmm::device_buffer()); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view})); + auto requests = std::vector(); + requests.emplace_back(cudf::groupby::scan_request()); + requests.front().values = values->view(); + requests.front().aggregations.push_back( + cudf::make_min_aggregation()); + + for (auto _ : state) { + [[maybe_unused]] auto const timer = cuda_event_timer(state, true); + [[maybe_unused]] auto const result = gb_obj.scan(requests); + } +} + +class Groupby : public cudf::benchmark { +}; + +#define MIN_RANGE 10'000 +#define MAX_RANGE 10'000'000 + +#define REGISTER_BENCHMARK(name, func) \ + BENCHMARK_DEFINE_F(Groupby, name)(::benchmark::State & state) { func(state); } \ + BENCHMARK_REGISTER_F(Groupby, name) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond) \ + ->RangeMultiplier(4) \ + ->Ranges({{MIN_RANGE, MAX_RANGE}}); + +REGISTER_BENCHMARK(Aggregation, BM_min_aggregation) +REGISTER_BENCHMARK(Scan, BM_min_scan) From cdfc6026ae93db2ea677c3b24a54953c31a5df62 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 8 Nov 2021 10:34:53 -0700 Subject: [PATCH 48/48] Unify 2 functions into a template function --- .../groupby/group_struct_benchmark.cu | 65 ++++++++----------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/cpp/benchmarks/groupby/group_struct_benchmark.cu b/cpp/benchmarks/groupby/group_struct_benchmark.cu index 8192b23fd78..702983a63bf 100644 --- a/cpp/benchmarks/groupby/group_struct_benchmark.cu +++ b/cpp/benchmarks/groupby/group_struct_benchmark.cu @@ -55,50 +55,34 @@ static auto create_data_table(cudf::size_type n_rows) return create_random_table(col_ids, num_struct_members, row_count{n_rows}, table_profile); } -// Max aggregation technically has the same performance as min. -void BM_min_aggregation(benchmark::State& state) +// Max aggregation/scan technically has the same performance as min. +template +void BM_groupby_min_struct(benchmark::State& state) { auto const n_rows = static_cast(state.range(0)); auto data_cols = create_data_table(n_rows)->release(); - // Extract keys (integers column) and values (structs column). auto const keys_view = data_cols.front()->view(); auto const values = cudf::make_structs_column(keys_view.size(), std::move(data_cols), 0, rmm::device_buffer()); - auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view})); - auto requests = std::vector(); - requests.emplace_back(cudf::groupby::aggregation_request()); - requests.front().values = values->view(); - requests.front().aggregations.push_back(cudf::make_min_aggregation()); - - for (auto _ : state) { - [[maybe_unused]] auto const timer = cuda_event_timer(state, true); - [[maybe_unused]] auto const result = gb_obj.aggregate(requests); - } -} - -// Max aggregation technically has the same performance as min. -void BM_min_scan(benchmark::State& state) -{ - auto const n_rows = static_cast(state.range(0)); - auto data_cols = create_data_table(n_rows)->release(); - - // Extract keys (integers column) and values (structs column). - auto const keys_view = data_cols.front()->view(); - auto const values = - cudf::make_structs_column(keys_view.size(), std::move(data_cols), 0, rmm::device_buffer()); + using RequestType = std::conditional_t, + cudf::groupby::aggregation_request, + cudf::groupby::scan_request>; auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view})); - auto requests = std::vector(); - requests.emplace_back(cudf::groupby::scan_request()); + auto requests = std::vector(); + requests.emplace_back(RequestType()); requests.front().values = values->view(); - requests.front().aggregations.push_back( - cudf::make_min_aggregation()); + requests.front().aggregations.push_back(cudf::make_min_aggregation()); for (auto _ : state) { - [[maybe_unused]] auto const timer = cuda_event_timer(state, true); - [[maybe_unused]] auto const result = gb_obj.scan(requests); + [[maybe_unused]] auto const timer = cuda_event_timer(state, true); + if constexpr (std::is_same_v) { + [[maybe_unused]] auto const result = gb_obj.aggregate(requests); + } else { + [[maybe_unused]] auto const result = gb_obj.scan(requests); + } } } @@ -108,13 +92,16 @@ class Groupby : public cudf::benchmark { #define MIN_RANGE 10'000 #define MAX_RANGE 10'000'000 -#define REGISTER_BENCHMARK(name, func) \ - BENCHMARK_DEFINE_F(Groupby, name)(::benchmark::State & state) { func(state); } \ - BENCHMARK_REGISTER_F(Groupby, name) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond) \ - ->RangeMultiplier(4) \ +#define REGISTER_BENCHMARK(name, op_type) \ + BENCHMARK_DEFINE_F(Groupby, name)(::benchmark::State & state) \ + { \ + BM_groupby_min_struct(state); \ + } \ + BENCHMARK_REGISTER_F(Groupby, name) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond) \ + ->RangeMultiplier(4) \ ->Ranges({{MIN_RANGE, MAX_RANGE}}); -REGISTER_BENCHMARK(Aggregation, BM_min_aggregation) -REGISTER_BENCHMARK(Scan, BM_min_scan) +REGISTER_BENCHMARK(Aggregation, cudf::groupby_aggregation) +REGISTER_BENCHMARK(Scan, cudf::groupby_scan_aggregation)