Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support min and max reduction for structs #9697

Merged
merged 9 commits into from
Nov 18, 2021
Merged
20 changes: 11 additions & 9 deletions cpp/src/groupby/sort/group_scan_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include <groupby/sort/group_util.cuh>
#include <reductions/arg_minmax_util.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
Expand Down Expand Up @@ -221,16 +221,18 @@ struct group_scan_functor<K,
// Find the indices of the prefix min/max elements within each group.
auto const count_iter = thrust::make_counting_iterator<size_type>(0);
if (values.has_nulls()) {
auto const binop = row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
do_scan(count_iter, map_begin, binop);
} else {
auto const binop = row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
do_scan(count_iter, map_begin, binop);
}

Expand Down
20 changes: 11 additions & 9 deletions cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include <groupby/sort/group_util.cuh>
#include <reductions/arg_minmax_util.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
Expand Down Expand Up @@ -271,10 +271,11 @@ struct group_reduction_functor<
auto const count_iter = thrust::make_counting_iterator<ResultType>(0);
auto const result_begin = result->mutable_view().template begin<ResultType>();
if (values.has_nulls()) {
auto const binop = row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
do_reduction(count_iter, result_begin, binop);

// Generate bitmask for the output by segmented reduction of the input bitmask.
Expand All @@ -288,10 +289,11 @@ struct group_reduction_functor<
validity.begin(), validity.end(), thrust::identity<bool>{}, stream, mr);
result->set_null_mask(std::move(null_mask), null_count);
} else {
auto const binop = row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
do_reduction(count_iter, result_begin, binop);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <cudf/table/row_operators.cuh>

namespace cudf {
namespace groupby {
namespace reduction {
namespace detail {

/**
Expand Down Expand Up @@ -62,5 +62,5 @@ struct row_arg_minmax_fn {
};

} // namespace detail
} // namespace groupby
} // namespace reduction
} // namespace cudf
61 changes: 57 additions & 4 deletions cpp/src/reductions/simple.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@

#pragma once

#include <reductions/arg_minmax_util.cuh>

#include <cudf/detail/copy.hpp>
#include <cudf/detail/reduction.cuh>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
Expand All @@ -28,6 +32,9 @@
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <thrust/reduce.h>

namespace cudf {
namespace reduction {
Expand Down Expand Up @@ -252,8 +259,7 @@ struct same_element_type_dispatcher {
/**
 * @brief Compile-time predicate telling whether `ElementType` can be reduced by this dispatcher.
 *
 * Dictionary types and `cudf::list_view` are rejected. `cudf::struct_view` is intentionally NOT
 * rejected here: a dedicated `operator()` overload handles struct min/max, and the generic
 * overload excludes `struct_view` through its own `enable_if` condition.
 *
 * @tparam ElementType The element type being dispatched on
 * @return `true` unless `ElementType` is a dictionary type or `cudf::list_view`
 */
template <typename ElementType>
static constexpr bool is_supported()
{
  return !(cudf::is_dictionary<ElementType>() || std::is_same_v<ElementType, cudf::list_view>);
}

template <typename IndexType,
Expand All @@ -279,8 +285,55 @@ struct same_element_type_dispatcher {

public:
template <typename ElementType,
std::enable_if_t<is_supported<ElementType>() &&
not cudf::is_fixed_point<ElementType>()>* = nullptr>
std::enable_if_t<std::is_same_v<ElementType, cudf::struct_view> &&
(std::is_same_v<Op, cudf::reduction::op::min> ||
std::is_same_v<Op, cudf::reduction::op::max>)>* = nullptr>
std::unique_ptr<scalar> operator()(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) { return cudf::make_empty_scalar_like(input, stream, mr); }

auto constexpr is_min_op = std::is_same_v<Op, cudf::reduction::op::min>;

// We will do reduction to find the ARGMIN/ARGMAX index, then return the element at that index.
// When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the
// opposite for ARGMAX.
auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE;
auto const flattened_input = cudf::structs::detail::flatten_nested_columns(
table_view{{input}}, {}, std::vector<null_order>{null_precedence});
auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream);
auto const flattened_null_precedences =
is_min_op ? cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream)
: rmm::device_uvector<cudf::null_order>(0, stream);

// Perform reduction to find ARGMIN/ARGMAX.
auto const do_reduction = [&](auto const& binop) {
return thrust::reduce(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
size_type{0},
binop);
};

auto const minmax_idx = [&] {
if (input.has_nulls()) {
auto const binop = cudf::reduction::detail::row_arg_minmax_fn<true>(
input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op);
return do_reduction(binop);
} else {
auto const binop = cudf::reduction::detail::row_arg_minmax_fn<false>(
input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op);
return do_reduction(binop);
}
}();

return cudf::detail::get_element(input, minmax_idx, stream, mr);
}

template <typename ElementType,
std::enable_if_t<is_supported<ElementType>() && !cudf::is_fixed_point<ElementType>() &&
!std::is_same_v<ElementType, cudf::struct_view>>* = nullptr>
std::unique_ptr<scalar> operator()(column_view const& col,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
131 changes: 129 additions & 2 deletions cpp/tests/reductions/reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/copying.hpp>
Expand Down Expand Up @@ -2055,7 +2056,7 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement)
struct StructReductionTest : public cudf::test::BaseFixture {
using SCW = cudf::test::structs_column_wrapper;

void reduction_test(SCW const& struct_column,
void reduction_test(cudf::column_view const& struct_column,
cudf::table_view const& expected_value,
bool succeeded_condition,
bool is_valid,
Expand All @@ -2066,7 +2067,7 @@ struct StructReductionTest : public cudf::test::BaseFixture {
cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::STRUCT));
auto struct_result = dynamic_cast<cudf::struct_scalar*>(result.get());
EXPECT_EQ(is_valid, struct_result->is_valid());
if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); }
if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_value, struct_result->view()); }
};

if (succeeded_condition) {
Expand Down Expand Up @@ -2210,4 +2211,130 @@ TEST_F(StructReductionTest, NonValidStructReductionNthElement)
cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE));
}

// MIN and MAX reductions over a struct<string, int> column that has no nulls at any level.
TEST_F(StructReductionTest, StructReductionMinMaxNoNull)
{
  using int_col    = cudf::test::fixed_width_column_wrapper<int>;
  using string_col = cudf::test::strings_column_wrapper;
  using struct_col = cudf::test::structs_column_wrapper;

  // Ten rows; the integer child numbers the rows from 1 so the selected row is easy to identify.
  auto names       = string_col{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"};
  auto row_numbers = int_col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  auto const input = struct_col{{names, row_numbers}};

  {
    // MIN is expected to pick the row ("$1", 8).
    auto const min_name   = string_col{"$1"};
    auto const min_number = int_col{8};
    auto const expected   = cudf::table_view{{min_name, min_number}};
    this->reduction_test(input, expected, true, true, cudf::make_min_aggregation());
  }

  {
    // MAX is expected to pick the row ("₹1", 3).
    auto const max_name   = string_col{"₹1"};
    auto const max_number = int_col{3};
    auto const expected   = cudf::table_view{{max_name, max_number}};
    this->reduction_test(input, expected, true, true, cudf::make_max_aggregation());
  }
}

// MIN and MAX reductions over a sliced struct<string, int> column: rows outside the slice
// must not influence the result.
TEST_F(StructReductionTest, StructReductionMinMaxSlicedInput)
{
  using int_col    = cudf::test::fixed_width_column_wrapper<int>;
  using string_col = cudf::test::strings_column_wrapper;
  using struct_col = cudf::test::structs_column_wrapper;
  constexpr int32_t dont_care{1};

  // Rows 0-1 and row 12 are padding that the slice below removes. Their strings start with
  // "$" / "₹", the same leading characters as the expected MIN / MAX rows inside the slice.
  auto names = string_col{"$dont_care",
                          "$dont_care",
                          "año",
                          "bit",
                          "₹1",
                          "aaa",
                          "zit",
                          "bat",
                          "aab",
                          "$1",
                          "€1",
                          "wut",
                          "₹dont_care"};
  auto row_numbers = int_col{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care};
  auto const input_original = struct_col{{names, row_numbers}};

  // Keep only rows [2, 12) of the original column.
  auto const input = cudf::slice(input_original, {2, 12})[0];

  {
    // MIN is expected to pick the row ("$1", 8).
    auto const min_name   = string_col{"$1"};
    auto const min_number = int_col{8};
    auto const expected   = cudf::table_view{{min_name, min_number}};
    this->reduction_test(input, expected, true, true, cudf::make_min_aggregation());
  }

  {
    // MAX is expected to pick the row ("₹1", 3).
    auto const max_name   = string_col{"₹1"};
    auto const max_number = int_col{3};
    auto const expected   = cudf::table_view{{max_name, max_number}};
    this->reduction_test(input, expected, true, true, cudf::make_max_aggregation());
  }
}

// MIN and MAX reductions over a struct<string, int> column that has nulls both in its children
// and at the struct level.
TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
{
  using int_col    = cudf::test::fixed_width_column_wrapper<int>;
  using string_col = cudf::test::strings_column_wrapper;
  using struct_col = cudf::test::structs_column_wrapper;
  using cudf::test::iterators::nulls_at;

  // Rows 2 and 7 are nulled in both children; rows 3 and 8 are nulled at the struct level.
  auto names = string_col{{"año",
                           "bit",
                           "₹1" /*null child*/,
                           "aaa" /*null struct row*/,
                           "zit",
                           "bat",
                           "aab",
                           "$1" /*null child*/,
                           "€1" /*null struct row*/,
                           "wut"},
                          nulls_at({2, 7})};
  auto row_numbers = int_col{
    {1, 2, 3 /*null child*/, 4 /*null struct row*/, 5, 6, 7, 8 /*null child*/, 9 /*null struct row*/, 10},
    nulls_at({2, 7})};
  auto const input = struct_col{{names, row_numbers}, nulls_at({3, 8})};

  {
    // MIN is expected to pick the row ("aab", 7).
    auto const min_name   = string_col{"aab"};
    auto const min_number = int_col{7};
    auto const expected   = cudf::table_view{{min_name, min_number}};
    this->reduction_test(input, expected, true, true, cudf::make_min_aggregation());
  }

  {
    // MAX is expected to pick the row ("zit", 5).
    auto const max_name   = string_col{"zit"};
    auto const max_number = int_col{5};
    auto const expected   = cudf::table_view{{max_name, max_number}};
    this->reduction_test(input, expected, true, true, cudf::make_max_aggregation());
  }
}

CUDF_TEST_PROGRAM_MAIN()