Skip to content

Commit

Permalink
Support min and max reduction for structs (#9697)
Browse files Browse the repository at this point in the history
This PR continues to address #8974 by adding support for struct columns in `min` and `max` reductions.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - https://github.com/nvdbaranec

URL: #9697
  • Loading branch information
ttnghia authored Nov 18, 2021
1 parent 406429a commit 91fd74e
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 26 deletions.
20 changes: 11 additions & 9 deletions cpp/src/groupby/sort/group_scan_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include <groupby/sort/group_util.cuh>
#include <reductions/arg_minmax_util.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
Expand Down Expand Up @@ -221,16 +221,18 @@ struct group_scan_functor<K,
// Find the indices of the prefix min/max elements within each group.
auto const count_iter = thrust::make_counting_iterator<size_type>(0);
if (values.has_nulls()) {
auto const binop = row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
do_scan(count_iter, map_begin, binop);
} else {
auto const binop = row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::MIN);
do_scan(count_iter, map_begin, binop);
}

Expand Down
20 changes: 11 additions & 9 deletions cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include <groupby/sort/group_util.cuh>
#include <reductions/arg_minmax_util.cuh>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
Expand Down Expand Up @@ -271,10 +271,11 @@ struct group_reduction_functor<
auto const count_iter = thrust::make_counting_iterator<ResultType>(0);
auto const result_begin = result->mutable_view().template begin<ResultType>();
if (values.has_nulls()) {
auto const binop = row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<true>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
do_reduction(count_iter, result_begin, binop);

// Generate bitmask for the output by segmented reduction of the input bitmask.
Expand All @@ -288,10 +289,11 @@ struct group_reduction_functor<
validity.begin(), validity.end(), thrust::identity<bool>{}, stream, mr);
result->set_null_mask(std::move(null_mask), null_count);
} else {
auto const binop = row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
auto const binop =
cudf::reduction::detail::row_arg_minmax_fn<false>(values.size(),
*d_flattened_values_ptr,
flattened_null_precedences.data(),
K == aggregation::ARGMIN);
do_reduction(count_iter, result_begin, binop);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <cudf/table/row_operators.cuh>

namespace cudf {
namespace groupby {
namespace reduction {
namespace detail {

/**
Expand Down Expand Up @@ -62,5 +62,5 @@ struct row_arg_minmax_fn {
};

} // namespace detail
} // namespace groupby
} // namespace reduction
} // namespace cudf
61 changes: 57 additions & 4 deletions cpp/src/reductions/simple.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@

#pragma once

#include <reductions/arg_minmax_util.cuh>

#include <cudf/detail/copy.hpp>
#include <cudf/detail/reduction.cuh>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
Expand All @@ -28,6 +32,9 @@
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <thrust/reduce.h>

namespace cudf {
namespace reduction {
Expand Down Expand Up @@ -252,8 +259,7 @@ struct same_element_type_dispatcher {
/**
 * @brief Compile-time predicate on `ElementType` used by the `enable_if` constraints of the
 * `operator()` overloads in this dispatcher.
 *
 * As written below there are two consecutive `return` statements, so only the first one can
 * ever execute: it yields `false` for dictionary types, `cudf::list_view` and
 * `cudf::struct_view`, and `true` otherwise.
 *
 * NOTE(review): this text looks like a rendered diff in which the first `return` is the removed
 * (pre-change) line and the second `return` is its replacement, which no longer excludes
 * `cudf::struct_view` — confirm against the actual file before relying on either form.
 */
template <typename ElementType>
static constexpr bool is_supported()
{
// Variant that also rejects struct_view (presumably the pre-change line — TODO confirm).
return !(cudf::is_dictionary<ElementType>() || std::is_same_v<ElementType, cudf::list_view> ||
std::is_same_v<ElementType, cudf::struct_view>);
// Variant that rejects only dictionary and list_view (unreachable as the text stands).
return !(cudf::is_dictionary<ElementType>() || std::is_same_v<ElementType, cudf::list_view>);
}

template <typename IndexType,
Expand All @@ -279,8 +285,55 @@ struct same_element_type_dispatcher {

public:
/**
 * @brief Computes the `min` or `max` of a struct column by reducing over row indices and then
 * extracting the selected row.
 *
 * The overload is constrained (second `enable_if` below) to `ElementType == cudf::struct_view`
 * with `Op` being `cudf::reduction::op::min` or `cudf::reduction::op::max`.
 *
 * @param input  Column to reduce.
 * @param stream CUDA stream handed to the helper calls and to the thrust execution policy.
 * @param mr     Memory resource handed to the calls that build the returned scalar.
 * @return For an empty `input`, the result of `cudf::make_empty_scalar_like`; otherwise the
 *         result of `cudf::detail::get_element` at the index produced by the reduction.
 */
template <typename ElementType,
// NOTE(review): the next two lines duplicate the start of an `enable_if` constraint and match
// the shape of the constraint on the generic overload; they look like the removed side of the
// rendered diff rather than part of this overload — confirm against the actual file.
std::enable_if_t<is_supported<ElementType>() &&
not cudf::is_fixed_point<ElementType>()>* = nullptr>
// Constraint for this overload: struct element type combined with a min or max operator.
std::enable_if_t<std::is_same_v<ElementType, cudf::struct_view> &&
(std::is_same_v<Op, cudf::reduction::op::min> ||
std::is_same_v<Op, cudf::reduction::op::max>)>* = nullptr>
std::unique_ptr<scalar> operator()(column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// Nothing to reduce: return an empty scalar shaped like the input.
if (input.is_empty()) { return cudf::make_empty_scalar_like(input, stream, mr); }

// Compile-time flag: true for op::min, false otherwise (i.e. op::max under the constraint above).
auto constexpr is_min_op = std::is_same_v<Op, cudf::reduction::op::min>;

// We will do reduction to find the ARGMIN/ARGMAX index, then return the element at that index.
// When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the
// opposite for ARGMAX.
auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE;
// Flatten the struct column into a table (no column orders given, one null order for the
// single input column) and create a device view of the flattened table.
auto const flattened_input = cudf::structs::detail::flatten_nested_columns(
table_view{{input}}, {}, std::vector<null_order>{null_precedence});
auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream);
// For min, the flattened null orders are copied to device memory; for max, an empty device
// vector is used instead.
// NOTE(review): presumably row_arg_minmax_fn accepts the empty vector's data pointer as
// "use the default null order" — verify against row_arg_minmax_fn.
auto const flattened_null_precedences =
is_min_op ? cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream)
: rmm::device_uvector<cudf::null_order>(0, stream);

// Perform reduction to find ARGMIN/ARGMAX.
// The reduction runs over the row indices [0, input.size()) with initial value 0, combining
// indices with the supplied binary operator.
auto const do_reduction = [&](auto const& binop) {
return thrust::reduce(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
size_type{0},
binop);
};

// Choose the row_arg_minmax_fn instantiation (template argument true/false) according to
// whether the input reports nulls, then run the reduction immediately.
auto const minmax_idx = [&] {
if (input.has_nulls()) {
auto const binop = cudf::reduction::detail::row_arg_minmax_fn<true>(
input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op);
return do_reduction(binop);
} else {
auto const binop = cudf::reduction::detail::row_arg_minmax_fn<false>(
input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op);
return do_reduction(binop);
}
}();

// Materialize the row at the selected index as the result scalar.
return cudf::detail::get_element(input, minmax_idx, stream, mr);
}

template <typename ElementType,
std::enable_if_t<is_supported<ElementType>() && !cudf::is_fixed_point<ElementType>() &&
!std::is_same_v<ElementType, cudf::struct_view>>* = nullptr>
std::unique_ptr<scalar> operator()(column_view const& col,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
131 changes: 129 additions & 2 deletions cpp/tests/reductions/reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/copying.hpp>
Expand Down Expand Up @@ -2055,7 +2056,7 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement)
struct StructReductionTest : public cudf::test::BaseFixture {
using SCW = cudf::test::structs_column_wrapper;

void reduction_test(SCW const& struct_column,
void reduction_test(cudf::column_view const& struct_column,
cudf::table_view const& expected_value,
bool succeeded_condition,
bool is_valid,
Expand All @@ -2066,7 +2067,7 @@ struct StructReductionTest : public cudf::test::BaseFixture {
cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::STRUCT));
auto struct_result = dynamic_cast<cudf::struct_scalar*>(result.get());
EXPECT_EQ(is_valid, struct_result->is_valid());
if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); }
if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_value, struct_result->view()); }
};

if (succeeded_condition) {
Expand Down Expand Up @@ -2210,4 +2211,130 @@ TEST_F(StructReductionTest, NonValidStructReductionNthElement)
cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE));
}

// MIN and MAX reductions over a struct column (string child + int child) that has no nulls.
TEST_F(StructReductionTest, StructReductionMinMaxNoNull)
{
  using INTS_CW    = cudf::test::fixed_width_column_wrapper<int>;
  using STRINGS_CW = cudf::test::strings_column_wrapper;
  using STRUCTS_CW = cudf::test::structs_column_wrapper;

  // Ten rows; the int child simply numbers the rows 1..10.
  auto strings_child =
    STRINGS_CW{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"};
  auto ints_child  = INTS_CW{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  auto const input = STRUCTS_CW{{strings_child, ints_child}};

  // MIN is expected to pick the row ("$1", 8).
  {
    auto const min_strings  = STRINGS_CW{"$1"};
    auto const min_ints     = INTS_CW{8};
    auto const expected_min = cudf::table_view{{min_strings, min_ints}};
    this->reduction_test(input,
                         expected_min,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_min_aggregation());
  }

  // MAX is expected to pick the row ("₹1", 3).
  {
    auto const max_strings  = STRINGS_CW{"₹1"};
    auto const max_ints     = INTS_CW{3};
    auto const expected_max = cudf::table_view{{max_strings, max_ints}};
    this->reduction_test(input,
                         expected_max,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_max_aggregation());
  }
}

// MIN and MAX reductions over a slice of a struct column: rows outside [2, 12) hold filler
// values and are not part of the reduced input.
TEST_F(StructReductionTest, StructReductionMinMaxSlicedInput)
{
  using INTS_CW    = cudf::test::fixed_width_column_wrapper<int>;
  using STRINGS_CW = cudf::test::strings_column_wrapper;
  using STRUCTS_CW = cudf::test::structs_column_wrapper;
  constexpr int32_t dont_care{1};

  // Thirteen rows in total: two filler rows in front, ten payload rows, one filler row at the end.
  auto strings_child = STRINGS_CW{"$dont_care",
                                  "$dont_care",
                                  "año",
                                  "bit",
                                  "₹1",
                                  "aaa",
                                  "zit",
                                  "bat",
                                  "aab",
                                  "$1",
                                  "€1",
                                  "wut",
                                  "₹dont_care"};
  auto ints_child =
    INTS_CW{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care};
  auto const input_original = STRUCTS_CW{{strings_child, ints_child}};

  // Reduce only the slice [2, 12) of the original column.
  auto const input = cudf::slice(input_original, {2, 12})[0];

  // MIN is expected to pick the row ("$1", 8).
  {
    auto const min_strings  = STRINGS_CW{"$1"};
    auto const min_ints     = INTS_CW{8};
    auto const expected_min = cudf::table_view{{min_strings, min_ints}};
    this->reduction_test(input,
                         expected_min,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_min_aggregation());
  }

  // MAX is expected to pick the row ("₹1", 3).
  {
    auto const max_strings  = STRINGS_CW{"₹1"};
    auto const max_ints     = INTS_CW{3};
    auto const expected_max = cudf::table_view{{max_strings, max_ints}};
    this->reduction_test(input,
                         expected_max,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_max_aggregation());
  }
}

// MIN and MAX reductions over a struct column with nulls: the children are null at rows 2 and 7,
// and the struct column itself is null at rows 3 and 8.
TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
{
  using INTS_CW    = cudf::test::fixed_width_column_wrapper<int>;
  using STRINGS_CW = cudf::test::strings_column_wrapper;
  using STRUCTS_CW = cudf::test::structs_column_wrapper;
  using cudf::test::iterators::nulls_at;

  auto strings_child = STRINGS_CW{{"año",
                                   "bit",
                                   "₹1" /*NULL*/,
                                   "aaa" /*NULL*/,
                                   "zit",
                                   "bat",
                                   "aab",
                                   "$1" /*NULL*/,
                                   "€1" /*NULL*/,
                                   "wut"},
                                  nulls_at({2, 7})};
  auto ints_child =
    INTS_CW{{1, 2, 3 /*NULL*/, 4 /*NULL*/, 5, 6, 7, 8 /*NULL*/, 9 /*NULL*/, 10},
            nulls_at({2, 7})};
  auto const input = STRUCTS_CW{{strings_child, ints_child}, nulls_at({3, 8})};

  // MIN is expected to pick the row ("aab", 7).
  {
    auto const min_strings  = STRINGS_CW{"aab"};
    auto const min_ints     = INTS_CW{7};
    auto const expected_min = cudf::table_view{{min_strings, min_ints}};
    this->reduction_test(input,
                         expected_min,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_min_aggregation());
  }

  // MAX is expected to pick the row ("zit", 5).
  {
    auto const max_strings  = STRINGS_CW{"zit"};
    auto const max_ints     = INTS_CW{5};
    auto const expected_max = cudf::table_view{{max_strings, max_ints}};
    this->reduction_test(input,
                         expected_max,
                         /*succeeded_condition=*/true,
                         /*is_valid=*/true,
                         cudf::make_max_aggregation());
  }
}

CUDF_TEST_PROGRAM_MAIN()

0 comments on commit 91fd74e

Please sign in to comment.