Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix null check when comparing structs in arg_min operation of reduction/groupby #10026

Merged
merged 4 commits into from
Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions cpp/src/reductions/struct_minmax_util.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,14 @@ class comparison_binop_generator {
{
if (is_min_op) {
null_orders = flattened_input.null_orders();
// Null structs are excluded from the operations, and that is equivalent to considering
// nulls as larger than all other non-null STRUCT elements (if finding for ARGMIN), or
// smaller than all other non-null STRUCT elements (if finding for ARGMAX).
// Thus, we need to set a separate null order for the top level structs column (which is
// stored at the first position in the null_orders array) to achieve this purpose.
null_orders.front() = cudf::null_order::AFTER;
null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream);
// If the input column has nulls (at the top level), null structs are excluded from the
// operations, and that is equivalent to considering top-level nulls as larger than all other
// non-null STRUCT elements (if finding for ARGMIN), or smaller than all other non-null STRUCT
// elements (if finding for ARGMAX). Thus, we need to set a separate null order for the top
// level structs column (which is stored at the first position in the null_orders array) to
// achieve this purpose.
if (input.has_nulls()) { null_orders.front() = cudf::null_order::AFTER; }
null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream);
}
// else: Don't need to generate nulls order to copy to device memory if we have all null orders
// are BEFORE (that happens when we have is_min_op == false).
Expand Down
51 changes: 36 additions & 15 deletions cpp/tests/groupby/max_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,43 @@ TEST_F(groupby_max_struct_test, null_keys_and_values)
TEST_F(groupby_max_struct_test, values_with_null_child)
{
constexpr int32_t null{0};
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{-1};
return structs_column_wrapper{child1, child2};
}();
{
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{-1};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_max_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

auto agg = cudf::make_max_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
{
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
auto child2 = fixed_width_column_wrapper<int32_t>{{null, null}, nulls_at({0, 1})};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{-1};
auto child2 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_max_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}
}

} // namespace test
Expand Down
51 changes: 36 additions & 15 deletions cpp/tests/groupby/min_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,22 +390,43 @@ TEST_F(groupby_min_struct_test, null_keys_and_values)
TEST_F(groupby_min_struct_test, values_with_null_child)
{
constexpr int32_t null{0};
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
return structs_column_wrapper{child1, child2};
}();
{
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_min_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

auto agg = cudf::make_min_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
{
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
auto child2 = fixed_width_column_wrapper<int32_t>{{null, null}, nulls_at({0, 1})};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
auto child2 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_min_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}
}

} // namespace test
Expand Down