Skip to content

Commit

Permalink
Fix null check when comparing rows of structs in min and max redu…
Browse files Browse the repository at this point in the history
…ction/groupby operations (#9994)

When comparing structs, we need to flatten its view into a `table_view` and compare the table's rows. Nulls check for the comparison needs to be done by checking nulls in the input structs column at all levels.

This PR fixes a bug that checks for nulls only at the top level. Unit tests designed specifically to detect this bug have also been added.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Bradley Dice (https://github.com/bdice)

URL: #9994
  • Loading branch information
ttnghia authored Jan 11, 2022
1 parent 496aa47 commit d3282cb
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 4 deletions.
5 changes: 3 additions & 2 deletions cpp/src/reductions/struct_minmax_util.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,6 +23,7 @@
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>

namespace cudf {
namespace reduction {
Expand Down Expand Up @@ -97,7 +98,7 @@ class comparison_binop_generator {
table_view{{input}}, {}, std::vector<null_order>{DEFAULT_NULL_ORDER})},
d_flattened_input_ptr{table_device_view::create(flattened_input, stream)},
is_min_op(is_min_op),
has_nulls{input.has_nulls()},
has_nulls{has_nested_nulls(table_view{{input}})},
null_orders_dvec(0, stream)
{
if (is_min_op) {
Expand Down
23 changes: 22 additions & 1 deletion cpp/tests/groupby/max_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -388,5 +388,26 @@ TEST_F(groupby_max_struct_test, null_keys_and_values)
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

TEST_F(groupby_max_struct_test, values_with_null_child)
{
constexpr int32_t null{0};
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{-1};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_max_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

} // namespace test
} // namespace cudf
23 changes: 22 additions & 1 deletion cpp/tests/groupby/min_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -387,5 +387,26 @@ TEST_F(groupby_min_struct_test, null_keys_and_values)
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

TEST_F(groupby_min_struct_test, values_with_null_child)
{
constexpr int32_t null{0};
auto const keys = fixed_width_column_wrapper<int32_t>{1, 1};
auto const vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1, 1};
auto child2 = fixed_width_column_wrapper<int32_t>{{-1, null}, null_at(1)};
return structs_column_wrapper{child1, child2};
}();

auto const expect_keys = fixed_width_column_wrapper<int32_t>{1};
auto const expect_vals = [] {
auto child1 = fixed_width_column_wrapper<int32_t>{1};
auto child2 = fixed_width_column_wrapper<int32_t>{{null}, null_at(0)};
return structs_column_wrapper{child1, child2};
}();

auto agg = cudf::make_min_aggregation<groupby_aggregation>();
test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
}

} // namespace test
} // namespace cudf

0 comments on commit d3282cb

Please sign in to comment.