From d3282cb2ca0e65c10844c44697a5a5ac671a0be9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 10 Jan 2022 22:26:03 -0700 Subject: [PATCH] Fix null check when comparing rows of structs in `min` and `max` reduction/groupby operations (#9994) When comparing structs, we need to flatten its view into a `table_view` and compare the table's rows. Nulls check for the comparison needs to be done by checking nulls in the input structs column at all levels. This PR fixes a bug that checks for nulls only at the top level. Unit tests designed specifically to detect this bug have also been added. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Mike Wilson (https://github.com/hyperbolic2346) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/9994 --- cpp/src/reductions/struct_minmax_util.cuh | 5 +++-- cpp/tests/groupby/max_tests.cpp | 23 ++++++++++++++++++++++- cpp/tests/groupby/min_tests.cpp | 23 ++++++++++++++++++++++- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/cpp/src/reductions/struct_minmax_util.cuh b/cpp/src/reductions/struct_minmax_util.cuh index 8a7e94ea4ca..e5832b849bd 100644 --- a/cpp/src/reductions/struct_minmax_util.cuh +++ b/cpp/src/reductions/struct_minmax_util.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include namespace cudf { namespace reduction { @@ -97,7 +98,7 @@ class comparison_binop_generator { table_view{{input}}, {}, std::vector{DEFAULT_NULL_ORDER})}, d_flattened_input_ptr{table_device_view::create(flattened_input, stream)}, is_min_op(is_min_op), - has_nulls{input.has_nulls()}, + has_nulls{has_nested_nulls(table_view{{input}})}, null_orders_dvec(0, stream) { if (is_min_op) { diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index 983802cb9a2..47bed11df30 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -388,5 +388,26 @@ TEST_F(groupby_max_struct_test, null_keys_and_values) test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } +TEST_F(groupby_max_struct_test, values_with_null_child) +{ + constexpr int32_t null{0}; + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{1, 1}; + auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{1}; + auto child2 = fixed_width_column_wrapper{-1}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index aca3384768c..64bffe1c883 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -387,5 +387,26 @@ TEST_F(groupby_min_struct_test, null_keys_and_values) test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); } +TEST_F(groupby_min_struct_test, values_with_null_child) +{ + constexpr int32_t null{0}; + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{1, 1}; + auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{1}; + auto child2 = fixed_width_column_wrapper{{null}, null_at(0)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); +} + } // namespace test } // namespace cudf