From e165688999de9dc2706cd5dd6deff1317ba9a297 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Jan 2022 10:21:06 -0700 Subject: [PATCH 1/2] Fix null order --- cpp/src/reductions/struct_minmax_util.cuh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/reductions/struct_minmax_util.cuh b/cpp/src/reductions/struct_minmax_util.cuh index e5832b849bd..1de48ef482d 100644 --- a/cpp/src/reductions/struct_minmax_util.cuh +++ b/cpp/src/reductions/struct_minmax_util.cuh @@ -103,13 +103,14 @@ class comparison_binop_generator { { if (is_min_op) { null_orders = flattened_input.null_orders(); - // Null structs are excluded from the operations, and that is equivalent to considering - // nulls as larger than all other non-null STRUCT elements (if finding for ARGMIN), or - // smaller than all other non-null STRUCT elements (if finding for ARGMAX). - // Thus, we need to set a separate null order for the top level structs column (which is - // stored at the first position in the null_orders array) to achieve this purpose. - null_orders.front() = cudf::null_order::AFTER; - null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream); + // If the input column has nulls (at the top level), null structs are excluded from the + // operations, and that is equivalent to considering top-level nulls as larger than all other + // non-null STRUCT elements (if finding for ARGMIN), or smaller than all other non-null STRUCT + // elements (if finding for ARGMAX). Thus, we need to set a separate null order for the top + // level structs column (which is stored at the first position in the null_orders array) to + // achieve this purpose. + if (input.has_nulls()) { null_orders.front() = cudf::null_order::AFTER; } + null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream); } // else: Don't need to generate nulls order to copy to device memory if we have all null orders // are BEFORE (that happens when we have is_min_op == false). From 5337276cdc6d8ef08c8adf7ca3938e5c9545a027 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 12 Jan 2022 10:21:11 -0700 Subject: [PATCH 2/2] Add unit tests --- cpp/tests/groupby/max_tests.cpp | 51 +++++++++++++++++++++++---------- cpp/tests/groupby/min_tests.cpp | 51 +++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 30 deletions(-) diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp index 47bed11df30..266312d16a2 100644 --- a/cpp/tests/groupby/max_tests.cpp +++ b/cpp/tests/groupby/max_tests.cpp @@ -391,22 +391,43 @@ TEST_F(groupby_max_struct_test, null_keys_and_values) TEST_F(groupby_max_struct_test, values_with_null_child) { constexpr int32_t null{0}; - auto const keys = fixed_width_column_wrapper{1, 1}; - auto const vals = [] { - auto child1 = fixed_width_column_wrapper{1, 1}; - auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; - return structs_column_wrapper{child1, child2}; - }(); - - auto const expect_keys = fixed_width_column_wrapper{1}; - auto const expect_vals = [] { - auto child1 = fixed_width_column_wrapper{1}; - auto child2 = fixed_width_column_wrapper{-1}; - return structs_column_wrapper{child1, child2}; - }(); + { + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{1, 1}; + auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{1}; + auto child2 = fixed_width_column_wrapper{-1}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + } - auto agg = cudf::make_max_aggregation(); - test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + { + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + auto child2 = fixed_width_column_wrapper{{null, null}, nulls_at({0, 1})}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{-1}; + auto child2 = fixed_width_column_wrapper{{null}, null_at(0)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_max_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + } } } // namespace test diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp index 64bffe1c883..00fa282cee4 100644 --- a/cpp/tests/groupby/min_tests.cpp +++ b/cpp/tests/groupby/min_tests.cpp @@ -390,22 +390,43 @@ TEST_F(groupby_min_struct_test, null_keys_and_values) TEST_F(groupby_min_struct_test, values_with_null_child) { constexpr int32_t null{0}; - auto const keys = fixed_width_column_wrapper{1, 1}; - auto const vals = [] { - auto child1 = fixed_width_column_wrapper{1, 1}; - auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; - return structs_column_wrapper{child1, child2}; - }(); - - auto const expect_keys = fixed_width_column_wrapper{1}; - auto const expect_vals = [] { - auto child1 = fixed_width_column_wrapper{1}; - auto child2 = fixed_width_column_wrapper{{null}, null_at(0)}; - return structs_column_wrapper{child1, child2}; - }(); + { + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{1, 1}; + auto child2 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{1}; + auto child2 = fixed_width_column_wrapper{{null}, null_at(0)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + } - auto agg = cudf::make_min_aggregation(); - test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + { + auto const keys = fixed_width_column_wrapper{1, 1}; + auto const vals = [] { + auto child1 = fixed_width_column_wrapper{{-1, null}, null_at(1)}; + auto child2 = fixed_width_column_wrapper{{null, null}, nulls_at({0, 1})}; + return structs_column_wrapper{child1, child2}; + }(); + + auto const expect_keys = fixed_width_column_wrapper{1}; + auto const expect_vals = [] { + auto child1 = fixed_width_column_wrapper{{null}, null_at(0)}; + auto child2 = fixed_width_column_wrapper{{null}, null_at(0)}; + return structs_column_wrapper{child1, child2}; + }(); + + auto agg = cudf::make_min_aggregation(); + test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)); + } } } // namespace test