From 5fbd86abc14acca579edf682d99448462620d66d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 29 Aug 2022 14:38:32 -0500 Subject: [PATCH 1/6] Default to equal NaNs in make_collect_set_aggregation. --- cpp/include/cudf/aggregation.hpp | 7 ++++--- cpp/tests/reductions/collect_ops_tests.cpp | 12 ++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index a26a0c7947b..a92da0b0347 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -515,9 +515,10 @@ std::unique_ptr make_collect_list_aggregation( * @return A COLLECT_SET aggregation object */ template -std::unique_ptr make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE, - null_equality nulls_equal = null_equality::EQUAL, - nan_equality nans_equal = nan_equality::UNEQUAL); +std::unique_ptr make_collect_set_aggregation( + null_policy null_handling = null_policy::INCLUDE, + null_equality nulls_equal = null_equality::EQUAL, + nan_equality nans_equal = nan_equality::ALL_EQUAL); /** * @brief Factory to create a LAG aggregation diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp index a0fdab5e994..842aaa3ab07 100644 --- a/cpp/tests/reductions/collect_ops_tests.cpp +++ b/cpp/tests/reductions/collect_ops_tests.cpp @@ -196,15 +196,19 @@ TEST_F(CollectTest, CollectSetWithNaN) // nan unequal with null equal fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f}, {1, 1, 1, 1, 1, 1, 1, 0}}; - auto const ret1 = collect_set(col, make_collect_set_aggregation()); + auto const ret1 = + collect_set(col, + make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view()); // nan unequal with null unequal fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f}, {1, 1, 1, 1, 1, 1, 1, 0, 0}}; - auto const ret2 = collect_set( - col, - make_collect_set_aggregation(null_policy::INCLUDE, null_equality::UNEQUAL)); + auto const ret2 = + collect_set(col, + make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view()); // nan equal with null equal From 52de625d44a567e6c27756ba5a0663d08d1c14c8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 30 Aug 2022 12:40:16 -0500 Subject: [PATCH 2/6] Test options explicitly. --- cpp/tests/groupby/collect_set_tests.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index cf324cf3a8e..818a4c63a1f 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -248,7 +248,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f}, validity_col{true, true, true, true, true, true, true, false}}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } @@ -258,7 +261,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f}, validity_col{true, true, true, true, true, true, true, false, false}}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_unequal()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } @@ -267,7 +273,10 @@ TEST_F(CollectSetTest, FloatsWithNaN) { vals_expected = {{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN}}; auto const [out_keys, out_lists] = - groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_exclude()); + groupby_collect_set(keys, + vals, + cudf::make_collect_set_aggregation( + null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity); CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity); } From 72ad130d45066fb873ecdb12d97f00ad28cd048d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 8 Sep 2022 10:34:16 -0700 Subject: [PATCH 3/6] Make test explicit about its options. --- cpp/tests/rolling/collect_ops_test.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index a0af8f150e3..00c85fb5439 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2118,13 +2118,14 @@ TEST_F(CollectSetTest, FloatGroupedRollingWindowWithNaNs) auto const following = 1; auto const min_periods = 1; // test on nan_equality::UNEQUAL - auto const result = - grouped_rolling_collect_set(table_view{std::vector{group_column}}, - input_column, - preceding, - following, - min_periods, - *make_collect_set_aggregation()); + auto const result = grouped_rolling_collect_set( + table_view{std::vector{group_column}}, + input_column, + preceding, + following, + min_periods, + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); auto const expected_result = lists_column_wrapper{ {{0.2341, 1.23}, std::initializer_list{true, true}}, From 05b8d6cc07ddca0b11c0a010f1a0cb7f6504ff06 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 9 Sep 2022 08:39:28 -0700 Subject: [PATCH 4/6] More explicit option setting. --- cpp/tests/rolling/collect_ops_test.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 00c85fb5439..fd1ec2e4fb9 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2187,7 +2187,9 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) prev_column, foll_column, 1, - *make_collect_set_aggregation()); + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); + auto const expected_result = lists_column_wrapper{ From dae9b3870dfed19e8e0593d86e634c134bea3399 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 20 Sep 2022 23:17:59 -0500 Subject: [PATCH 5/6] Fix style. --- cpp/tests/rolling/collect_ops_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 573d58d33da..30ac8a719e6 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2190,7 +2190,6 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) *make_collect_set_aggregation( null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); - auto const expected_result = lists_column_wrapper{ {0.2341, 1.23}, From a622f8fba4fe6c36f46f306542c764c424f15249 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 20 Oct 2022 09:15:54 -0500 Subject: [PATCH 6/6] Fix tests. --- cpp/tests/rolling/collect_ops_test.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index 30ac8a719e6..8a396d793a3 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -2202,8 +2202,13 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_column_based_window->view()); - auto const result_fixed_window = rolling_collect_set( - input_column, 2, 1, 1, *make_collect_set_aggregation()); + auto const result_fixed_window = + rolling_collect_set(input_column, + 2, + 1, + 1, + *make_collect_set_aggregation( + null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_fixed_window->view()); auto const result_with_nulls_excluded = @@ -2211,7 +2216,8 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs) 2, 1, 1, - *make_collect_set_aggregation(null_policy::EXCLUDE)); + *make_collect_set_aggregation( + null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_with_nulls_excluded->view());