diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2ffd62f1b53..7f66c83e363 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -361,6 +361,7 @@ add_library(
   src/quantiles/quantiles.cu
   src/reductions/all.cu
   src/reductions/any.cu
+  src/reductions/collect_ops.cu
   src/reductions/max.cu
   src/reductions/mean.cu
   src/reductions/min.cu
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index 0565f332b48..d8f23e8d7cb 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 #include
 #include
+#include <cudf/lists/lists_column_view.hpp>
 #include
 namespace cudf {
@@ -254,5 +255,69 @@ std::unique_ptr nth_element(
   rmm::cuda_stream_view stream = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Collect the elements of the input column into a single list scalar
+ *
+ * @param col Input column to collect from
+ * @param null_handling Indicates whether null elements are included in the collected list
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return The collected list as a scalar
+ */
+std::unique_ptr collect_list(
+  column_view const& col,
+  null_policy null_handling,
+  rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Merge a bunch of list scalars into a single list scalar
+ *
+ * @param col Input lists column whose rows are the list scalars to be merged
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return The merged list as a scalar
+ */
+std::unique_ptr merge_lists(
+  lists_column_view const& col,
+  rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Collect the elements of the input column into a list scalar without duplicated elements
+ *
+ * @param col Input column to collect from
+ * @param null_handling Indicates whether null elements are included in the collected list
+ * @param nulls_equal Indicates if null values will be considered as equal values
+ * @param nans_equal Indicates if NaN values will be considered as equal values
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return The collected list with unique elements as a scalar
+ */
+std::unique_ptr collect_set(
+  column_view const& col,
+  null_policy null_handling,
+  null_equality nulls_equal,
+  nan_equality nans_equal,
+  rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Merge a bunch of list scalars into a single list scalar, then drop duplicated elements
+ *
+ * @param col Input lists column whose rows are the list scalars to be merged
+ * @param nulls_equal Indicates if null values will be considered as equal values
+ * @param nans_equal Indicates if NaN values will be considered as equal values
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return The merged list with unique elements as a scalar
+ */
+std::unique_ptr merge_sets(
+  lists_column_view const& col,
+  null_equality nulls_equal,
+  nan_equality nans_equal,
+  rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 }  // namespace reduction
 }  // namespace cudf
diff --git a/cpp/src/reductions/collect_ops.cu b/cpp/src/reductions/collect_ops.cu
new file mode 100644
index 00000000000..c9bd06a1171
--- /dev/null
+++ b/cpp/src/reductions/collect_ops.cu
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace cudf {
+namespace reduction {
+
+namespace {
+
+/**
+ * @brief Remove duplicated elements from the list held by a list scalar
+ *
+ * The scalar is expanded into a lists column (requested size 1) so that
+ * `lists::drop_list_duplicates` can be applied to it; the sliced child of the
+ * result is then converted back into a list scalar.
+ *
+ * This helper is only used inside this translation unit, so it lives in an
+ * anonymous namespace.
+ *
+ * @param scalar List scalar whose elements are de-duplicated
+ * @param nulls_equal Indicates if null values will be considered as equal values
+ * @param nans_equal Indicates if NaN values will be considered as equal values
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return List scalar built from the de-duplicated elements
+ */
+std::unique_ptr drop_duplicates(list_scalar const& scalar,
+                                null_equality nulls_equal,
+                                nan_equality nans_equal,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr)
+{
+  auto list_wrapper = lists::detail::make_lists_column_from_scalar(scalar, 1, stream, mr);
+  auto lcw          = lists_column_view(list_wrapper->view());
+  // NOTE(review): this call is not given `stream`, unlike every other call in this
+  // function. Confirm which stream it runs on, or switch to a stream-accepting
+  // overload if one is available.
+  auto no_dup_wrapper = lists::drop_list_duplicates(lcw, nulls_equal, nans_equal, mr);
+  auto no_dup         = lists_column_view(no_dup_wrapper->view()).get_sliced_child(stream);
+  return make_list_scalar(no_dup, stream, mr);
+}
+
+}  // namespace
+
+/**
+ * Collects `col` into a list scalar. When nulls are excluded and the column
+ * actually has nulls, the null rows are filtered out first and the (now
+ * unnecessary) null mask is replaced by an empty one.
+ */
+std::unique_ptr collect_list(column_view const& col,
+                             null_policy null_handling,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
+{
+  if (null_handling == null_policy::EXCLUDE && col.has_nulls()) {
+    auto d_view            = column_device_view::create(col, stream);
+    auto filter            = detail::validity_accessor(*d_view);
+    auto null_purged_table = detail::copy_if(table_view{{col}}, filter, stream, mr);
+    // Keep the filtered column inside its owning smart pointer. Releasing it to a
+    // raw pointer and only moving from the pointee would leak the column object.
+    auto null_purged_col = std::move(null_purged_table->release().front());
+    null_purged_col->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);
+    return std::make_unique(std::move(*null_purged_col), true, stream, mr);
+  }
+  return make_list_scalar(col, stream, mr);
+}
+
+/**
+ * Merges all rows of the lists column by taking its sliced child column as the
+ * content of the resulting list scalar.
+ */
+std::unique_ptr merge_lists(lists_column_view const& col,
+                            rmm::cuda_stream_view stream,
+                            rmm::mr::device_memory_resource* mr)
+{
+  auto flatten_col = col.get_sliced_child(stream);
+  return make_list_scalar(flatten_col, stream, mr);
+}
+
+/**
+ * Collects `col` with `collect_list`, then removes duplicated elements from the
+ * collected list.
+ */
+std::unique_ptr collect_set(column_view const& col,
+                            null_policy null_handling,
+                            null_equality nulls_equal,
+                            nan_equality nans_equal,
+                            rmm::cuda_stream_view stream,
+                            rmm::mr::device_memory_resource* mr)
+{
+  auto scalar = collect_list(col, null_handling, stream, mr);
+  // `collect_list` hands back a base-class pointer; `drop_duplicates` needs the list scalar.
+  auto ls = dynamic_cast(scalar.get());
+  return drop_duplicates(*ls, nulls_equal, nans_equal, stream, mr);
+}
+
+/**
+ * Flattens the lists column into a single list scalar, then removes duplicated
+ * elements from it.
+ */
+std::unique_ptr merge_sets(lists_column_view const& col,
+                           null_equality nulls_equal,
+                           nan_equality nans_equal,
+                           rmm::cuda_stream_view stream,
+                           rmm::mr::device_memory_resource* mr)
+{
+  auto flatten_col = col.get_sliced_child(stream);
+  auto scalar      = std::make_unique(flatten_col, true, stream, mr);
+  return drop_duplicates(*scalar, nulls_equal, nans_equal, stream, mr);
+}
+
+}  // namespace reduction
+}  // namespace cudf
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 234eaf51f96..13574f83d4e 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -102,6 +102,22 @@ struct reduce_dispatch_functor {
         auto nth_agg = dynamic_cast(agg.get());
         return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr);
       } break;
+      case aggregation::COLLECT_LIST: {
+        auto col_agg = dynamic_cast(agg.get());
+        return reduction::collect_list(col, col_agg->_null_handling, stream, mr);
+      } break;
+      case aggregation::COLLECT_SET: {
+        auto col_agg = dynamic_cast(agg.get());
+        return reduction::collect_set(
+          col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
+      } break;
+      case aggregation::MERGE_LISTS: {
+        return reduction::merge_lists(col, stream, mr);
+      } break;
+      case aggregation::MERGE_SETS: {
+        auto col_agg = dynamic_cast(agg.get());
+        return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
+      } break;
       default: CUDF_FAIL("Unsupported reduction operator");
     }
   }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index f96edd3ce5a..0ce4fc6f1bd 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -150,7 +150,8 @@ ConfigureTest(
 # ##################################################################################################
 # * reduction tests -------------------------------------------------------------------------------
 ConfigureTest(
-  REDUCTION_TEST reductions/rank_tests.cpp reductions/reduction_tests.cpp reductions/scan_tests.cpp
+  REDUCTION_TEST reductions/collect_ops_tests.cpp reductions/rank_tests.cpp
+  reductions/reduction_tests.cpp reductions/scan_tests.cpp
 )
 
 # ##################################################################################################
diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp
new file mode 100644
index 00000000000..d8b1d9c5ca7
--- /dev/null
+++ b/cpp/tests/reductions/collect_ops_tests.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+using namespace cudf::test::iterators;
+
+namespace cudf::test {
+
+// Typed fixture shared by the fixed-width collect/merge reduction tests below.
+template
+struct CollectTestFixedWidth : public cudf::test::BaseFixture {
+};
+
+using CollectFixedWidthTypes =
+  Concat;
+TYPED_TEST_SUITE(CollectTestFixedWidth, CollectFixedWidthTypes);
+
+// ------------------------------------------------------------------------
+// collect_list reduction: the resulting list scalar is compared against the input
+// column (nulls kept) or against the input with its null rows removed (nulls excluded).
+TYPED_TEST(CollectTestFixedWidth, CollectList)
+{
+  using fw_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  std::vector values({5, 0, -120, -111, 0, 64, 63, 99, 123, -16});
+  std::vector null_mask({1, 1, 0, 1, 1, 1, 0, 1, 0, 1});
+
+  // default aggregation on a column without nulls
+  fw_wrapper col(values.begin(), values.end());
+  auto const ret = cudf::reduce(col, make_collect_list_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, dynamic_cast(ret.get())->view());
+
+  // default aggregation on a column with nulls: nulls are expected in the result
+  fw_wrapper col_with_null(values.begin(), values.end(), null_mask.begin());
+  auto const ret1 =
+    cudf::reduce(col_with_null, make_collect_list_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_with_null, dynamic_cast(ret1.get())->view());
+
+  // null_policy::EXCLUDE on a column with nulls: null rows are expected to be dropped
+  fw_wrapper col_null_filtered{{5, 0, -111, 0, 64, 99, -16}};
+  auto const ret2 = cudf::reduce(
+    col_with_null, make_collect_list_aggregation(null_policy::EXCLUDE), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_null_filtered, dynamic_cast(ret2.get())->view());
+}
+
+// collect_set reduction over fixed-width columns for each null handling / null equality
+// combination. The expected columns list distinct values in ascending order, nulls last.
+TYPED_TEST(CollectTestFixedWidth, CollectSet)
+{
+  using fw_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  std::vector values({5, 0, 120, 0, 0, 64, 64, 99, 120, 99});
+  std::vector null_mask({1, 1, 0, 1, 1, 1, 0, 1, 0, 1});
+
+  fw_wrapper col(values.begin(), values.end());
+  fw_wrapper col_with_null(values.begin(), values.end(), null_mask.begin());
+
+  auto null_exclude = make_collect_set_aggregation(
+    null_policy::EXCLUDE, null_equality::UNEQUAL, nan_equality::ALL_EQUAL);
+  auto null_eq = make_collect_set_aggregation(
+    null_policy::INCLUDE, null_equality::EQUAL, nan_equality::ALL_EQUAL);
+  auto null_unequal = make_collect_set_aggregation(
+    null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::ALL_EQUAL);
+
+  // test without nulls
+  auto const ret = cudf::reduce(col, null_eq, data_type{type_id::LIST});
+  fw_wrapper expected{{0, 5, 64, 99, 120}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, dynamic_cast(ret.get())->view());
+
+  // null exclude: no null is expected in the result
+  auto const ret1 = cudf::reduce(col_with_null, null_exclude, data_type{type_id::LIST});
+  fw_wrapper expected1{{0, 5, 64, 99}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view());
+
+  // null equal: the three input nulls are expected to collapse into one
+  auto const ret2 = cudf::reduce(col_with_null, null_eq, data_type{type_id::LIST});
+  fw_wrapper expected2{{0, 5, 64, 99, -1}, {1, 1, 1, 1, 0}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+
+  // null unequal: all three input nulls are expected to be kept
+  auto const ret3 = cudf::reduce(col_with_null, null_unequal, data_type{type_id::LIST});
+  fw_wrapper expected3{{0, 5, 64, 99, -1, -1, -1}, {1, 1, 1, 1, 0, 0, 0}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, dynamic_cast(ret3.get())->view());
+}
+
+// merge_lists reduction: the rows of a lists column are expected to be concatenated in
+// order; a null list row contributes no elements.
+TYPED_TEST(CollectTestFixedWidth, MergeLists)
+{
+  using fw_wrapper = cudf::test::fixed_width_column_wrapper;
+  using lists_col = cudf::test::lists_column_wrapper;
+
+  // test without nulls
+  auto const lists1 = lists_col{{1, 2, 3}, {}, {}, {4}, {5, 6, 7}, {8, 9}, {}};
+  auto const expected1 = fw_wrapper{{1, 2, 3, 4, 5, 6, 7, 8, 9}};
+  auto const ret1 = cudf::reduce(lists1, make_merge_lists_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view());
+
+  // test with nulls (null elements inside lists, plus one null list at row 5)
+  auto const lists2 = lists_col{{
+                                  lists_col{1, 2, 3},
+                                  lists_col{},
+                                  lists_col{{0, 4, 0, 5}, nulls_at({0, 2})},
+                                  lists_col{{0, 0, 0}, all_nulls()},
+                                  lists_col{6},
+                                  lists_col{-1, -1},  // null_list
+                                  lists_col{7, 8, 9},
+                                },
+                                null_at(5)};
+  auto const expected2 = fw_wrapper{{1, 2, 3, 0, 4, 0, 5, 0, 0, 0, 6, 7, 8, 9},
+                                    {1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1}};
+  auto const ret2 = cudf::reduce(lists2, make_merge_lists_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+}
+
+// merge_sets reduction: rows are merged and duplicates dropped, with the number of nulls
+// kept in the expected result depending on the requested null equality.
+TYPED_TEST(CollectTestFixedWidth, MergeSets)
+{
+  using fw_wrapper = cudf::test::fixed_width_column_wrapper;
+  using lists_col = cudf::test::lists_column_wrapper;
+
+  // test without nulls
+  auto const lists1 = lists_col{{1, 2, 3}, {}, {}, {4}, {1, 3, 4}, {0, 3, 10}, {}};
+  auto const expected1 = fw_wrapper{{0, 1, 2, 3, 4, 10}};
+  auto const ret1 = cudf::reduce(lists1, make_merge_sets_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view());
+
+  // test with null_equal (default aggregation arguments): a single null is expected
+  auto const lists2 = lists_col{{
+                                  lists_col{1, 2, 3},
+                                  lists_col{},
+                                  lists_col{{0, 4, 0, 5}, nulls_at({0, 2})},
+                                  lists_col{{0, 0, 0}, all_nulls()},
+                                  lists_col{5},
+                                  lists_col{-1, -1},  // null_list
+                                  lists_col{1, 3, 5},
+                                },
+                                null_at(5)};
+  auto const expected2 = fw_wrapper{{1, 2, 3, 4, 5, 0}, {1, 1, 1, 1, 1, 0}};
+  auto const ret2 = cudf::reduce(lists2, make_merge_sets_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+
+  // test with null_unequal on the same input: all five null elements are expected
+  auto const& lists3 = lists2;
+  auto const expected3 = fw_wrapper{{1, 2, 3, 4, 5, 0, 0, 0, 0, 0}, {1, 1, 1, 1, 1, 0, 0, 0, 0, 0}};
+  auto const ret3 = cudf::reduce(
+    lists3, make_merge_sets_aggregation(null_equality::UNEQUAL), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, dynamic_cast(ret3.get())->view());
+}
+
+// Non-typed fixture for the floating-point (NaN) and string test cases.
+struct CollectTest : public cudf::test::BaseFixture {
+};
+
+// collect_set on a floating-point column holding duplicated values, +/-NaN and two nulls,
+// for each NaN equality / null equality combination.
+TEST_F(CollectTest, CollectSetWithNaN)
+{
+  using fp_wrapper = cudf::test::fixed_width_column_wrapper;
+
+  fp_wrapper col{{1.0f, 1.0f, -2.3e-5f, -2.3e-5f, 2.3e5f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f},
+                 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}};
+
+  // nan unequal with null equal (default aggregation arguments)
+  fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f},
+                       {1, 1, 1, 1, 1, 1, 1, 0}};
+  auto const ret1 = cudf::reduce(col, make_collect_set_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view());
+
+  // nan unequal with null unequal
+  fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f},
+                       {1, 1, 1, 1, 1, 1, 1, 0, 0}};
+  auto const ret2 =
+    cudf::reduce(col,
+                 make_collect_set_aggregation(null_policy::INCLUDE, null_equality::UNEQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+
+  // nan equal with null equal
+  fp_wrapper expected3{{-2.3e-5f, 1.0f, 2.3e5f, NAN, 0.0f}, {1, 1, 1, 1, 0}};
+  auto const ret3 =
+    cudf::reduce(col,
+                 make_collect_set_aggregation(
+                   null_policy::INCLUDE, null_equality::EQUAL, nan_equality::ALL_EQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, dynamic_cast(ret3.get())->view());
+
+  // nan equal with null unequal
+  fp_wrapper expected4{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, 0.0f, 0.0f}, {1, 1, 1, 1, 0, 0}};
+  auto const ret4 =
+    cudf::reduce(col,
+                 make_collect_set_aggregation(
+                   null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::ALL_EQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, dynamic_cast(ret4.get())->view());
+}
+
+// merge_sets on a lists column of floats holding +/-NaN and null elements, for each
+// NaN equality / null equality combination.
+TEST_F(CollectTest, MergeSetsWithNaN)
+{
+  using fp_wrapper = cudf::test::fixed_width_column_wrapper;
+  using lists_col = cudf::test::lists_column_wrapper;
+
+  auto const col = lists_col{
+    lists_col{1.0f, -2.3e-5f, NAN},
+    lists_col{},
+    lists_col{{-2.3e-5f, 2.3e5f, NAN, 0.0f}, nulls_at({3})},
+    lists_col{{0.0f, 0.0f}, all_nulls()},
+    lists_col{-NAN},
+  };
+
+  // nan unequal with null equal (default aggregation arguments)
+  fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, NAN, NAN, 0.0f}, {1, 1, 1, 1, 1, 1, 0}};
+  auto const ret1 = cudf::reduce(col, make_merge_sets_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast(ret1.get())->view());
+
+  // nan unequal with null unequal
+  fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, NAN, NAN, 0.0f, 0.0f, 0.0f},
+                       {1, 1, 1, 1, 1, 1, 0, 0, 0}};
+  auto const ret2 = cudf::reduce(
+    col, make_merge_sets_aggregation(null_equality::UNEQUAL), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+
+  // nan equal with null equal
+  fp_wrapper expected3{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, 0.0f}, {1, 1, 1, 1, 0}};
+  auto const ret3 =
+    cudf::reduce(col,
+                 make_merge_sets_aggregation(null_equality::EQUAL, nan_equality::ALL_EQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, dynamic_cast(ret3.get())->view());
+
+  // nan equal with null unequal
+  fp_wrapper expected4{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, 0.0f, 0.0f, 0.0f}, {1, 1, 1, 1, 0, 0, 0}};
+  auto const ret4 =
+    cudf::reduce(col,
+                 make_merge_sets_aggregation(null_equality::UNEQUAL, nan_equality::ALL_EQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, dynamic_cast(ret4.get())->view());
+}
+
+// All four aggregations (collect_list, collect_set, merge_lists, merge_sets) applied to
+// string data with nulls.
+TEST_F(CollectTest, CollectStrings)
+{
+  using str_col = cudf::test::strings_column_wrapper;
+  using lists_col = cudf::test::lists_column_wrapper;
+
+  auto const s_col =
+    str_col{{"a", "a", "b", "b", "b", "c", "c", "d", "e", "e"}, {1, 1, 1, 0, 1, 1, 0, 1, 1, 1}};
+
+  // collect_list including nulls
+  auto const ret1 = cudf::reduce(s_col, make_collect_list_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(s_col, dynamic_cast(ret1.get())->view());
+
+  // collect_list excluding nulls
+  auto const expected2 = str_col{"a", "a", "b", "b", "c", "d", "e", "e"};
+  auto const ret2 = cudf::reduce(
+    s_col, make_collect_list_aggregation(null_policy::EXCLUDE), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast(ret2.get())->view());
+
+  // collect_set with null_equal
+  auto const expected3 = str_col{{"a", "b", "c", "d", "e", ""}, null_at(5)};
+  auto const ret3 = cudf::reduce(s_col, make_collect_set_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, dynamic_cast(ret3.get())->view());
+
+  // collect_set with null_unequal
+  auto const expected4 = str_col{{"a", "b", "c", "d", "e", "", ""}, {1, 1, 1, 1, 1, 0, 0}};
+  auto const ret4 =
+    cudf::reduce(s_col,
+                 make_collect_set_aggregation(null_policy::INCLUDE, null_equality::UNEQUAL),
+                 data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, dynamic_cast(ret4.get())->view());
+
+  // Lists-of-strings input for the merge_* cases; the "null" literals sit at positions
+  // that the accompanying null_at(...) masks mark as null.
+  lists_col strings{{"a"},
+                    {},
+                    {"a", "b"},
+                    lists_col{{"b", "null", "c"}, null_at(1)},
+                    lists_col{{"null", "d"}, null_at(0)},
+                    lists_col{{"null"}, null_at(0)},
+                    {"e"}};
+
+  // merge_lists
+  auto const expected5 = str_col{{"a", "a", "b", "b", "null", "c", "null", "d", "null", "e"},
+                                 {1, 1, 1, 1, 0, 1, 0, 1, 0, 1}};
+  auto const ret5 = cudf::reduce(strings, make_merge_lists_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected5, dynamic_cast(ret5.get())->view());
+
+  // merge_sets with null_equal
+  auto const expected6 = str_col{{"a", "b", "c", "d", "e", "null"}, {1, 1, 1, 1, 1, 0}};
+  auto const ret6 = cudf::reduce(strings, make_merge_sets_aggregation(), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected6, dynamic_cast(ret6.get())->view());
+
+  // merge_sets with null_unequal
+  auto const expected7 =
+    str_col{{"a", "b", "c", "d", "e", "null", "null", "null"}, {1, 1, 1, 1, 1, 0, 0, 0}};
+  auto const ret7 = cudf::reduce(
+    strings, make_merge_sets_aggregation(null_equality::UNEQUAL), data_type{type_id::LIST});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected7, dynamic_cast(ret7.get())->view());
+}
+
+}  // namespace cudf::test