From 2b3085bc5c93e1314a7dcde10cef2e0c82a1785e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 15 May 2022 13:45:38 -0700 Subject: [PATCH 1/4] Add sliced table test --- .../stream_compaction/distinct_tests.cpp | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 2c822b93444..74c5d291b79 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -111,6 +111,35 @@ TEST_F(Distinct, NonNullTable) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, sorted_result->view()); } +TEST_F(Distinct, SlicedNonNullTable) +{ + using int32s_col = cudf::test::fixed_width_column_wrapper; + using floats_col = cudf::test::fixed_width_column_wrapper; + auto constexpr dont_care = int32_t{0}; + + auto const col1 = int32s_col{dont_care, dont_care, 6, 6, 3, 5, 8, 5, dont_care}; + auto const col2 = floats_col{dont_care, dont_care, 6, 6, 3, 4, 9, 4, dont_care}; + auto const col1_key = int32s_col{dont_care, dont_care, 20, 20, 20, 19, 21, 9, dont_care}; + auto const col2_key = int32s_col{dont_care, dont_care, 19, 19, 20, 20, 9, 21, dont_care}; + + auto const input_original = cudf::table_view{{col1, col2, col1_key, col2_key}}; + auto const input = cudf::slice(input_original, {2, 8})[0]; + auto const keys = std::vector{2, 3}; + + // Expected rows are in ascending key order; the result is sorted by its keys before comparing.
+ auto const exp_col1 = int32s_col{{5, 5, 6, 3, 8}}; + auto const exp_col2 = floats_col{{4, 4, 6, 3, 9}}; + auto const exp_col1_key = int32s_col{{9, 19, 20, 20, 21}}; + auto const exp_col2_key = int32s_col{{21, 20, 19, 20, 9}}; + auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_col1_key, exp_col2_key}}; + + auto const result = cudf::distinct(input, keys); + auto const key_view = result->select(keys.begin(), keys.end()); + auto const sorted_result = cudf::sort_by_key(result->view(), key_view); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, sorted_result->view()); +} + TEST_F(Distinct, WithNull) { cudf::test::fixed_width_column_wrapper col{{5, 4, 4, 1, 8, 1}, {1, 0, 1, 1, 1, 1}}; From 2e5666dd96eeece8622d60affbc18f10baabd189 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 15 May 2022 13:49:17 -0700 Subject: [PATCH 2/4] Add BasicSlicedLists test --- .../stream_compaction/distinct_tests.cpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 74c5d291b79..6c8e94bfcd0 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -189,6 +189,28 @@ TEST_F(Distinct, BasicList) CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result); } +TEST_F(Distinct, BasicSlicedLists) +{ + using LCW = cudf::test::lists_column_wrapper; + using ICW = cudf::test::fixed_width_column_wrapper; + auto constexpr dont_care = int32_t{0}; + + auto const idx = ICW{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care}; + auto const col = + LCW{{0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {0, 0}}; + auto const input_original = cudf::table_view({idx, col}); + auto const input = cudf::slice(input_original, {2, 12})[0]; + + auto const exp_idx = ICW{1, 2, 3, 4, 5, 6}; + auto const exp_val = LCW{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}}; + auto const expect =
cudf::table_view({exp_idx, exp_val}); + + auto const result = cudf::distinct(input, {1}); + auto const sorted_result = cudf::sort_by_key(*result, result->select({0})); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result); +} + TEST_F(Distinct, NullableList) { using LCW = cudf::test::lists_column_wrapper; From 9514e13b40be16b8d954637d9a7d608db83d84e2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 15 May 2022 14:39:15 -0700 Subject: [PATCH 3/4] Add SlicedListsofStructs test --- .../stream_compaction/distinct_tests.cpp | 74 +++++++++++++++++-- 1 file changed, 68 insertions(+), 6 deletions(-) diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 6c8e94bfcd0..92f063addd7 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -14,6 +14,13 @@ * limitations under the License. */ +#include +#include +#include +#include +#include +#include + #include #include #include @@ -21,12 +28,6 @@ #include #include -#include -#include -#include -#include -#include - #include #include @@ -296,6 +297,67 @@ TEST_F(Distinct, ListOfStruct) CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *sorted_result); } +TEST_F(Distinct, SlicedListsOfStructs) +{ + // Lists of structs with two fields (int32, string); "Don't care" rows are sliced away below. + // 0. [] == <- Don't care + // 1. [] != <- Don't care + // 2. Null == <- Don't care + // 3. Null != <- Don't care + // 4. [Null, Null] != <- Don't care + // 5. [Null] == <- Don't care + // 6. [Null] == <- Don't care + // 7. [Null] != <- Don't care + // 8. [{Null, Null}] != + // 9. [{1,'a'}, {2,'b'}] != + // 10. [{0,'a'}, {2,'b'}] != + // 11. [{0,'a'}, {2,'c'}] == + // 12. [{0,'a'}, {2,'c'}] != + // 13. [{0,Null}] == + // 14. [{0,Null}] != + // 15. [{Null, 'b'}] == <- Don't care + // 16.
[{Null, 'b'}] <- Don't care + + using int32s_col = cudf::test::fixed_width_column_wrapper; + using strings_col = cudf::test::strings_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + using cudf::test::iterators::nulls_at; + + auto const struct_col = [] { + auto child1 = + int32s_col{{-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2}, nulls_at({5, 16, 17})}; + auto child2 = strings_col{ + {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"}, + nulls_at({5, 14, 15})}; + return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})}; + }(); + + auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + auto const list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto const nullmask_buf = + cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); + auto const lists_column = + cudf::column_view(cudf::data_type(cudf::type_id::LIST), + 17, + nullptr, + static_cast(nullmask_buf.data()), + cudf::UNKNOWN_NULL_COUNT, + 0, + {offsets, struct_col}); + + auto const idx = int32s_col{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10}; + auto const input_original = cudf::table_view({idx, lists_column}); + auto const input = cudf::slice(input_original, {8, 15})[0]; + + auto const result = cudf::distinct(input, {1}); + auto const sorted_result = cudf::sort_by_key(*result, result->select({0})); + + auto const expect_map = cudf::test::fixed_width_column_wrapper{8, 9, 10, 11, 13}; + auto const expect_table = cudf::gather(input_original, expect_map); + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expect_table, *sorted_result); +} + TEST_F(Distinct, StructOfStruct) { using FWCW = cudf::test::fixed_width_column_wrapper; From ff6d063cf5cc72b62e896768a565ff2f3294751b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Sun, 15 May 2022 14:57:27 -0700 Subject: [PATCH 4/4] Add SlicedStructsOfLists test ---
.../stream_compaction/distinct_tests.cpp | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp index 92f063addd7..13065dcec8b 100644 --- a/cpp/tests/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_tests.cpp @@ -192,24 +192,24 @@ TEST_F(Distinct, BasicList) TEST_F(Distinct, BasicSlicedLists) { - using LCW = cudf::test::lists_column_wrapper; - using ICW = cudf::test::fixed_width_column_wrapper; + using int32s_col = cudf::test::fixed_width_column_wrapper; + using lists_col = cudf::test::lists_column_wrapper; auto constexpr dont_care = int32_t{0}; - auto const idx = ICW{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care}; - auto const col = - LCW{{0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {0, 0}}; + auto const idx = int32s_col{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care}; + auto const col = lists_col{ + {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}}; auto const input_original = cudf::table_view({idx, col}); auto const input = cudf::slice(input_original, {2, 12})[0]; - auto const exp_idx = ICW{1, 2, 3, 4, 5, 6}; - auto const exp_val = LCW{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}}; - auto const expect = cudf::table_view({exp_idx, exp_val}); + auto const exp_idx = int32s_col{1, 2, 3, 4, 5, 6}; + auto const exp_val = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}}; + auto const expected = cudf::table_view({exp_idx, exp_val}); auto const result = cudf::distinct(input, {1}); auto const sorted_result = cudf::sort_by_key(*result, result->select({0})); - CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *sorted_result); } TEST_F(Distinct, NullableList) @@ -323,7 +323,7 @@ TEST_F(Distinct, SlicedListsOfStructs) using structs_col =
cudf::test::structs_column_wrapper; using cudf::test::iterators::nulls_at; - auto const struct_col = [] { + auto const structs = [] { auto child1 = int32s_col{{-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2}, nulls_at({5, 16, 17})}; auto child2 = strings_col{ @@ -333,29 +333,28 @@ TEST_F(Distinct, SlicedListsOfStructs) }(); auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; - auto const list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto const lists_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; auto const nullmask_buf = - cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); - auto const lists_column = - cudf::column_view(cudf::data_type(cudf::type_id::LIST), - 17, - nullptr, - static_cast(nullmask_buf.data()), - cudf::UNKNOWN_NULL_COUNT, - 0, - {offsets, struct_col}); + cudf::test::detail::make_null_mask(lists_nullmask.begin(), lists_nullmask.end()); + auto const lists = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + 17, + nullptr, + static_cast(nullmask_buf.data()), + cudf::UNKNOWN_NULL_COUNT, + 0, + {offsets, structs}); auto const idx = int32s_col{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10}; - auto const input_original = cudf::table_view({idx, lists_column}); + auto const input_original = cudf::table_view({idx, lists}); auto const input = cudf::slice(input_original, {8, 15})[0]; auto const result = cudf::distinct(input, {1}); auto const sorted_result = cudf::sort_by_key(*result, result->select({0})); - auto const expect_map = cudf::test::fixed_width_column_wrapper{8, 9, 10, 11, 13}; - auto const expect_table = cudf::gather(input_original, expect_map); + auto const exp_map = cudf::test::fixed_width_column_wrapper{8, 9, 10, 11, 13}; + auto const expected_table = cudf::gather(input_original, exp_map); - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expect_table, *sorted_result); +
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected_table, *sorted_result); } TEST_F(Distinct, StructOfStruct) @@ -416,6 +415,30 @@ TEST_F(Distinct, StructOfStruct) CUDF_TEST_EXPECT_COLUMNS_EQUAL(sliced_expect->get_column(1), sorted_sliced_result->get_column(1)); } +TEST_F(Distinct, SlicedStructsOfLists) +{ + using lists_col = cudf::test::lists_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const structs = [] { + auto child = lists_col{ + {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}}; + return structs_col{{child}}; + }(); + + auto const input_original = cudf::table_view({structs}); + auto const input = cudf::slice(input_original, {2, 12})[0]; + + auto const expected_structs = [] { + auto child = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}}; + return structs_col{{child}}; + }(); + auto const expected = cudf::table_view({expected_structs}); + + auto const result = cudf::distinct(input, {0}); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); +} + TEST_F(Distinct, ListOfEmptyStruct) { // 0. [] ==