From b1db648b3bc3776900e62581f3accb31dec5d9c0 Mon Sep 17 00:00:00 2001 From: srikarvanavasam Date: Thu, 28 Jul 2022 16:21:37 -0700 Subject: [PATCH 1/6] init --- cpp/include/cudf/detail/structs/utilities.hpp | 29 --- cpp/src/structs/utilities.cpp | 25 --- cpp/tests/structs/utilities_tests.cpp | 210 +++++++++--------- 3 files changed, 104 insertions(+), 160 deletions(-) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 7d8ac5c9325..1a4b8f02dd3 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns( std::vector const& null_precedence, column_nullability nullability = column_nullability::MATCH_INCOMING); -/** - * @brief Unflatten columns flattened as by `flatten_nested_columns()`, - * based on the provided `blueprint`. - * - * cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector - * before the child/member columns. - * E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to: - * 1. Null Vector for STRUCT_1 - * 2. Null Vector for STRUCT_2 - * 3. Member STRUCT_2::A - * 4. Member STRUCT_2::B - * 5. Member STRUCT_1::C - * - * `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows - * the convention above. - * - * Note: This function requires a null-mask vector for each STRUCT column, including for nested - * STRUCT members. - * - * @param flattened "Flattened" `table` of input columns, following the conventions in - * `flatten_nested_columns()`. - * @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines - * the nesting of the reconstructed output table. - * @return std::unique_ptr Unflattened table (with nested STRUCT columns) reconstructed - * based on `blueprint`. - */ -std::unique_ptr unflatten_nested_columns(std::unique_ptr&& flattened, - table_view const& blueprint); - /** * @brief Push down nulls from a parent mask into a child column, using bitwise AND. * diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 1d5ebfaa7fc..3260a55516d 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -276,31 +276,6 @@ std::unique_ptr unflatten_struct(vector_of_columns& flattened, } } // namespace -std::unique_ptr unflatten_nested_columns(std::unique_ptr&& flattened, - table_view const& blueprint) -{ - // Bail, if LISTs are present. - auto const has_lists = std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists); - CUDF_EXPECTS(not has_lists, "Unflattening LIST columns is not supported."); - - // If there are no STRUCTs, unflattening is a NOOP. - auto const has_structs = std::any_of(blueprint.begin(), blueprint.end(), is_struct); - if (not has_structs) { - return std::move(flattened); // Unchanged. - } - - // There be struct columns. - // Note: Requires null vectors for all struct input columns. - auto flattened_columns = flattened->release(); - auto current_idx = column_index_t{0}; - - auto unflattening_iter = - thrust::make_transform_iterator(blueprint.begin(), unflattener{flattened_columns, current_idx}); - - return std::make_unique( - vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_columns()}); -} - // Helper function to superimpose validity of parent struct // over the specified member (child) column. void superimpose_parent_nulls(bitmask_type const* parent_null_mask, diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index b26ea87c5b8..925a3c4c223 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -30,23 +30,9 @@ namespace cudf::test { -/** - * @brief Round-trip input table through flatten/unflatten, - * verify that the table remains equivalent. - */ -void flatten_unflatten_compare(table_view const& input_table) -{ - using namespace cudf::structs::detail; - - auto flattened = flatten_nested_columns(input_table, {}, {}, column_nullability::FORCE); - auto unflattened = - unflatten_nested_columns(std::make_unique(flattened), input_table); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, unflattened->view()); -} - using namespace cudf; using namespace iterators; +using namespace cudf::structs::detail; using strings = strings_column_wrapper; using dictionary = dictionary_column_wrapper; using structs = structs_column_wrapper; @@ -57,8 +43,7 @@ using nums = fixed_width_column_wrapper; template using lists = lists_column_wrapper; -struct StructUtilitiesTest : BaseFixture { -}; +struct StructUtilitiesTest : BaseFixture {}; template struct TypedStructUtilitiesTest : StructUtilitiesTest { @@ -66,7 +51,7 @@ struct TypedStructUtilitiesTest : StructUtilitiesTest { TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes); -TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported) +TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel) { using T = TypeParam; using lists = lists_column_wrapper; @@ -75,8 +60,10 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported) auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}}; auto nums_col = nums{{0, 1, 2}, null_at(6)}; - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{lists_col, nums_col}}), - cudf::logic_error); + auto table = cudf::table_view{{lists_col, nums_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT( + table, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) @@ -88,10 +75,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}}; auto nums_member = nums{{0, 1, 2}, null_at(6)}; auto structs_col = structs{{nums_member, lists_member}}; + auto nums_col = nums{{0, 1, 2}, null_at(6)}; - auto nums_col = nums{{0, 1, 2}, null_at(6)}; - - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}), + EXPECT_THROW(flatten_nested_columns( + cudf::table_view{{nums_col, structs_col}}, {}, {}, column_nullability::FORCE), cudf::logic_error); } @@ -104,7 +91,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) auto strings_col = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, strings_col, nuther_nums_col}}); + auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT( + table, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) @@ -117,117 +107,125 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto structs_col = structs{{nums_member, strings_member}}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); + auto table = cudf::table_view{{nums_col, structs_col}}; + auto expected = + cudf::table_view{{nums_col, + nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}, + strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}}}; + + CUDF_TEST_EXPECT_TABLES_EQUIVALENT( + expected, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } -TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) -{ - using T = TypeParam; - using nums = fixed_width_column_wrapper; +// TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) +// { +// using T = TypeParam; +// using nums = fixed_width_column_wrapper; - auto nums_member = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}; - auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; - auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; - auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +// auto nums_member = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}; +// auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; +// auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; +// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); -} +// auto table = cudf::table_view{{nums_col, structs_col}}; -TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) -{ - using T = TypeParam; - using nums = fixed_width_column_wrapper; +// CUDF_TEST_EXPECT_TABLES_EQUIVALENT( +// flatten_nested_columns(table, {}, {}, column_nullability::FORCE), table); +// } - auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +// TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) +// { +// using T = TypeParam; +// using nums = fixed_width_column_wrapper; - auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; - auto struct_0_strings_member = - strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; - auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; +// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; - auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; +// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; +// auto struct_0_strings_member = +// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; +// auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); -} +// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; +// auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; -TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) -{ - using T = TypeParam; - using nums = fixed_width_column_wrapper; +// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); +// } - auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) +// { +// using T = TypeParam; +// using nums = fixed_width_column_wrapper; - auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; - auto struct_0_strings_member = - strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; - auto structs_1_structs_member = - structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; +// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; - auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; +// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; +// auto struct_0_strings_member = +// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; +// auto structs_1_structs_member = +// structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); -} +// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; +// auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; -TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) -{ - using T = TypeParam; - using nums = fixed_width_column_wrapper; +// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); +// } - auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) +// { +// using T = TypeParam; +// using nums = fixed_width_column_wrapper; - auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; - auto struct_0_strings_member = - strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; - auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; +// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; - auto struct_of_structs_col = - structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; +// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; +// auto struct_0_strings_member = +// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; +// auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); -} +// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; +// auto struct_of_structs_col = +// structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; -TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) -{ - using T = TypeParam; - using nums = fixed_width_column_wrapper; +// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); +// } - auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) +// { +// using T = TypeParam; +// using nums = fixed_width_column_wrapper; - auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; - auto struct_0_strings_member = - strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; - auto structs_1_structs_member = - structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; +// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; - auto struct_of_structs_col = - structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; +// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; +// auto struct_0_strings_member = +// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; +// auto structs_1_structs_member = +// structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); -} +// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; +// auto struct_of_structs_col = +// structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; -TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) -{ - using T = TypeParam; - using ints = fixed_width_column_wrapper; - using lcw = lists_column_wrapper; +// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); +// } - // clang-format off - auto lists_member = lcw{ {0,1,2}, {3,4,5}, {6,7,8,9} }; - auto ints_member = ints{ 0, 1, 2 }; - // clang-format on +// TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) +// { +// using T = TypeParam; +// using ints = fixed_width_column_wrapper; +// using lcw = lists_column_wrapper; - auto structs_with_lists_col = structs{lists_member, ints_member}; +// // clang-format off +// auto lists_member = lcw{ {0,1,2}, {3,4,5}, {6,7,8,9} }; +// auto ints_member = ints{ 0, 1, 2 }; +// // clang-format on - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{structs_with_lists_col}}), - cudf::logic_error); -} +// auto structs_with_lists_col = structs{lists_member, ints_member}; -struct SuperimposeTest : StructUtilitiesTest { -}; +// EXPECT_THROW(flatten_test(cudf::table_view{{structs_with_lists_col}}), cudf::logic_error); +// } + +struct SuperimposeTest : StructUtilitiesTest {}; template struct TypedSuperimposeTest : StructUtilitiesTest { From ffe6eb9b9652653c7f84ae95de421c78b9f3e66c Mon Sep 17 00:00:00 2001 From: srikarvanavasam Date: Mon, 1 Aug 2022 13:06:14 -0700 Subject: [PATCH 2/6] Added tests --- cpp/tests/structs/utilities_tests.cpp | 309 ++++++++++++++++---------- 1 file changed, 197 insertions(+), 112 deletions(-) diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index 925a3c4c223..03512ed59cb 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -36,6 +36,7 @@ using namespace cudf::structs::detail; using strings = strings_column_wrapper; using dictionary = dictionary_column_wrapper; using structs = structs_column_wrapper; +using bools = fixed_width_column_wrapper; template using nums = fixed_width_column_wrapper; @@ -62,8 +63,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel) auto table = cudf::table_view{{lists_col, nums_col}}; - CUDF_TEST_EXPECT_TABLES_EQUIVALENT( - table, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); + CUDF_TEST_EXPECT_TABLES_EQUAL(table, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported) @@ -93,8 +94,8 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; - CUDF_TEST_EXPECT_TABLES_EQUIVALENT( - table, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); + CUDF_TEST_EXPECT_TABLES_EQUAL(table, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) @@ -106,124 +107,210 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto structs_col = structs{{nums_member, strings_member}}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - - auto table = cudf::table_view{{nums_col, structs_col}}; - auto expected = - cudf::table_view{{nums_col, - nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}, - strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}}}; - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT( - expected, flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } -// TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) -// { -// using T = TypeParam; -// using nums = fixed_width_column_wrapper; - -// auto nums_member = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}; -// auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; -// auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; -// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - -// auto table = cudf::table_view{{nums_col, structs_col}}; - -// CUDF_TEST_EXPECT_TABLES_EQUIVALENT( -// flatten_nested_columns(table, {}, {}, column_nullability::FORCE), table); -// } - -// TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) -// { -// using T = TypeParam; -// using nums = fixed_width_column_wrapper; - -// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - -// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; -// auto struct_0_strings_member = -// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; -// auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; - -// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; -// auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - -// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); -// } - -// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) -// { -// using T = TypeParam; -// using nums = fixed_width_column_wrapper; - -// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - -// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; -// auto struct_0_strings_member = -// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; -// auto structs_1_structs_member = -// structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; - -// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; -// auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - -// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); -// } - -// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) -// { -// using T = TypeParam; -// using nums = fixed_width_column_wrapper; +TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) +{ + using T = TypeParam; + using nums = fixed_width_column_wrapper; -// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + auto nums_member = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)}; + auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; + auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; + auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); +} -// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; -// auto struct_0_strings_member = -// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; -// auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; +TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) +{ + using T = TypeParam; + using nums = fixed_width_column_wrapper; -// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; -// auto struct_of_structs_col = -// structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; + auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + + auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; + auto struct_0_strings_member = + strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; + auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; + + auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; + auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); +} -// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); -// } +TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) +{ + using T = TypeParam; + using nums = fixed_width_column_wrapper; -// TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) -// { -// using T = TypeParam; -// using nums = fixed_width_column_wrapper; + auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + + auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; + auto struct_0_strings_member = + strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; + auto structs_1_structs_member = + structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; + + auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; + auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); +} -// auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; +TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) +{ + using T = TypeParam; + using nums = fixed_width_column_wrapper; -// auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; -// auto struct_0_strings_member = -// strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; -// auto structs_1_structs_member = -// structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; + auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + + auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; + auto struct_0_strings_member = + strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; + auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}}; + + auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; + auto struct_of_structs_col = + structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); +} -// auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; -// auto struct_of_structs_col = -// structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; +TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) +{ + using T = TypeParam; + using nums = fixed_width_column_wrapper; -// flatten_test(cudf::table_view{{nums_col, struct_of_structs_col}}); -// } + auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; + + auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)}; + auto struct_0_strings_member = + strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; + auto structs_1_structs_member = + structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)}; + + auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; + auto struct_of_structs_col = + structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); +} -// TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) -// { -// using T = TypeParam; -// using ints = fixed_width_column_wrapper; -// using lcw = lists_column_wrapper; +TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) +{ + using T = TypeParam; + using ints = fixed_width_column_wrapper; + using lcw = lists_column_wrapper; -// // clang-format off -// auto lists_member = lcw{ {0,1,2}, {3,4,5}, {6,7,8,9} }; -// auto ints_member = ints{ 0, 1, 2 }; -// // clang-format on + // clang-format off + auto lists_member = lcw{ {0,1,2}, {3,4,5}, {6,7,8,9} }; + auto ints_member = ints{ 0, 1, 2 }; + // clang-format on -// auto structs_with_lists_col = structs{lists_member, ints_member}; + auto structs_with_lists_col = structs{lists_member, ints_member}; -// EXPECT_THROW(flatten_test(cudf::table_view{{structs_with_lists_col}}), cudf::logic_error); -// } + EXPECT_THROW(flatten_nested_columns( + cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE), + cudf::logic_error); +} struct SuperimposeTest : StructUtilitiesTest {}; @@ -466,11 +553,9 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced) // Reset STRUCT's null-mask. Mark third row as null. mark_row_as_null(struct_structs_column->mutable_view(), 2); - // The null masks should now look as follows, with the STRUCT null mask *not* pushed down: - // STRUCT: 1111011 - // STRUCT: 1111101 - // nums_member: 0110101 - // lists_member: 1001101 + // The null masks should now look as follows, with the STRUCT null mask *not* pushed + // down: STRUCT: 1111011 STRUCT: 1111101 nums_member: 0110101 lists_member: + // 1001101 // Slice off the first and last rows. auto sliced_structs = slice_off_first_and_last_rows(struct_structs_column->view()); From 977830c956a2eed8699a75dfc11b0be2c2a2b7a2 Mon Sep 17 00:00:00 2001 From: srikarvanavasam Date: Mon, 1 Aug 2022 13:19:13 -0700 Subject: [PATCH 3/6] remove helper function --- cpp/src/structs/utilities.cpp | 67 ----------------------------------- 1 file changed, 67 deletions(-) diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 3260a55516d..bf4216b6983 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -209,73 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input, return table_flattener{input, column_order, null_precedence, nullability}(); } -namespace { -using vector_of_columns = std::vector>; -using column_index_t = typename vector_of_columns::size_type; - -// Forward declaration, to enable recursion via `unflattener`. -std::unique_ptr unflatten_struct(vector_of_columns& flattened, - column_index_t& current_index, - cudf::column_view const& blueprint); - -/** - * @brief Helper functor to reconstruct STRUCT columns from its flattened member columns. - * - */ -class unflattener { - public: - unflattener(vector_of_columns& flattened_, column_index_t& current_index_) - : flattened{flattened_}, current_index{current_index_} - { - } - - auto operator()(column_view const& blueprint) - { - return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint) - : std::move(flattened[current_index++]); - } - - private: - vector_of_columns& flattened; - column_index_t& current_index; - -}; // class unflattener; - -std::unique_ptr unflatten_struct(vector_of_columns& flattened, - column_index_t& current_index, - cudf::column_view const& blueprint) -{ - // "Consume" columns from `flattened`, starting at `current_index`, - // based on the provided `blueprint` struct col. Recurse for struct children. - CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT, - "Expected blueprint column to be a STRUCT column."); - - CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children."); - - auto const num_rows = flattened[current_index]->size(); - - // cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector - // before the child/member columns. - // E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to: - // 1. Null Vector for STRUCT_1 - // 2. Null Vector for STRUCT_2 - // 3. Member STRUCT_2::A - // 4. Member STRUCT_2::B - // 5. Member STRUCT_1::C - // - // Extract null-vector *before* child columns are constructed. - auto struct_null_column_contents = flattened[current_index++]->release(); - auto unflattening_iter = - thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index}); - - return cudf::make_structs_column( - num_rows, - vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()}, - UNKNOWN_NULL_COUNT, // Do count? - std::move(*struct_null_column_contents.null_mask)); -} -} // namespace - // Helper function to superimpose validity of parent struct // over the specified member (child) column. void superimpose_parent_nulls(bitmask_type const* parent_null_mask, From 150d16e0a9490eec2964bc9d07ddf3f910db3e8a Mon Sep 17 00:00:00 2001 From: srikarvanavasam Date: Mon, 1 Aug 2022 13:31:59 -0700 Subject: [PATCH 4/6] format --- cpp/tests/structs/utilities_tests.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index 03512ed59cb..9c7c220eec7 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -44,7 +44,8 @@ using nums = fixed_width_column_wrapper; template using lists = lists_column_wrapper; -struct StructUtilitiesTest : BaseFixture {}; +struct StructUtilitiesTest : BaseFixture { +}; template struct TypedStructUtilitiesTest : StructUtilitiesTest { @@ -312,7 +313,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) cudf::logic_error); } -struct SuperimposeTest : StructUtilitiesTest {}; +struct SuperimposeTest : StructUtilitiesTest { +}; template struct TypedSuperimposeTest : StructUtilitiesTest { From 513962d0e8d0c646ef03f9ecb9a5f63022f450c8 Mon Sep 17 00:00:00 2001 From: Srikar Vanavasam Date: Mon, 1 Aug 2022 15:53:21 -0500 Subject: [PATCH 5/6] update copyright --- cpp/tests/structs/utilities_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index 9c7c220eec7..8a0fca2c9f9 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From d3c061976a6f86a5860b3f3a7aaeed74f76b96db Mon Sep 17 00:00:00 2001 From: srikarvanavasam Date: Mon, 1 Aug 2022 15:48:53 -0700 Subject: [PATCH 6/6] type --- cpp/tests/structs/utilities_tests.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index 9c7c220eec7..4962663b373 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -555,9 +555,11 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced) // Reset STRUCT's null-mask. Mark third row as null. mark_row_as_null(struct_structs_column->mutable_view(), 2); - // The null masks should now look as follows, with the STRUCT null mask *not* pushed - // down: STRUCT: 1111011 STRUCT: 1111101 nums_member: 0110101 lists_member: - // 1001101 + // The null masks should now look as follows, with the STRUCT null mask *not* pushed down: + // STRUCT: 1111011 + // STRUCT: 1111101 + // nums_member: 0110101 + // lists_member: 1001101 // Slice off the first and last rows. auto sliced_structs = slice_off_first_and_last_rows(struct_structs_column->view());