diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index bfeb6ef3533..ace9a608bdb 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -21,11 +21,15 @@ #include #include #include +#include #include #include #include +#include #include +#include + namespace cudf { namespace structs { namespace detail { @@ -337,6 +341,84 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask, } } +std::tuple> superimpose_parent_nulls( + column_view const& parent, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) +{ + if (parent.type().id() != type_id::STRUCT) { + // NOOP for non-STRUCT columns. + return std::make_tuple(parent, std::vector{}); + } + + auto structs_column = structs_column_view{parent}; + + auto ret_validity_buffers = std::vector{}; + + // Function to rewrite child null mask. + auto rewrite_child_mask = [&](auto const& child_idx) { + auto child = structs_column.get_sliced_child(child_idx); + + // If struct is not nullable, child null mask is retained. NOOP. + if (not structs_column.nullable()) { return child; } + + auto parent_child_null_masks = + std::vector{structs_column.null_mask(), child.null_mask()}; + + auto new_child_mask = [&] { + if (not child.nullable()) { + // Adopt parent STRUCT's null mask. + return structs_column.null_mask(); + } + + // Both STRUCT and child are nullable. AND() for the child's new null mask. + // + // Note: ANDing only [offset(), offset()+size()) would not work. The null-mask produced thus + // would start at offset=0. The column-view attempts to apply its offset() to both the _data + // and the _null_mask(). It would be better to AND the bits from the beginning, and apply + // offset() uniformly. + // Alternatively, one could construct a big enough buffer, and use inplace_bitwise_and. + ret_validity_buffers.push_back(cudf::detail::bitmask_and(parent_child_null_masks, + std::vector{0, 0}, + child.offset() + child.size(), + stream, + mr)); + return reinterpret_cast(ret_validity_buffers.back().data()); + }(); + + return cudf::column_view( + child.type(), + child.size(), + child.head(), + new_child_mask, + cudf::UNKNOWN_NULL_COUNT, + child.offset(), + std::vector{child.child_begin(), child.child_end()}); + }; + + auto child_begin = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), rewrite_child_mask); + auto child_end = child_begin + structs_column.num_children(); + + auto ret_children = std::vector{}; + std::for_each(child_begin, child_end, [&](auto const& child) { + auto [processed_child, backing_buffers] = superimpose_parent_nulls(child, stream, mr); + ret_children.push_back(processed_child); + ret_validity_buffers.insert(ret_validity_buffers.end(), + std::make_move_iterator(backing_buffers.begin()), + std::make_move_iterator(backing_buffers.end())); + }); + + // Make column view out of newly constructed column_views, and all the validity buffers. + + return std::make_tuple(column_view(parent.type(), + parent.size(), + nullptr, + parent.null_mask(), + parent.null_count(), // Alternatively, postpone. + parent.offset(), + ret_children), + std::move(ret_validity_buffers)); +} + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/src/structs/utilities.hpp b/cpp/src/structs/utilities.hpp index a68f09574ce..1683518a1ef 100644 --- a/cpp/src/structs/utilities.hpp +++ b/cpp/src/structs/utilities.hpp @@ -19,6 +19,8 @@ #include #include +#include + namespace cudf { namespace structs { namespace detail { @@ -106,7 +108,7 @@ std::unique_ptr unflatten_nested_columns(std::unique_ptr> superimpose_parent_nulls( + column_view const& parent, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace detail } // namespace structs } // namespace cudf diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index d4ded02adce..08b40b22aa4 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include #include @@ -22,8 +24,10 @@ #include #include +#include #include -#include +#include +#include namespace cudf::test { @@ -44,9 +48,16 @@ void flatten_unflatten_compare(table_view const& input_table) } using namespace cudf; -using iterators::null_at; -using strings = strings_column_wrapper; -using structs = structs_column_wrapper; +using namespace iterators; +using strings = strings_column_wrapper; +using dictionary = dictionary_column_wrapper; +using structs = structs_column_wrapper; + +template +using nums = fixed_width_column_wrapper; + +template +using lists = lists_column_wrapper; struct StructUtilitiesTest : BaseFixture { }; @@ -55,7 +66,7 @@ template struct TypedStructUtilitiesTest : StructUtilitiesTest { }; -TYPED_TEST_CASE(TypedStructUtilitiesTest, FixedWidthTypes); +TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes); TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported) { @@ -217,4 +228,280 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) cudf::logic_error); } +struct SuperimposeTest : StructUtilitiesTest { +}; + +template +struct TypedSuperimposeTest : StructUtilitiesTest { +}; + +TYPED_TEST_SUITE(TypedSuperimposeTest, FixedWidthTypes); + +void test_non_struct_columns(cudf::column_view const& input) +{ + // superimpose_parent_nulls() on non-struct columns should return the input column, unchanged. + auto [superimposed, backing_validity_buffers] = + cudf::structs::detail::superimpose_parent_nulls(input); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input, superimposed); + EXPECT_TRUE(backing_validity_buffers.empty()); +} + +TYPED_TEST(TypedSuperimposeTest, NoStructInput) +{ + using T = TypeParam; + + test_non_struct_columns(fixed_width_column_wrapper{{6, 5, 4, 3, 2, 1, 0}, null_at(3)}); + test_non_struct_columns( + lists_column_wrapper{{{6, 5}, {4, 3}, {2, 1}, {0}}, null_at(3)}); + test_non_struct_columns(strings{{"All", "The", "Leaves", "Are", "Brown"}, null_at(3)}); + test_non_struct_columns(dictionary{{"All", "The", "Leaves", "Are", "Brown"}, null_at(3)}); +} + +/** + * @brief Helper to construct a numeric member of a struct column. + */ +template +nums make_nums_member(NullIter null_iter = no_nulls()) +{ + return nums{{10, 11, 12, 13, 14, 15, 16}, null_iter}; +} + +/** + * @brief Helper to construct a lists member of a struct column. + */ +template +lists make_lists_member(NullIter null_iter = no_nulls()) +{ + return lists{{{20, 20}, {21, 21}, {22, 22}, {23, 23}, {24, 24}, {25, 25}, {26, 26}}, + null_iter}; +} + +TYPED_TEST(TypedSuperimposeTest, BasicStruct) +{ + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto structs_input = structs{{nums_member, lists_member}, no_nulls()}.release(); + + // Reset STRUCTs' null-mask. Mark first STRUCT row as null. + auto structs_view = structs_input->mutable_view(); + cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false); + + // At this point, the STRUCT nulls aren't pushed down to members, + // even though the parent null-mask was modified. + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_view.child(0), make_nums_member(nulls_at({3, 6}))); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_view.child(1), + make_lists_member(nulls_at({4, 5}))); + + auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(structs_view); + + // After superimpose_parent_nulls(), the struct nulls (i.e. at index-0) should have been pushed + // down to the children. All members should have nulls at row-index 0. + auto expected_nums_member = make_nums_member(nulls_at({0, 3, 6})); + auto expected_lists_member = make_lists_member(nulls_at({0, 4, 5})); + auto expected_structs_output = structs{{expected_nums_member, expected_lists_member}, null_at(0)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_output); +} + +TYPED_TEST(TypedSuperimposeTest, NonNullableParentStruct) +{ + // Test that if the parent struct is not nullable, non-struct members should + // remain unchanged. + + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto structs_input = structs{{nums_member, lists_member}, no_nulls()}.release(); + + auto [output, backing_buffers] = + cudf::structs::detail::superimpose_parent_nulls(structs_input->view()); + + // After superimpose_parent_nulls(), none of the child structs should have changed, + // because the parent had no nulls to begin with. + auto expected_nums_member = make_nums_member(nulls_at({3, 6})); + auto expected_lists_member = make_lists_member(nulls_at({4, 5})); + auto expected_structs_output = structs{{expected_nums_member, expected_lists_member}, no_nulls()}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_output); +} + +TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNonNullable) +{ + // Test with Struct. If the parent struct is not nullable: + // 1. Non-struct members should remain unchanged. + // 2. Member-structs should have their respective nulls pushed down into grandchildren. + + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto outer_struct_members = std::vector>{}; + outer_struct_members.push_back(structs{{nums_member, lists_member}, no_nulls()}.release()); + + // Reset STRUCTs' null-mask. Mark first STRUCT row as null. + auto structs_view = outer_struct_members.back()->mutable_view(); + cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false); + + auto structs_of_structs = structs{std::move(outer_struct_members)}.release(); + + auto [output, backing_buffers] = + cudf::structs::detail::superimpose_parent_nulls(structs_of_structs->view()); + + // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child + // structs. But the child struct column must push its nulls to its own children. + auto expected_nums_member = make_nums_member(nulls_at({0, 3, 6})); + auto expected_lists_member = make_lists_member(nulls_at({0, 4, 5})); + auto expected_structs = structs{{expected_nums_member, expected_lists_member}, null_at(0)}; + auto expected_structs_of_structs = structs{{expected_structs}}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_of_structs); +} + +TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable) +{ + // Test with Struct. + // If both the parent struct and the child are nullable, the leaf nodes should + // have a 3-way ANDed null-mask. + + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto outer_struct_members = std::vector>{}; + outer_struct_members.push_back(structs{{nums_member, lists_member}, no_nulls()}.release()); + + // Reset STRUCTs' null-mask. Mark first STRUCT row as null. + auto structs_view = outer_struct_members.back()->mutable_view(); + auto num_rows = structs_view.size(); + cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false); + + auto structs_of_structs = + structs{std::move(outer_struct_members), std::vector(num_rows, true)}.release(); + + // Modify STRUCT-of-STRUCT's null-mask. Mark second STRUCT row as null. + auto structs_of_structs_view = structs_of_structs->mutable_view(); + cudf::detail::set_null_mask(structs_of_structs_view.null_mask(), 1, 2, false); + + auto [output, backing_buffers] = + cudf::structs::detail::superimpose_parent_nulls(structs_of_structs->view()); + + // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child + // structs. But the child struct column must push its nulls to its own children. + auto expected_nums_member = make_nums_member(nulls_at({0, 1, 3, 6})); + auto expected_lists_member = make_lists_member(nulls_at({0, 1, 4, 5})); + auto expected_structs = structs{{expected_nums_member, expected_lists_member}, nulls_at({0, 1})}; + auto expected_structs_of_structs = structs{{expected_structs}, null_at(1)}; + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_of_structs); +} + +cudf::column_view slice_off_first_and_last_rows(cudf::column_view const& col) +{ + return cudf::slice(col, {1, col.size() - 1})[0]; +} + +void mark_row_as_null(cudf::mutable_column_view const& col, size_type row_index) +{ + cudf::detail::set_null_mask(col.null_mask(), row_index, row_index + 1, false); +} + +TYPED_TEST(TypedSuperimposeTest, Struct_Sliced) +{ + // Test with a sliced STRUCT column. + // Ensure that superimpose_parent_nulls() produces the right results, even when the input is + // sliced. + + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto structs_column = structs{{nums_member, lists_member}, no_nulls()}.release(); + + // Reset STRUCTs' null-mask. Mark second STRUCT row as null. + mark_row_as_null(structs_column->mutable_view(), 1); + + // The null masks should now look as follows, with the STRUCT null mask *not* pushed down: + // STRUCT: 1111101 + // nums_member: 0110111 + // lists_member: 1001111 + + // Slice off the first and last rows. + auto sliced_structs = slice_off_first_and_last_rows(structs_column->view()); + + // After slice(), the null masks will be: + // STRUCT: 11110 + // nums_member: 11011 + // lists_member: 00111 + + auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(sliced_structs); + + // After superimpose_parent_nulls(), the null masks should be: + // STRUCT: 11110 + // nums_member: 11010 + // lists_member: 00110 + + // Construct expected columns using structs_column_wrapper, which should push the parent nulls + // down automatically. Then, slice() off the ends. + auto expected_nums = make_nums_member(nulls_at({1, 3, 6})); + auto expected_lists = make_lists_member(nulls_at({1, 4, 5})); + auto expected_unsliced_structs = structs{{expected_nums, expected_lists}, nulls_at({1})}; + auto expected_structs = slice_off_first_and_last_rows(expected_unsliced_structs); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs); +} + +TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced) +{ + // Test with a sliced STRUCT column. + // Ensure that superimpose_parent_nulls() produces the right results, even when the input is + // sliced. + + using T = TypeParam; + + auto nums_member = make_nums_member(nulls_at({3, 6})); + auto lists_member = make_lists_member(nulls_at({4, 5})); + auto structs_column = structs{{nums_member, lists_member}, null_at(1)}; + auto struct_structs_column = structs{{structs_column}, no_nulls()}.release(); + + // Reset STRUCT's null-mask. Mark third row as null. + mark_row_as_null(struct_structs_column->mutable_view(), 2); + + // The null masks should now look as follows, with the STRUCT null mask *not* pushed down: + // STRUCT: 1111011 + // STRUCT: 1111101 + // nums_member: 0110101 + // lists_member: 1001101 + + // Slice off the first and last rows. + auto sliced_structs = slice_off_first_and_last_rows(struct_structs_column->view()); + + // After slice(), the null masks will be: + // STRUCT: 11101 + // STRUCT: 11110 + // nums_member: 11010 + // lists_member: 00110 + + auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(sliced_structs); + + // After superimpose_parent_nulls(), the null masks will be: + // STRUCT: 11101 + // STRUCT: 11100 + // nums_member: 11000 + // lists_member: 00100 + + // Construct expected columns using structs_column_wrapper, which should push the parent nulls + // down automatically. Then, slice() off the ends. + auto expected_nums = make_nums_member(nulls_at({3, 6})); + auto expected_lists = make_lists_member(nulls_at({4, 5})); + auto expected_structs = structs{{expected_nums, expected_lists}, nulls_at({1})}; + auto expected_struct_structs = structs{{expected_structs}, null_at(2)}; + auto expected_sliced_structs = slice_off_first_and_last_rows(expected_struct_structs); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_sliced_structs); +} + } // namespace cudf::test