Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix has_null predicate for drop_list_duplicates on nested structs #10436

Merged
merged 4 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions cpp/src/lists/drop_list_duplicates.cu
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ struct get_indices_of_unique_entries_dispatch {
size_type*,
null_equality,
nan_equality,
bool,
duplicate_keep_option,
rmm::cuda_stream_view) const
{
Expand All @@ -370,7 +369,6 @@ struct get_indices_of_unique_entries_dispatch {
size_type* output_begin,
null_equality nulls_equal,
nan_equality nans_equal,
bool has_nulls,
duplicate_keep_option keep_option,
rmm::cuda_stream_view stream) const noexcept
{
Expand All @@ -379,7 +377,7 @@ struct get_indices_of_unique_entries_dispatch {
*d_view,
*d_view,
nulls_equal,
has_nulls,
all_lists_entries.has_nulls(),
nans_equal == nan_equality::ALL_EQUAL};
return cudf::detail::unique_copy(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_entries),
Expand All @@ -396,19 +394,20 @@ struct get_indices_of_unique_entries_dispatch {
size_type* output_begin,
null_equality nulls_equal,
nan_equality nans_equal,
bool has_nulls,
duplicate_keep_option keep_option,
rmm::cuda_stream_view stream) const noexcept
{
auto const flattened_entries = cudf::structs::detail::flatten_nested_columns(
table_view{{all_lists_entries}}, {order::ASCENDING}, {null_order::AFTER}, {});
auto const dview_ptr = table_device_view::create(flattened_entries, stream);
// Search through children of all levels for nulls.
auto const nested_has_nulls = has_nulls(flattened_entries.flattened_columns());

auto const comp = table_row_comparator_fn{list_indices,
*dview_ptr,
*dview_ptr,
nulls_equal,
has_nulls,
nested_has_nulls,
nans_equal == nan_equality::ALL_EQUAL};
return cudf::detail::unique_copy(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_entries),
Expand Down Expand Up @@ -447,7 +446,6 @@ std::vector<std::unique_ptr<column>> get_unique_entries_and_list_indices(
output_begin,
nulls_equal,
nans_equal,
keys_entries.has_nulls(),
keep_option,
stream);

Expand Down
109 changes: 108 additions & 1 deletion cpp/tests/lists/drop_list_duplicates_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -734,6 +734,113 @@ TYPED_TEST(DropListDuplicatesTypedTest, InputListsOfStructsHaveNull)
}
}

TYPED_TEST(DropListDuplicatesTypedTest, InputListsOfNestedStructsHaveNull)
{
using ColWrapper = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>;
auto constexpr null = int32_t{0}; // nulls at the children columns level
// XXX and YY are int placeholders for nulls at parent structs column level.
sperlingxx marked this conversation as resolved.
Show resolved Hide resolved
// We bring up two placeholders of different values to create intra null structs with
// children of different values, so as to test whether null_equality::EQUAL works or not.
auto constexpr XXX = int32_t{5};
auto constexpr YYY = int32_t{6};

auto const get_nested_structs = [] {
auto grandchild1 = ColWrapper{{
1, XXX, null, XXX, YYY, 1, 1, 1, // list1
1, 1, 1, 1, 2, 1, null, 2, // list2
null, null, 2, 2, 3, 2, 3, 3 // list3
},
nulls_at({2, 14, 16, 17})};
auto grandchild2 = StringsCol{{
// begin list1
"Banana",
"YYY", /*NULL*/
"Apple",
"XXX", /*NULL*/
"YYY", /*NULL*/
"Banana",
"Cherry",
"Kiwi", // end list1
// begin list2
"Bear",
"Duck",
"Cat",
"Dog",
"Panda",
"Bear",
"" /*NULL*/,
"Panda", // end list2
// begin list3
"ÁÁÁ",
"ÉÉÉÉÉ",
"ÍÍÍÍÍ",
"ÁBC",
"" /*NULL*/,
"ÁÁÁ",
"ÁBC",
"XYZ" // end list3
},
nulls_at({14, 20})};
auto child1 = StructsCol{{grandchild1, grandchild2}, nulls_at({1, 3, 4})};
return StructsCol{{child1}};
};

auto const get_nested_struct_expected = [] {
auto grandchild1 =
ColWrapper{{1, 1, 1, null, XXX, 1, 1, 1, 1, 2, null, 2, 2, 2, 3, 3, 3, null, null},
nulls_at({3, 4, 10, 17, 18})};
auto grandchild2 = StringsCol{{
// begin list1
"Banana",
"Cherry",
"Kiwi",
"Apple",
"XXX" /*NULL*/, // end list1
// begin list2
"Bear",
"Cat",
"Dog",
"Duck",
"Panda",
"" /*NULL*/, // end list2
// begin list3
"ÁBC",
"ÁÁÁ",
"ÍÍÍÍÍ",
"XYZ",
"ÁBC",
"" /*NULL*/,
"ÁÁÁ",
"ÉÉÉÉÉ" // end list3
},
nulls_at({4, 10, 16})};
auto child1 = StructsCol{{grandchild1, grandchild2}, nulls_at({4})};
return StructsCol{{child1}};
};

// Test full columns.
{
auto const lists = cudf::make_lists_column(
3, IntsCol{0, 8, 16, 24}.release(), get_nested_structs().release(), 0, {});
auto const expected = cudf::make_lists_column(
3, IntsCol{0, 5, 11, 19}.release(), get_nested_struct_expected().release(), 0, {});
auto const results = cudf::lists::drop_list_duplicates(cudf::lists_column_view{lists->view()});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected->view(), verbosity);
}

// Test sliced columns.
{
auto const lists_original = cudf::make_lists_column(
3, IntsCol{0, 8, 16, 24}.release(), get_nested_structs().release(), 0, {});
auto const expected_original = cudf::make_lists_column(
3, IntsCol{0, 5, 11, 19}.release(), get_nested_struct_expected().release(), 0, {});
auto const lists = cudf::slice(lists_original->view(), {1, 3})[0];
auto const expected = cudf::slice(expected_original->view(), {1, 3})[0];
auto const results = cudf::lists::drop_list_duplicates(cudf::lists_column_view{lists});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected, verbosity);
}
}

TEST_F(DropListDuplicatesTest, SlicedInputListsOfStructsWithNaNs)
{
auto const h_child = std::vector<float_type>{
Expand Down