Skip to content

Commit

Permalink
Fix gather for sliced input structs column (#9218)
Browse files Browse the repository at this point in the history
This PR fixes the `gather` API for structs columns when the input is a sliced column. Previously, `gather` calls `child_begin()` and `child_end()` to access the children column so if the input structs column is sliced then the output is incorrect.

This closes #9213, and is blocked by #9194 due to conflict work.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - MithunR (https://github.com/mythrocks)
  - Mark Harris (https://github.com/harrism)

URL: #9218
  • Loading branch information
ttnghia authored Sep 17, 2021
1 parent 5a82585 commit 3bbec6e
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 9 deletions.
27 changes: 18 additions & 9 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -450,16 +450,24 @@ struct column_gatherer_impl<struct_view> {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
structs_column_view structs_column(column);
auto gather_map_size{std::distance(gather_map_begin, gather_map_end)};
auto const gather_map_size = std::distance(gather_map_begin, gather_map_end);
if (gather_map_size == 0) { return empty_like(column); }

// Gathering needs to operate on the sliced children since they need to take into account the
// offset of the parent structs column.
std::vector<cudf::column_view> sliced_children;
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(column.num_children()),
std::back_inserter(sliced_children),
[structs_view = structs_column_view{column}](auto const idx) {
return structs_view.get_sliced_child(idx);
});

std::vector<std::unique_ptr<cudf::column>> output_struct_members;
std::transform(structs_column.child_begin(),
structs_column.child_end(),
std::transform(sliced_children.begin(),
sliced_children.end(),
std::back_inserter(output_struct_members),
[&gather_map_begin, &gather_map_end, nullify_out_of_bounds, stream, mr](
cudf::column_view const& col) {
[&](auto const& col) {
return cudf::type_dispatcher<dispatch_storage_type>(col.type(),
column_gatherer{},
col,
Expand All @@ -471,14 +479,15 @@ struct column_gatherer_impl<struct_view> {
});

auto const nullable =
nullify_out_of_bounds or std::any_of(structs_column.child_begin(),
structs_column.child_end(),
nullify_out_of_bounds || std::any_of(sliced_children.begin(),
sliced_children.end(),
[](auto const& col) { return col.nullable(); });

if (nullable) {
gather_bitmask(
// Table view of struct column.
cudf::table_view{
std::vector<cudf::column_view>{structs_column.child_begin(), structs_column.child_end()}},
std::vector<cudf::column_view>{sliced_children.begin(), sliced_children.end()}},
gather_map_begin,
output_struct_members,
nullify_out_of_bounds ? gather_bitmask_op::NULLIFY : gather_bitmask_op::DONT_CHECK,
Expand Down
58 changes: 58 additions & 0 deletions cpp/tests/copying/gather_struct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,64 @@ TYPED_TEST(TypedStructGatherTest, TestSimpleStructGather)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output->view(), expected_output);
}

TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherNoNulls)
{
auto const structs_original = [] {
auto child1 = fixed_width_column_wrapper<TypeParam, int32_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
auto child2 = strings_column_wrapper{
"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten"};
return structs_column_wrapper{{child1, child2}};
}();

auto const expected = [] {
auto child1 = fixed_width_column_wrapper<TypeParam, int32_t>{6, 10, 8};
auto child2 = strings_column_wrapper{"Six", "Ten", "Eight"};
return structs_column_wrapper{{child1, child2}};
}();

auto const structs = cudf::slice(structs_original, {4, 10})[0];
auto const gather_map = fixed_width_column_wrapper<int32_t>{1, 5, 3};
auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected);
}

TYPED_TEST(TypedStructGatherTest, TestSlicedStructsColumnGatherWithNulls)
{
auto constexpr null = int32_t{0}; // null at child
auto constexpr XXX = int32_t{0}; // null at parent

auto const structs_original = [] {
auto child1 = fixed_width_column_wrapper<TypeParam, int32_t>{
{null, XXX, 3, null, null, 6, XXX, null, null, 10}, nulls_at({0, 3, 4, 7, 8})};
auto child2 = strings_column_wrapper{{"One",
"" /*NULL at both parent and child*/,
"Three",
"" /*NULL*/,
"Five",
"" /*NULL*/,
"" /*NULL at parent*/,
"" /*NULL*/,
"Nine",
"" /*NULL*/},
nulls_at({1, 3, 5, 7, 9})};
return structs_column_wrapper{{child1, child2}, nulls_at({1, 6})};
}();

auto const expected = [] {
auto child1 = fixed_width_column_wrapper<TypeParam, int32_t>{{6, 10, null, XXX}, null_at(2)};
auto child2 = strings_column_wrapper{{
"" /*NULL*/, "" /*NULL*/, "Nine", "" /*NULL at parent*/
},
nulls_at({0, 1})};
return structs_column_wrapper{{child1, child2}, null_at(3)};
}();

auto const structs = cudf::slice(structs_original, {4, 10})[0];
auto const gather_map = fixed_width_column_wrapper<int32_t>{1, 5, 4, 2};
auto const result = cudf::gather(cudf::table_view{{structs}}, gather_map)->get_column(0);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.view(), expected);
}

TYPED_TEST(TypedStructGatherTest, TestNullifyOnNonNullInput)
{
// Test that the null masks of the struct output (and its children) are set correctly,
Expand Down

0 comments on commit 3bbec6e

Please sign in to comment.