Skip to content

Commit

Permalink
Fixes the scatter map size to the target column size
Browse files Browse the repository at this point in the history
Simplifies copy_if_else by skipping gather on lhs, and using lhs directly
as the rhs scatter destinations for 0-rows in select col.

Closes #8356

Co-authored-by: MithunR <[email protected]>
  • Loading branch information
gerashegalov and mythrocks committed Jun 12, 2021
1 parent 709adb1 commit 75faa03
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 17 deletions.
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ struct column_scatterer_impl<struct_view> {
[](auto const& col) { return col.nullable(); });
if (child_nullable) {
auto const gather_map =
scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream);
scatter_to_gather(scatter_map_begin, scatter_map_end, target.size(), stream);
gather_bitmask(cudf::table_view{std::vector<cudf::column_view>{structs_src.child_begin(),
structs_src.child_end()}},
gather_map.begin(),
Expand Down
17 changes: 1 addition & 16 deletions cpp/src/copying/copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -195,21 +195,6 @@ std::unique_ptr<column> scatter_gather_based_if_else(Left const& lhs,
{
if constexpr (std::is_same<Left, cudf::column_view>::value &&
std::is_same<Right, cudf::column_view>::value) {
auto const null_map_entry = size + 1; // Out of bounds index, for gather() to nullify.

auto const gather_lhs = make_counting_transform_iterator(
size_type{0}, lhs_gather_map_functor<Filter>{is_left, null_map_entry});

auto const lhs_gathered_columns =
cudf::detail::gather(table_view{std::vector<cudf::column_view>{lhs}},
gather_lhs,
gather_lhs + size,
out_of_bounds_policy::NULLIFY,
stream,
mr)
->release();
auto& lhs_partial_output = lhs_gathered_columns[0];

auto scatter_map_rhs = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(size_type{0}),
Expand All @@ -227,7 +212,7 @@ std::unique_ptr<column> scatter_gather_based_if_else(Left const& lhs,
table_view{std::vector<column_view>{scatter_src_rhs->get_column(0).view()}},
scatter_map_rhs.begin(),
scatter_map_end,
table_view{std::vector<column_view>{lhs_partial_output->view()}},
table_view{std::vector<column_view>{lhs}},
false,
stream,
mr);
Expand Down
29 changes: 29 additions & 0 deletions cpp/tests/copying/copy_if_else_nested_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,35 @@ TYPED_TEST(TypedCopyIfElseNestedTest, StructsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), expected_result->view());
}

TYPED_TEST(TypedCopyIfElseNestedTest, LongerStructsWithNulls)
{
using T = TypeParam;

using namespace cudf;
using namespace cudf::test;

using ints = fixed_width_column_wrapper<T, int32_t>;
using structs = structs_column_wrapper;
using bools = fixed_width_column_wrapper<bool, int32_t>;

auto selector_column = bools{1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0}
.release();
auto lhs_child_1 =
ints{{27, -80, -24, 76, -56, 42, 5, 13, -69, -77, 61, -77, 72, 0, 31, 118, -30,
86, 125, 0, 0, 0, 75, -49, 125, 60, 116, 118, 64, 20, -70, -18, 0, -25,
22, -46, -89, -9, 27, -56, -77, 123, 0, -90, 87, -113, -37, 22, -22, -53, 73,
99, 113, -2, -24, 113, 75, 6, 82, -58, 122, -123, -127, 19, -62, -24},
iterator_with_null_at(std::vector<size_type>{13, 19, 20, 21, 32, 42})};

auto lhs_structs_column = structs{{lhs_child_1}}.release();
auto result_column =
copy_if_else(lhs_structs_column->view(), lhs_structs_column->view(), selector_column->view());

CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), lhs_structs_column->view());
}

TYPED_TEST(TypedCopyIfElseNestedTest, Lists)
{
using T = TypeParam;
Expand Down
18 changes: 18 additions & 0 deletions cpp/tests/copying/scatter_struct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,21 @@ TYPED_TEST(TypedStructScatterTest, SourceSmallerThanTargetScatterTest)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(
*structs_expected, scatter_structs(structs_src, structs_tgt, scatter_map), print_all);
}

TYPED_TEST(TypedStructScatterTest, IntStructNullMaskRegression)
{
auto child_0 = int32s_col({0, -1, 2}, null_at(1));
auto struct_col_0 = structs_col({child_0}).release();

auto child_1 = int32s_col({20});
auto struct_col_1 = structs_col({child_1}).release();

auto scatter_map = int32s_col{2}.release();

auto expected_child = int32s_col({0, -1, 20}, null_at(1));
auto expected_struct = structs_col({expected_child}).release();

auto result_ptr = scatter_structs(struct_col_1, struct_col_0, scatter_map);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_struct, result_ptr, print_all);
}

0 comments on commit 75faa03

Please sign in to comment.