Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix interleave_columns on ListType with nullable child #8181

Merged
merged 1 commit into from
May 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions cpp/src/lists/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ struct interleave_list_entries_fn {
column_view const& output_list_offsets,
size_type num_output_lists,
size_type num_output_entries,
bool has_null_mask,
bool data_has_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const table_dv_ptr = table_device_view::create(input);
auto const comp_fn = compute_string_sizes_and_interleave_lists_fn{
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), has_null_mask};
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), data_has_null_mask};

if (has_null_mask) {
if (data_has_null_mask) {
auto [offsets_column, chars_column, null_mask, null_count] =
cudf::strings::detail::make_strings_children_with_null_mask(
comp_fn, num_output_lists, num_output_entries, stream, mr);
Expand Down Expand Up @@ -205,7 +205,7 @@ struct interleave_list_entries_fn {
column_view const& output_list_offsets,
size_type num_output_lists,
size_type num_output_entries,
bool has_null_mask,
bool data_has_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const noexcept
{
Expand All @@ -222,7 +222,8 @@ struct interleave_list_entries_fn {
auto output_dv_ptr = mutable_column_device_view::create(*output);

// The array of int8_t to store entry validities.
auto validities = rmm::device_uvector<int8_t>(has_null_mask ? num_output_entries : 0, stream);
auto validities =
rmm::device_uvector<int8_t>(data_has_null_mask ? num_output_entries : 0, stream);

thrust::for_each_n(
rmm::exec_policy(stream),
Expand All @@ -233,7 +234,7 @@ struct interleave_list_entries_fn {
d_validities = validities.begin(),
d_offsets = output_list_offsets.template begin<offset_type>(),
d_output = output_dv_ptr->template begin<T>(),
has_null_mask] __device__(size_type const idx) {
data_has_null_mask] __device__(size_type const idx) {
auto const col_id = idx % num_cols;
auto const list_id = idx / num_cols;
auto const& lists_col = table_dv.column(col_id);
Expand All @@ -248,7 +249,7 @@ struct interleave_list_entries_fn {
auto const write_start = d_offsets[idx];

// Fill the validities array if necessary.
if (has_null_mask) {
if (data_has_null_mask) {
for (auto read_idx = start_idx, write_idx = write_start; read_idx < end_idx;
++read_idx, ++write_idx) {
d_validities[write_idx] = static_cast<int8_t>(data_col.is_valid(read_idx));
Expand All @@ -263,7 +264,7 @@ struct interleave_list_entries_fn {
thrust::seq, input_ptr, input_ptr + sizeof(T) * (end_idx - start_idx), output_ptr);
});

if (has_null_mask) {
if (data_has_null_mask) {
auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);
if (null_count > 0) { output->set_null_mask(null_mask, null_count); }
Expand Down Expand Up @@ -323,13 +324,17 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
auto const num_output_lists = input.num_rows() * input.num_columns();
auto const num_output_entries =
cudf::detail::get_value<offset_type>(offsets_view, num_output_lists, stream);
auto const data_has_null_mask =
std::any_of(std::cbegin(input), std::cend(input), [](auto const& col) {
return col.child(lists_column_view::child_column_index).nullable();
});
auto list_entries = type_dispatcher<dispatch_storage_type>(entry_type,
interleave_list_entries_fn{},
input,
offsets_view,
num_output_lists,
num_output_entries,
has_null_mask,
data_has_null_mask,
stream,
mr);

Expand Down
32 changes: 32 additions & 0 deletions cpp/tests/lists/concatenate_rows_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputNoNull)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col1 = ListsCol{{1, 2}, ListsCol{{null}, null_at(0)}, {5, 6}}.release();
auto const col2 = ListsCol{{7, 8}, {9, 10}, {11, 12}}.release();
auto const expected =
ListsCol{{1, 2, 7, 8}, ListsCol{{null, 9, 10}, null_at(0)}, {5, 6, 11, 12}}.release();
auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull)
{
auto const col1 = StrListsCol{
Expand All @@ -143,6 +155,26 @@ TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsWithNullableChild)
{
auto const col1 = StrListsCol{
StrListsCol{"Tomato", "Apple"},
StrListsCol{"Banana", "Kiwi", "Cherry"},
StrListsCol{
"Coconut"}}.release();
auto const col2 = StrListsCol{
StrListsCol{"Orange"},
StrListsCol{{"Lemon", "Peach"}, null_at(1)},
StrListsCol{}}.release();
auto const expected = StrListsCol{
StrListsCol{"Tomato", "Apple", "Orange"},
StrListsCol{{"Banana", "Kiwi", "Cherry", "Lemon", "Peach"}, null_at(4)},
StrListsCol{
"Coconut"}}.release();
auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;
Expand Down
67 changes: 67 additions & 0 deletions cpp/tests/reshape/interleave_columns_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,19 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col1 = ListsCol{{1, 2}, {3, 4}}.release();
auto const col2 = ListsCol{{5, 6}, {7, 8}}.release();
auto const col3 = ListsCol{{9, 10}, ListsCol{{null, 12}, null_at(0)}}.release();
auto const expected =
ListsCol{{1, 2}, {5, 6}, {9, 10}, {3, 4}, {7, 8}, ListsCol{{null, 12}, null_at(0)}}.release();
auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view(), col3->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls)
{
auto const col1 = StrListsCol{
Expand Down Expand Up @@ -568,6 +581,33 @@ TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNullableChild)
{
auto const col1 = StrListsCol{
StrListsCol{"Tomato", "Bear", "Apple"},
StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"},
StrListsCol{
"Coconut"}}.release();
auto const col2 = StrListsCol{
StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})},
StrListsCol{"Lemon", "Peach"},
StrListsCol{
{"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/},
all_nulls()}}.release();

auto const expected = StrListsCol{
StrListsCol{"Tomato", "Bear", "Apple"},
StrListsCol{{"Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, null_at({1, 2, 3})},
StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"},
StrListsCol{"Lemon", "Peach"},
StrListsCol{"Coconut"},
StrListsCol{
{"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/},
all_nulls()}}.release();
auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;
Expand Down Expand Up @@ -633,6 +673,33 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col =
ListsCol{{1, 2, 3}, ListsCol{{null, 3}, null_at(0)}, {3, 4, 5, 6}, {5, 6}, {}, {7}}.release();
auto const col1 = cudf::slice(col->view(), {0, 3})[0];
auto const col2 = cudf::slice(col->view(), {1, 4})[0];
auto const col3 = cudf::slice(col->view(), {2, 5})[0];
auto const col4 = cudf::slice(col->view(), {3, 6})[0];
auto const expected = ListsCol{
ListsCol{1, 2, 3},
ListsCol{{null, 3}, null_at(0)},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{{null, 3}, null_at(0)},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{},
ListsCol{7}}.release();
auto const results = cudf::interleave_columns(TView{{col1, col2, col3, col4}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls)
{
auto const col =
Expand Down