Skip to content

Commit

Permalink
Fix interleave_columns on ListType with nullable child (#8181)
Browse files Browse the repository at this point in the history
Current PR is to fix `interleave_columns` on ListType with nullable child column. The problem is regarding `list_has_null_mask` as `data_has_null_mask` by mistake. This problem also breaks `cudf::lists::concatenate_rows`.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Jake Hemstad (https://github.com/jrhemstad)

URL: #8181
  • Loading branch information
sperlingxx authored May 10, 2021
1 parent fbb9a98 commit 97b2e9e
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 9 deletions.
23 changes: 14 additions & 9 deletions cpp/src/lists/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,15 @@ struct interleave_list_entries_fn {
column_view const& output_list_offsets,
size_type num_output_lists,
size_type num_output_entries,
bool has_null_mask,
bool data_has_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const table_dv_ptr = table_device_view::create(input);
auto const comp_fn = compute_string_sizes_and_interleave_lists_fn{
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), has_null_mask};
*table_dv_ptr, output_list_offsets.template begin<offset_type>(), data_has_null_mask};

if (has_null_mask) {
if (data_has_null_mask) {
auto [offsets_column, chars_column, null_mask, null_count] =
cudf::strings::detail::make_strings_children_with_null_mask(
comp_fn, num_output_lists, num_output_entries, stream, mr);
Expand Down Expand Up @@ -205,7 +205,7 @@ struct interleave_list_entries_fn {
column_view const& output_list_offsets,
size_type num_output_lists,
size_type num_output_entries,
bool has_null_mask,
bool data_has_null_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const noexcept
{
Expand All @@ -222,7 +222,8 @@ struct interleave_list_entries_fn {
auto output_dv_ptr = mutable_column_device_view::create(*output);

// The array of int8_t to store entry validities.
auto validities = rmm::device_uvector<int8_t>(has_null_mask ? num_output_entries : 0, stream);
auto validities =
rmm::device_uvector<int8_t>(data_has_null_mask ? num_output_entries : 0, stream);

thrust::for_each_n(
rmm::exec_policy(stream),
Expand All @@ -233,7 +234,7 @@ struct interleave_list_entries_fn {
d_validities = validities.begin(),
d_offsets = output_list_offsets.template begin<offset_type>(),
d_output = output_dv_ptr->template begin<T>(),
has_null_mask] __device__(size_type const idx) {
data_has_null_mask] __device__(size_type const idx) {
auto const col_id = idx % num_cols;
auto const list_id = idx / num_cols;
auto const& lists_col = table_dv.column(col_id);
Expand All @@ -248,7 +249,7 @@ struct interleave_list_entries_fn {
auto const write_start = d_offsets[idx];

// Fill the validities array if necessary.
if (has_null_mask) {
if (data_has_null_mask) {
for (auto read_idx = start_idx, write_idx = write_start; read_idx < end_idx;
++read_idx, ++write_idx) {
d_validities[write_idx] = static_cast<int8_t>(data_col.is_valid(read_idx));
Expand All @@ -263,7 +264,7 @@ struct interleave_list_entries_fn {
thrust::seq, input_ptr, input_ptr + sizeof(T) * (end_idx - start_idx), output_ptr);
});

if (has_null_mask) {
if (data_has_null_mask) {
auto [null_mask, null_count] = cudf::detail::valid_if(
validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);
if (null_count > 0) { output->set_null_mask(null_mask, null_count); }
Expand Down Expand Up @@ -323,13 +324,17 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
auto const num_output_lists = input.num_rows() * input.num_columns();
auto const num_output_entries =
cudf::detail::get_value<offset_type>(offsets_view, num_output_lists, stream);
auto const data_has_null_mask =
std::any_of(std::cbegin(input), std::cend(input), [](auto const& col) {
return col.child(lists_column_view::child_column_index).nullable();
});
auto list_entries = type_dispatcher<dispatch_storage_type>(entry_type,
interleave_list_entries_fn{},
input,
offsets_view,
num_output_lists,
num_output_entries,
has_null_mask,
data_has_null_mask,
stream,
mr);

Expand Down
32 changes: 32 additions & 0 deletions cpp/tests/lists/concatenate_rows_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputNoNull)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col1 = ListsCol{{1, 2}, ListsCol{{null}, null_at(0)}, {5, 6}}.release();
auto const col2 = ListsCol{{7, 8}, {9, 10}, {11, 12}}.release();
auto const expected =
ListsCol{{1, 2, 7, 8}, ListsCol{{null, 9, 10}, null_at(0)}, {5, 6, 11, 12}}.release();
auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull)
{
auto const col1 = StrListsCol{
Expand All @@ -143,6 +155,26 @@ TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsWithNullableChild)
{
auto const col1 = StrListsCol{
StrListsCol{"Tomato", "Apple"},
StrListsCol{"Banana", "Kiwi", "Cherry"},
StrListsCol{
"Coconut"}}.release();
auto const col2 = StrListsCol{
StrListsCol{"Orange"},
StrListsCol{{"Lemon", "Peach"}, null_at(1)},
StrListsCol{}}.release();
auto const expected = StrListsCol{
StrListsCol{"Tomato", "Apple", "Orange"},
StrListsCol{{"Banana", "Kiwi", "Cherry", "Lemon", "Peach"}, null_at(4)},
StrListsCol{
"Coconut"}}.release();
auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;
Expand Down
67 changes: 67 additions & 0 deletions cpp/tests/reshape/interleave_columns_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,19 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col1 = ListsCol{{1, 2}, {3, 4}}.release();
auto const col2 = ListsCol{{5, 6}, {7, 8}}.release();
auto const col3 = ListsCol{{9, 10}, ListsCol{{null, 12}, null_at(0)}}.release();
auto const expected =
ListsCol{{1, 2}, {5, 6}, {9, 10}, {3, 4}, {7, 8}, ListsCol{{null, 12}, null_at(0)}}.release();
auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view(), col3->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls)
{
auto const col1 = StrListsCol{
Expand Down Expand Up @@ -568,6 +581,33 @@ TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNullableChild)
{
auto const col1 = StrListsCol{
StrListsCol{"Tomato", "Bear", "Apple"},
StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"},
StrListsCol{
"Coconut"}}.release();
auto const col2 = StrListsCol{
StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})},
StrListsCol{"Lemon", "Peach"},
StrListsCol{
{"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/},
all_nulls()}}.release();

auto const expected = StrListsCol{
StrListsCol{"Tomato", "Bear", "Apple"},
StrListsCol{{"Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, null_at({1, 2, 3})},
StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"},
StrListsCol{"Lemon", "Peach"},
StrListsCol{"Coconut"},
StrListsCol{
{"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/},
all_nulls()}}.release();
auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view()}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;
Expand Down Expand Up @@ -633,6 +673,33 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNullableChild)
{
using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

auto const col =
ListsCol{{1, 2, 3}, ListsCol{{null, 3}, null_at(0)}, {3, 4, 5, 6}, {5, 6}, {}, {7}}.release();
auto const col1 = cudf::slice(col->view(), {0, 3})[0];
auto const col2 = cudf::slice(col->view(), {1, 4})[0];
auto const col3 = cudf::slice(col->view(), {2, 5})[0];
auto const col4 = cudf::slice(col->view(), {3, 6})[0];
auto const expected = ListsCol{
ListsCol{1, 2, 3},
ListsCol{{null, 3}, null_at(0)},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{{null, 3}, null_at(0)},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{},
ListsCol{3, 4, 5, 6},
ListsCol{5, 6},
ListsCol{},
ListsCol{7}}.release();
auto const results = cudf::interleave_columns(TView{{col1, col2, col3, col4}});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
}

TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls)
{
auto const col =
Expand Down

0 comments on commit 97b2e9e

Please sign in to comment.