diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index eb205ae8076..caadcf90f16 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -167,15 +167,15 @@ struct interleave_list_entries_fn { column_view const& output_list_offsets, size_type num_output_lists, size_type num_output_entries, - bool has_null_mask, + bool data_has_null_mask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const noexcept { auto const table_dv_ptr = table_device_view::create(input); auto const comp_fn = compute_string_sizes_and_interleave_lists_fn{ - *table_dv_ptr, output_list_offsets.template begin(), has_null_mask}; + *table_dv_ptr, output_list_offsets.template begin(), data_has_null_mask}; - if (has_null_mask) { + if (data_has_null_mask) { auto [offsets_column, chars_column, null_mask, null_count] = cudf::strings::detail::make_strings_children_with_null_mask( comp_fn, num_output_lists, num_output_entries, stream, mr); @@ -205,7 +205,7 @@ struct interleave_list_entries_fn { column_view const& output_list_offsets, size_type num_output_lists, size_type num_output_entries, - bool has_null_mask, + bool data_has_null_mask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const noexcept { @@ -222,7 +222,8 @@ struct interleave_list_entries_fn { auto output_dv_ptr = mutable_column_device_view::create(*output); // The array of int8_t to store entry validities. - auto validities = rmm::device_uvector(has_null_mask ? num_output_entries : 0, stream); + auto validities = + rmm::device_uvector(data_has_null_mask ? num_output_entries : 0, stream); thrust::for_each_n( rmm::exec_policy(stream), @@ -233,7 +234,7 @@ struct interleave_list_entries_fn { d_validities = validities.begin(), d_offsets = output_list_offsets.template begin(), d_output = output_dv_ptr->template begin(), - has_null_mask] __device__(size_type const idx) { + data_has_null_mask] __device__(size_type const idx) { auto const col_id = idx % num_cols; auto const list_id = idx / num_cols; auto const& lists_col = table_dv.column(col_id); @@ -248,7 +249,7 @@ struct interleave_list_entries_fn { auto const write_start = d_offsets[idx]; // Fill the validities array if necessary. - if (has_null_mask) { + if (data_has_null_mask) { for (auto read_idx = start_idx, write_idx = write_start; read_idx < end_idx; ++read_idx, ++write_idx) { d_validities[write_idx] = static_cast(data_col.is_valid(read_idx)); @@ -263,7 +264,7 @@ struct interleave_list_entries_fn { thrust::seq, input_ptr, input_ptr + sizeof(T) * (end_idx - start_idx), output_ptr); }); - if (has_null_mask) { + if (data_has_null_mask) { auto [null_mask, null_count] = cudf::detail::valid_if( validities.begin(), validities.end(), thrust::identity{}, stream, mr); if (null_count > 0) { output->set_null_mask(null_mask, null_count); } @@ -323,13 +324,17 @@ std::unique_ptr interleave_columns(table_view const& input, auto const num_output_lists = input.num_rows() * input.num_columns(); auto const num_output_entries = cudf::detail::get_value(offsets_view, num_output_lists, stream); + auto const data_has_null_mask = + std::any_of(std::cbegin(input), std::cend(input), [](auto const& col) { + return col.child(lists_column_view::child_column_index).nullable(); + }); auto list_entries = type_dispatcher(entry_type, interleave_list_entries_fn{}, input, offsets_view, num_output_lists, num_output_entries, - has_null_mask, + data_has_null_mask, stream, mr); diff --git a/cpp/tests/lists/concatenate_rows_tests.cpp b/cpp/tests/lists/concatenate_rows_tests.cpp index 9c4329677e1..131949ec1e9 100644 --- a/cpp/tests/lists/concatenate_rows_tests.cpp +++ b/cpp/tests/lists/concatenate_rows_tests.cpp @@ -125,6 +125,18 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputNoNull) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } +TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNullableChild) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto const col1 = ListsCol{{1, 2}, ListsCol{{null}, null_at(0)}, {5, 6}}.release(); + auto const col2 = ListsCol{{7, 8}, {9, 10}, {11, 12}}.release(); + auto const expected = + ListsCol{{1, 2, 7, 8}, ListsCol{{null, 9, 10}, null_at(0)}, {5, 6, 11, 12}}.release(); + auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); +} + TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull) { auto const col1 = StrListsCol{ @@ -143,6 +155,26 @@ TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsNoNull) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } +TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsWithNullableChild) +{ + auto const col1 = StrListsCol{ + StrListsCol{"Tomato", "Apple"}, + StrListsCol{"Banana", "Kiwi", "Cherry"}, + StrListsCol{ + "Coconut"}}.release(); + auto const col2 = StrListsCol{ + StrListsCol{"Orange"}, + StrListsCol{{"Lemon", "Peach"}, null_at(1)}, + StrListsCol{}}.release(); + auto const expected = StrListsCol{ + StrListsCol{"Tomato", "Apple", "Orange"}, + StrListsCol{{"Banana", "Kiwi", "Cherry", "Lemon", "Peach"}, null_at(4)}, + StrListsCol{ + "Coconut"}}.release(); + auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view()}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); +} + TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls) { using ListsCol = cudf::test::lists_column_wrapper; diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index 68cf19bbe9c..bece196ccac 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -539,6 +539,19 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } +TYPED_TEST(ListsColumnsInterleaveTypedTest, SimpleInputWithNullableChild) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto const col1 = ListsCol{{1, 2}, {3, 4}}.release(); + auto const col2 = ListsCol{{5, 6}, {7, 8}}.release(); + auto const col3 = ListsCol{{9, 10}, ListsCol{{null, 12}, null_at(0)}}.release(); + auto const expected = + ListsCol{{1, 2}, {5, 6}, {9, 10}, {3, 4}, {7, 8}, ListsCol{{null, 12}, null_at(0)}}.release(); + auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view(), col3->view()}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); +} + TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls) { auto const col1 = StrListsCol{ @@ -568,6 +581,33 @@ TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } +TEST_F(ListsColumnsInterleaveTest, SimpleInputStringsColumnsWithNullableChild) +{ + auto const col1 = StrListsCol{ + StrListsCol{"Tomato", "Bear", "Apple"}, + StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"}, + StrListsCol{ + "Coconut"}}.release(); + auto const col2 = StrListsCol{ + StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})}, + StrListsCol{"Lemon", "Peach"}, + StrListsCol{ + {"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/}, + all_nulls()}}.release(); + + auto const expected = StrListsCol{ + StrListsCol{"Tomato", "Bear", "Apple"}, + StrListsCol{{"Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, null_at({1, 2, 3})}, + StrListsCol{"Banana", "Pig", "Kiwi", "Cherry", "Whale"}, + StrListsCol{"Lemon", "Peach"}, + StrListsCol{"Coconut"}, + StrListsCol{ + {"Deer" /*NULL*/, "Snake" /*NULL*/, "Horse" /*NULL*/}, + all_nulls()}}.release(); + auto const results = cudf::interleave_columns(TView{{col1->view(), col2->view()}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); +} + TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNoNull) { using ListsCol = cudf::test::lists_column_wrapper; @@ -633,6 +673,33 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputWithNulls) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } +TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedColumnsInputNullableChild) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto const col = + ListsCol{{1, 2, 3}, ListsCol{{null, 3}, null_at(0)}, {3, 4, 5, 6}, {5, 6}, {}, {7}}.release(); + auto const col1 = cudf::slice(col->view(), {0, 3})[0]; + auto const col2 = cudf::slice(col->view(), {1, 4})[0]; + auto const col3 = cudf::slice(col->view(), {2, 5})[0]; + auto const col4 = cudf::slice(col->view(), {3, 6})[0]; + auto const expected = ListsCol{ + ListsCol{1, 2, 3}, + ListsCol{{null, 3}, null_at(0)}, + ListsCol{3, 4, 5, 6}, + ListsCol{5, 6}, + ListsCol{{null, 3}, null_at(0)}, + ListsCol{3, 4, 5, 6}, + ListsCol{5, 6}, + ListsCol{}, + ListsCol{3, 4, 5, 6}, + ListsCol{5, 6}, + ListsCol{}, + ListsCol{7}}.release(); + auto const results = cudf::interleave_columns(TView{{col1, col2, col3, col4}}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); +} + TEST_F(ListsColumnsInterleaveTest, SlicedStringsColumnsInputWithNulls) { auto const col =