diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 469c25fb673..a5b6f6319ae 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -195,6 +195,7 @@ outputs: - test -f $PREFIX/include/cudf/lists/detail/dremel.hpp - test -f $PREFIX/include/cudf/lists/detail/extract.hpp - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp + - test -f $PREFIX/include/cudf/lists/detail/lists_column_factories.hpp - test -f $PREFIX/include/cudf/lists/detail/reverse.hpp - test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh - test -f $PREFIX/include/cudf/lists/detail/set_operations.hpp @@ -205,7 +206,6 @@ outputs: - test -f $PREFIX/include/cudf/lists/filling.hpp - test -f $PREFIX/include/cudf/lists/gather.hpp - test -f $PREFIX/include/cudf/lists/list_view.hpp - - test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp - test -f $PREFIX/include/cudf/lists/reverse.hpp - test -f $PREFIX/include/cudf/lists/set_operations.hpp diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/detail/lists_column_factories.hpp similarity index 73% rename from cpp/include/cudf/lists/lists_column_factories.hpp rename to cpp/include/cudf/lists/detail/lists_column_factories.hpp index fea1118748c..f4dcbfcce7e 100644 --- a/cpp/include/cudf/lists/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/detail/lists_column_factories.hpp @@ -40,6 +40,19 @@ std::unique_ptr make_lists_column_from_scalar(list_scalar const& v rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Create an empty lists column + * + * A list column requires a child type and so cannot be created with `make_empty_column`. + * + * @param child_type The type used for the empty child column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr make_empty_lists_column(data_type child_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index c401b765f0b..a2da33599c7 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/cpp/src/lists/lists_column_factories.cu b/cpp/src/lists/lists_column_factories.cu index ec69d8bda1f..875bf67133f 100644 --- a/cpp/src/lists/lists_column_factories.cu +++ b/cpp/src/lists/lists_column_factories.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -84,6 +85,16 @@ std::unique_ptr make_lists_column_from_scalar(list_scalar const& v return std::move(res->release()[0]); } +std::unique_ptr make_empty_lists_column(data_type child_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto offsets = make_empty_column(data_type(type_to_id())); + auto child = make_empty_column(child_type); + return make_lists_column( + 0, std::move(offsets), std::move(child), 0, rmm::device_buffer{}, stream, mr); +} + } // namespace detail } // namespace lists diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index ecdf81b9158..895bc9de816 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -127,16 +128,6 @@ struct sequences_functor()>> { } }; -std::unique_ptr make_empty_lists_column(data_type child_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto offsets = make_empty_column(data_type(type_to_id())); - auto child = make_empty_column(child_type); - return make_lists_column( - 0, std::move(offsets), std::move(child), 0, rmm::device_buffer(0, stream, mr), stream, mr); -} - std::unique_ptr sequences(column_view const& starts, std::optional const& steps, column_view const& sizes, diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 70d7947c639..1252e79be90 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -122,13 +123,7 @@ std::unique_ptr extract_all_record(strings_column_view const& input, // Return an empty lists column if there are no valid rows if (strings_count == null_count) { - return make_lists_column(0, - make_empty_column(type_to_id()), - make_empty_column(type_id::STRING), - 0, - rmm::device_buffer{}, - stream, - mr); + return cudf::lists::detail::make_empty_lists_column(data_type{type_id::STRING}, stream, mr); } // Convert counts into offsets. diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index 5b79fdefb5a..0b5ee5a900e 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,9 @@ std::unique_ptr split_record_fn(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty()) { return make_empty_column(type_id::LIST); } + if (input.is_empty()) { + return cudf::lists::detail::make_empty_lists_column(data_type{type_id::STRING}, stream, mr); + } if (input.size() == input.null_count()) { auto offsets = std::make_unique(input.offsets(), stream, mr); auto results = make_empty_column(type_id::STRING); diff --git a/cpp/tests/strings/split_tests.cpp b/cpp/tests/strings/split_tests.cpp index 7cca564d112..9eeb11a4a1f 100644 --- a/cpp/tests/strings/split_tests.cpp +++ b/cpp/tests/strings/split_tests.cpp @@ -693,10 +693,15 @@ TEST_F(StringsSplitTest, SplitZeroSizeStringsColumns) EXPECT_TRUE(results->num_columns() == 1); EXPECT_TRUE(results->num_rows() == 0); + auto target = cudf::string_scalar(" "); auto list_result = cudf::strings::split_record(zero_size_strings_column); EXPECT_TRUE(list_result->size() == 0); list_result = cudf::strings::rsplit_record(zero_size_strings_column); EXPECT_TRUE(list_result->size() == 0); + list_result = cudf::strings::split_record(zero_size_strings_column, target); + EXPECT_TRUE(list_result->size() == 0); + list_result = cudf::strings::rsplit_record(zero_size_strings_column, target); + EXPECT_TRUE(list_result->size() == 0); list_result = cudf::strings::split_record_re(zero_size_strings_column, *prog); EXPECT_TRUE(list_result->size() == 0); list_result = cudf::strings::rsplit_record_re(zero_size_strings_column, *prog); @@ -729,12 +734,17 @@ TEST_F(StringsSplitTest, AllNullsCase) EXPECT_TRUE(results->num_columns() == 1); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0).view(), input); + auto target = cudf::string_scalar(" "); auto list_result = cudf::strings::split_record(sv); using LCW = cudf::test::lists_column_wrapper; LCW expected({LCW{}, LCW{}, LCW{}}, cudf::test::iterators::all_nulls()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected); list_result = cudf::strings::rsplit_record(sv); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected); + list_result = cudf::strings::split_record(sv, target); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected); + list_result = cudf::strings::rsplit_record(sv, target); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected); list_result = cudf::strings::split_record_re(sv, *prog); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected); list_result = cudf::strings::rsplit_record_re(sv, *prog);