From 773fc7aa93825694d75acac1230b16a995d9a25f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 26 May 2021 14:32:24 -0600 Subject: [PATCH] `strings::join_list_elements` options for empty list inputs (#8285) This PR implements a new option for `strings::join_list_elements` on top of https://github.com/rapidsai/cudf/pull/8282. In particular, the new option is: ``` /** * @brief Setting for specifying what will be output from `join_list_elements` when an input list * is empty. */ enum class output_if_empty_list { EMPTY_STRING, ///< Empty list will result in empty string NULL_ELEMENT ///< Empty list will result in a null }; ``` This new option is necessary for implementing `concat_ws` in Spark, since the behavior of the output string is required to be different depending on the situation. Currently blocked from merging by https://github.com/rapidsai/cudf/pull/8282. Authors: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Keith Kraus (https://github.com/kkraus14) - Mike Wilson (https://github.com/hyperbolic2346) - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/8285 --- cpp/include/cudf/strings/combine.hpp | 48 +++++++-- cpp/src/strings/combine/join_list_elements.cu | 65 ++++++++---- .../combine/join_list_elements_tests.cpp | 99 +++++++++++++++---- python/cudf/cudf/_lib/cpp/strings/combine.pxd | 16 ++- python/cudf/cudf/_lib/strings/combine.pyx | 12 ++- python/cudf/cudf/core/column/string.py | 33 ++++--- python/cudf/cudf/tests/test_string.py | 15 +-- 7 files changed, 212 insertions(+), 76 deletions(-) diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 360efe15303..3e069de2f0f 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -39,6 +39,15 
@@ enum class separator_on_nulls { NO ///< Do not add separators if an element is null }; +/** + * @brief Setting for specifying what will be output from `join_list_elements` when an input list + * is empty. + */ +enum class output_if_empty_list { + EMPTY_STRING, ///< Empty list will result in empty string + NULL_ELEMENT ///< Empty list will result in a null +}; + /** * @brief Concatenates all strings in the column into one new string delimited * by an optional separator string. @@ -203,8 +212,15 @@ std::unique_ptr concatenate( * column will also result in a null output row unless a valid @p separator_narep scalar is provided * to be used in place of the null separators. * - * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the - * output between null elements. Otherwise, separators are always added if @p narep is valid. + * If @p separate_nulls is set to `NO` and @p string_narep is valid then separators are not added to + * the output between null elements. Otherwise, separators are always added if @p string_narep is + * valid. + * + * If @p empty_list_policy is set to `EMPTY_STRING`, any row that is an empty list will result in + * an empty output string. Otherwise, the output will be a null. + * + * In the special case when the input list row contains all null elements, the output will be the + * same as in case of empty input list regardless of @p string_narep and @p separate_nulls values. * * @code{.pseudo} * Example: @@ -234,16 +250,19 @@ std::unique_ptr concatenate( * default is an invalid-scalar denoting that list rows containing null strings will result * in null string in the corresponding output rows. * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. + * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will + * result in an empty string. Otherwise, it will result in a null. 
* @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column with concatenated results. */ std::unique_ptr join_list_elements( const lists_column_view& lists_strings_column, const strings_column_view& separators, - string_scalar const& separator_narep = string_scalar("", false), - string_scalar const& string_narep = string_scalar("", false), - separator_on_nulls separate_nulls = separator_on_nulls::YES, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& separator_narep = string_scalar("", false), + string_scalar const& string_narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings @@ -259,6 +278,12 @@ std::unique_ptr join_list_elements( * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the * output between null elements. Otherwise, separators are always added if @p narep is valid. * + * If @p empty_list_policy is set to `EMPTY_STRING`, any row that is an empty list will result in + * an empty output string. Otherwise, the output will be a null. + * + * In the special case when the input list row contains all null elements, the output will be the + * same as in case of empty input list regardless of @p narep and @p separate_nulls values. + * * @code{.pseudo} * Example: * s = [ ['aa', 'bb', 'cc'], null, ['', 'dd'], ['ee', null], ['ff'] ] @@ -283,15 +308,18 @@ std::unique_ptr join_list_elements( * is an invalid-scalar denoting that list rows containing null strings will result in null * string in the corresponding output rows. 
* @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. + * @param empty_list_policy if set to EMPTY_STRING, any input row that is an empty list will result + * in an empty string. Otherwise, it will result in a null. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column with concatenated results. */ std::unique_ptr join_list_elements( const lists_column_view& lists_strings_column, - string_scalar const& separator = string_scalar(""), - string_scalar const& narep = string_scalar("", false), - separator_on_nulls separate_nulls = separator_on_nulls::YES, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + string_scalar const& separator = string_scalar(""), + string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, + output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index 7a83097566c..7edb0cd8e7b 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -53,6 +53,7 @@ struct compute_size_and_concatenate_fn { column_device_view const strings_dv; string_scalar_device_view const string_narep_dv; separator_on_nulls const separate_nulls; + output_if_empty_list const empty_list_policy; offset_type* d_offsets{nullptr}; @@ -63,27 +64,40 @@ struct compute_size_and_concatenate_fn { // We need to set `1` or `0` for the validities of the output strings. 
int8_t* d_validities{nullptr}; - __device__ void operator()(size_type const idx) + __device__ bool output_is_null(size_type const idx, + size_type const start_idx, + size_type const end_idx) const noexcept + { + if (func.is_null_list(lists_dv, idx)) { return true; } + return empty_list_policy == output_if_empty_list::NULL_ELEMENT && start_idx == end_idx; + } + + __device__ void operator()(size_type const idx) const noexcept { // If this is the second pass, and the row `idx` is known to be a null string - if (d_chars and not d_validities[idx]) { return; } + if (d_chars && !d_validities[idx]) { return; } + + // Indices of the strings within the list row + auto const start_idx = list_offsets[idx]; + auto const end_idx = list_offsets[idx + 1]; - if (not d_chars and func.is_null_list(lists_dv, idx)) { + if (!d_chars && output_is_null(idx, start_idx, end_idx)) { d_offsets[idx] = 0; d_validities[idx] = false; return; } - auto const separator = func.separator(idx); - auto size_bytes = size_type{0}; - char* output_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; - bool write_separator = false; + auto const separator = func.separator(idx); + auto size_bytes = size_type{0}; + char* output_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; + bool has_valid_element = false; + bool write_separator = false; - for (size_type str_idx = list_offsets[idx], idx_end = list_offsets[idx + 1]; str_idx < idx_end; - ++str_idx) { + for (size_type str_idx = start_idx; str_idx < end_idx; ++str_idx) { bool null_element = strings_dv.is_null(str_idx); + has_valid_element = has_valid_element || !null_element; - if (not d_chars and (null_element and not string_narep_dv.is_valid())) { + if (!d_chars && (null_element && !string_narep_dv.is_valid())) { d_offsets[idx] = 0; d_validities[idx] = false; return; // early termination: the entire list of strings will result in a null string @@ -104,9 +118,12 @@ struct compute_size_and_concatenate_fn { write_separator || (separate_nulls == separator_on_nulls::YES) || !null_element; } - if (not d_chars) { - d_offsets[idx] = size_bytes; - d_validities[idx] = true; + // If there are all null elements, the output should be the same as having an empty list input: + // a null or an empty string + if (!d_chars) { + d_offsets[idx] = has_valid_element ? 
size_bytes : 0; + d_validities[idx] = + has_valid_element || empty_list_policy == output_if_empty_list::EMPTY_STRING; } } }; @@ -134,6 +151,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& separator, string_scalar const& narep, separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -161,7 +179,8 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string lists_strings_column.offsets_begin(), *strings_dv_ptr, string_narep_dv, - separate_nulls}; + separate_nulls, + empty_list_policy}; auto [offsets_column, chars_column, null_mask, null_count] = make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr); @@ -187,7 +206,7 @@ struct column_separators_fn { __device__ bool is_null_list(column_device_view const& lists_dv, size_type const idx) const noexcept { - return lists_dv.is_null(idx) or (separators_dv.is_null(idx) and not sep_narep_dv.is_valid()); + return lists_dv.is_null(idx) || (separators_dv.is_null(idx) && !sep_narep_dv.is_valid()); } __device__ string_view separator(size_type const idx) const noexcept @@ -204,6 +223,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& separator_narep, string_scalar const& string_narep, separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -233,7 +253,8 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string lists_strings_column.offsets_begin(), *strings_dv_ptr, string_narep_dv, - separate_nulls}; + separate_nulls, + empty_list_policy}; auto [offsets_column, chars_column, null_mask, null_count] = make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr); @@ -252,11 +273,17 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& 
separator, string_scalar const& narep, separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::join_list_elements( - lists_strings_column, separator, narep, separate_nulls, rmm::cuda_stream_default, mr); + return detail::join_list_elements(lists_strings_column, + separator, + narep, + separate_nulls, + empty_list_policy, + rmm::cuda_stream_default, + mr); } std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, @@ -264,6 +291,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string string_scalar const& separator_narep, string_scalar const& string_narep, separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); @@ -272,6 +300,7 @@ std::unique_ptr join_list_elements(lists_column_view const& lists_string separator_narep, string_narep, separate_nulls, + empty_list_policy, rmm::cuda_stream_default, mr); } diff --git a/cpp/tests/strings/combine/join_list_elements_tests.cpp b/cpp/tests/strings/combine/join_list_elements_tests.cpp index e2f7c3e36a2..509acfd3292 100644 --- a/cpp/tests/strings/combine/join_list_elements_tests.cpp +++ b/cpp/tests/strings/combine/join_list_elements_tests.cpp @@ -22,8 +22,7 @@ #include #include #include - -#include +#include struct StringsListsConcatenateTest : public cudf::test::BaseFixture { }; @@ -35,14 +34,13 @@ using INT_LISTS = cudf::test::lists_column_wrapper; constexpr bool print_all{false}; -auto null_at(cudf::size_type idx) -{ - return cudf::detail::make_counting_transform_iterator(0, [idx](auto i) { return i != idx; }); -} +auto all_nulls() { return cudf::test::iterator_all_nulls(); } -auto all_nulls() +auto null_at(cudf::size_type idx) { return cudf::test::iterator_with_null_at(idx); } + +auto null_at(std::vector const& indices) { - return cudf::detail::make_counting_transform_iterator(0, [](auto) { return false; 
}); + return cudf::test::iterator_with_null_at(cudf::host_span{indices}); } auto nulls_from_nullptr(std::vector const& strs) @@ -99,14 +97,81 @@ TEST_F(StringsListsConcatenateTest, ZeroSizeStringsInput) auto const string_lists = STR_LISTS{STR_LISTS{""}, STR_LISTS{"", "", ""}, STR_LISTS{"", ""}, STR_LISTS{}}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); - auto const expected = STR_COL{"", "", "", ""}; - auto results = cudf::strings::join_list_elements(string_lv); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + // Empty list results in empty string + { + auto const expected = STR_COL{"", "", "", ""}; - auto const separators = STR_COL{"", "", "", ""}.release(); - results = cudf::strings::join_list_elements(string_lv, separators->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + auto results = cudf::strings::join_list_elements(string_lv); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + + auto const separators = STR_COL{"", "", "", ""}.release(); + results = cudf::strings::join_list_elements(string_lv, separators->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } + + // Empty list results in null + { + auto const expected = STR_COL{{"", "", "", "" /*NULL*/}, null_at(3)}; + auto results = + cudf::strings::join_list_elements(string_lv, + cudf::string_scalar(""), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO, + cudf::strings::output_if_empty_list::NULL_ELEMENT); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + + auto const separators = STR_COL{"", "", "", ""}.release(); + results = cudf::strings::join_list_elements(string_lv, + separators->view(), + cudf::string_scalar(""), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO, + cudf::strings::output_if_empty_list::NULL_ELEMENT); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } +} + +TEST_F(StringsListsConcatenateTest, 
ColumnHasEmptyListAndNullListInput) +{ + auto const string_lists = + STR_LISTS{{STR_LISTS{"abc", "def", ""}, STR_LISTS{} /*NULL*/, STR_LISTS{}, STR_LISTS{"gh"}}, + null_at(1)} + .release(); + auto const string_lv = cudf::lists_column_view(string_lists->view()); + + // Empty list results in empty string + { + auto const expected = STR_COL{{"abc-def-", "" /*NULL*/, "", "gh"}, null_at(1)}; + + auto results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("-")); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + + auto const separators = STR_COL{"-", "", "", ""}.release(); + results = cudf::strings::join_list_elements(string_lv, separators->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } + + // Empty list results in null + { + auto const expected = STR_COL{{"abc-def-", "" /*NULL*/, "" /*NULL*/, "gh"}, null_at({1, 2})}; + auto results = + cudf::strings::join_list_elements(string_lv, + cudf::string_scalar("-"), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO, + cudf::strings::output_if_empty_list::NULL_ELEMENT); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + + auto const separators = STR_COL{"-", "", "", ""}.release(); + results = cudf::strings::join_list_elements(string_lv, + separators->view(), + cudf::string_scalar(""), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO, + cudf::strings::output_if_empty_list::NULL_ELEMENT); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } } TEST_F(StringsListsConcatenateTest, AllNullsStringsInput) @@ -127,12 +192,6 @@ TEST_F(StringsListsConcatenateTest, AllNullsStringsInput) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); } -auto null_at(std::initializer_list indices) -{ - return cudf::detail::make_counting_transform_iterator( - 0, [indices](auto i) { return std::find(indices.begin(), indices.end(), i) == indices.end(); }); -} - TEST_F(StringsListsConcatenateTest, ScalarSeparator) { 
auto const string_lists = STR_LISTS{{STR_LISTS{{"a", "bb" /*NULL*/, "ccc"}, null_at(1)}, diff --git a/python/cudf/cudf/_lib/cpp/strings/combine.pxd b/python/cudf/cudf/_lib/cpp/strings/combine.pxd index 51c706b68d0..35d7516d127 100644 --- a/python/cudf/cudf/_lib/cpp/strings/combine.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/combine.pxd @@ -8,6 +8,14 @@ from cudf._lib.cpp.column.column cimport column cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: + ctypedef enum separator_on_nulls: + YES 'cudf::strings::separator_on_nulls::YES' + NO 'cudf::strings::separator_on_nulls::NO' + + ctypedef enum output_if_empty_list: + EMPTY_STRING 'cudf::strings::output_if_empty_list::EMPTY_STRING' + NULL_ELEMENT 'cudf::strings::output_if_empty_list::NULL_ELEMENT' + cdef unique_ptr[column] concatenate( table_view source_strings, string_scalar separator, @@ -22,9 +30,13 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: column_view lists_strings_column, column_view separators, string_scalar separator_narep, - string_scalar string_narep) except + + string_scalar string_narep, + separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy) except + cdef unique_ptr[column] join_list_elements( column_view lists_strings_column, string_scalar separator, - string_scalar narep) except + + string_scalar narep, + separator_on_nulls separate_nulls, + output_if_empty_list empty_list_policy) except + diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 0d7dfb5c619..3d20e5f15b7 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -16,7 +16,9 @@ from cudf._lib.table cimport Table from cudf._lib.cpp.strings.combine cimport ( concatenate as cpp_concatenate, join_strings as cpp_join_strings, - join_list_elements as cpp_join_list_elements + join_list_elements as cpp_join_list_elements, + separator_on_nulls as separator_on_nulls, + 
output_if_empty_list as output_if_empty_list ) @@ -108,7 +110,9 @@ def join_lists_with_scalar( c_result = move(cpp_join_list_elements( source_view, scalar_separator[0], - scalar_narep[0] + scalar_narep[0], + separator_on_nulls.YES, + output_if_empty_list.NULL_ELEMENT )) return Column.from_unique_ptr(move(c_result)) @@ -146,7 +150,9 @@ def join_lists_with_column( source_view, separator_view, scalar_separator_narep[0], - scalar_source_narep[0] + scalar_source_narep[0], + separator_on_nulls.YES, + output_if_empty_list.NULL_ELEMENT )) return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a8ae74bdd4b..0b83548a92d 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -480,7 +480,9 @@ def join( If the elements of a Series are lists themselves, join the content of these lists using the delimiter passed to the function. - This function is an equivalent to :meth:`str.join`. + This function is an equivalent to :meth:`str.join`. + In the special case that the lists in the Series contain only ``None``, + a `<NA>`/`None` value will always be returned. Parameters ---------- @@ -489,10 +491,11 @@ def join( If array-like, the string at a position is used as a delimiter for corresponding row of the list entries. string_na_rep : str, default None - This character will take the place of any null strings - (not empty strings) in the Series. - If ``string_na_rep`` is ``None``, it defaults to empty - space "". + This character will take the place of null strings + (not empty strings) in the Series but will be considered + only if the Series contains list elements and those lists have + at least one non-null string. If ``string_na_rep`` is ``None``, + it defaults to empty space "". sep_na_rep : str, default None This character will take the place of any null strings (not empty strings) in `sep`.
This parameter can be used @@ -556,27 +559,31 @@ def join( dtype: object We can replace `<NA>`/`None` values present in lists using - ``string_na_rep``: + ``string_na_rep`` if the lists contain at least one valid string + (lists containing all `None` will result in a `<NA>`/`None` value): - >>> ser = cudf.Series([['a', 'b', None], None, ['c', 'd']]) + >>> ser = cudf.Series([['a', 'b', None], [None, None, None], None, ['c', 'd']]) >>> ser - 0 [a, b, None] - 1 None - 2 [c, d] + 0 [a, b, None] + 1 [None, None, None] + 2 None + 3 [c, d] dtype: list >>> ser.str.join(sep='_', string_na_rep='k') 0 a_b_k 1 <NA> - 2 c_d + 2 <NA> + 3 c_d dtype: object We can replace `<NA>`/`None` values present in lists of ``sep`` using ``sep_na_rep``: - >>> ser.str.join(sep=[None, '.', '-'], sep_na_rep='+') + >>> ser.str.join(sep=[None, '^', '.', '-'], sep_na_rep='+') 0 a+b+ 1 <NA> - 2 c-d + 2 <NA> + 3 c-d dtype: object """ # noqa E501 if sep is None: diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 7be64bedcd7..58b3996ab5c 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -3135,7 +3135,7 @@ def test_str_join_lists_error(): "__", "=", None, - cudf.Series(["a__b", "=", "=__hello__=__world"]), + cudf.Series(["a__b", None, "=__hello__=__world"]), ), ( cudf.Series( @@ -3149,7 +3149,7 @@ def test_str_join_lists_error(): ["-", "_", "**", "!"], None, None, - cudf.Series(["a--b", "", "**hello****world", None]), + cudf.Series(["a--b", None, "**hello****world", None]), ), ( cudf.Series( @@ -3164,12 +3164,7 @@ def test_str_join_lists_error(): "rep_str", "sep_str", cudf.Series( - [ - "a-rep_str-b", - "rep_str", - "rep_str**hello**rep_str**world", - None, - ] + ["a-rep_str-b", None, "rep_str**hello**rep_str**world", None] ), ), ( @@ -3177,14 +3172,14 @@ def test_str_join_lists_error(): ["-", "_", None], "rep_str", None, - cudf.Series(["rep_str-a", "rep_str", None]), + cudf.Series(["rep_str-a", None, None]), ), ( cudf.Series([[None, "a"],
[None], None]), ["-", "_", None], None, "sep_str", - cudf.Series(["-a", "", None]), + cudf.Series(["-a", None, None]), ), ], )