From 9cc51f64c73d1e38267f464f28726e6cbcf2fb70 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 5 Apr 2022 11:53:36 -0400 Subject: [PATCH 1/2] Fix strings strip() to accept only str Scalar for to_strip parameter --- python/cudf/cudf/core/column/string.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index ef8e9c4dffc..d5d45c341d5 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -2006,7 +2006,9 @@ def filter_alphanum( repl = "" return self._return_or_inplace( - libstrings.filter_alphanum(self._column, cudf.Scalar(repl), keep), + libstrings.filter_alphanum( + self._column, cudf.Scalar(repl, "str"), keep + ), ) def slice_from( @@ -2141,7 +2143,7 @@ def slice_replace( return self._return_or_inplace( libstrings.slice_replace( - self._column, start, stop, cudf.Scalar(repl) + self._column, start, stop, cudf.Scalar(repl, "str") ), ) @@ -2192,7 +2194,7 @@ def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex: repl = "" return self._return_or_inplace( - libstrings.insert(self._column, start, cudf.Scalar(repl)), + libstrings.insert(self._column, start, cudf.Scalar(repl, "str")), ) def get(self, i: int = 0) -> SeriesOrIndex: @@ -2643,7 +2645,7 @@ def rsplit( ) else: result_table = libstrings.rsplit_record( - self._column, cudf.Scalar(pat), n + self._column, cudf.Scalar(pat, "str"), n ) return self._return_or_inplace(result_table, expand=expand) @@ -2726,7 +2728,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: return self._return_or_inplace( cudf.core.frame.Frame( - *libstrings.partition(self._column, cudf.Scalar(sep)) + *libstrings.partition(self._column, cudf.Scalar(sep, "str")) ), expand=expand, ) @@ -2793,7 +2795,7 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: return self._return_or_inplace( cudf.core.frame.Frame( - *libstrings.rpartition(self._column, cudf.Scalar(sep)) + *libstrings.rpartition(self._column, cudf.Scalar(sep, "str")) ), expand=expand, ) @@ -3194,7 +3196,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.strip(self._column, cudf.Scalar(to_strip)) + libstrings.strip(self._column, cudf.Scalar(to_strip, "str")) ) def lstrip(self, to_strip: str = None) -> SeriesOrIndex: @@ -3241,7 +3243,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.lstrip(self._column, cudf.Scalar(to_strip)) + libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str")) ) def rstrip(self, to_strip: str = None) -> SeriesOrIndex: @@ -3296,7 +3298,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: to_strip = "" return self._return_or_inplace( - libstrings.rstrip(self._column, cudf.Scalar(to_strip)) + libstrings.rstrip(self._column, cudf.Scalar(to_strip, "str")) ) def wrap(self, width: int, **kwargs) -> SeriesOrIndex: @@ -4245,7 +4247,7 @@ def filter_characters( table = str.maketrans(table) return self._return_or_inplace( libstrings.filter_characters( - self._column, table, keep, cudf.Scalar(repl) + self._column, table, keep, cudf.Scalar(repl, "str") ), ) From 24790781d0be4f24d95661e93400c2a4e06bd5f7 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 5 Apr 2022 13:20:55 -0400 Subject: [PATCH 2/2] add pytests for checking invalid parameter type --- python/cudf/cudf/tests/test_string.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 493098cd494..d600fdeee27 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1299,6 +1299,12 @@ def test_string_slice_replace(string, number, diff, repr): ) +def test_string_slice_replace_fail(): + gs = cudf.Series(["abc", "xyz", ""]) + with pytest.raises(TypeError): + gs.str.slice_replace(0, 1, ["_"]) + + def test_string_insert(): gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""]) @@ -1312,6 +1318,9 @@ def test_string_insert(): ps.str.slice(stop=5) + "---" + ps.str.slice(start=5), ) + with pytest.raises(TypeError): + gs.str.insert(0, ["+"]) + _string_char_types_data = [ ["abc", "xyz", "a", "ab", "123", "097"], @@ -1404,6 +1413,9 @@ def test_string_filter_alphanum(): expected.append(rs) assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected)) + with pytest.raises(TypeError): + gs.str.filter_alphanum(["a"]) + @pytest.mark.parametrize( "case_op", ["title", "capitalize", "lower", "upper", "swapcase"] @@ -1504,6 +1516,14 @@ def test_strings_partition(data): assert_eq(pi.str.partition("-"), gi.str.partition("-")) +def test_string_partition_fail(): + gs = cudf.Series(["abc", "aa", "cba"]) + with pytest.raises(TypeError): + gs.str.partition(["a"]) + with pytest.raises(TypeError): + gs.str.rpartition(["a"]) + + @pytest.mark.parametrize( "data", [ @@ -1640,6 +1660,16 @@ def test_strings_strip_tests(data, to_strip): ) +def test_string_strip_fail(): + gs = cudf.Series(["a", "aa", ""]) + with pytest.raises(TypeError): + gs.str.strip(["a"]) + with pytest.raises(TypeError): + gs.str.lstrip(["a"]) + with pytest.raises(TypeError): + gs.str.rstrip(["a"]) + + @pytest.mark.parametrize( "data", [ @@ -2364,6 +2394,9 @@ def test_string_str_filter_characters(): ) assert_eq(expected, gs.str.filter_characters(filter, True, " ")) + with pytest.raises(TypeError): + gs.str.filter_characters(filter, True, ["a"]) + def test_string_str_code_points():