Skip to content

Commit

Permalink
Fix strings strip() to accept only str Scalar for to_strip parameter (#…
Browse files Browse the repository at this point in the history
…10597)

Closes #10591 

Ensures the `to_strip` parameter is a `str` type when converting it to `cudf.Scalar`. It will now throw a `TypeError` as follows:
```
    libstrings.strip(self._column, cudf.Scalar(to_strip, "str"))
  File "/conda/envs/rapids/lib/python3.8/site-packages/cudf-22.6.0a0+96.g0aef0c1c3e.dirty-py3.8-linux-x86_64.egg/cudf/core/scalar.py", line 78, in __init__
    self._host_value, self._host_dtype = self._preprocess_host_value(
  File "/conda/envs/rapids/lib/python3.8/site-packages/cudf-22.6.0a0+96.g0aef0c1c3e.dirty-py3.8-linux-x86_64.egg/cudf/core/scalar.py", line 128, in _preprocess_host_value
    raise TypeError("Lists may not be cast to a different dtype")
TypeError: Lists may not be cast to a different dtype

```
This will also prevent the _sticky_ CUDA error.

Also, added the `str` parameter to other `cudf.Scalar` calls where only strings are supported as well.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #10597
  • Loading branch information
davidwendt authored Apr 6, 2022
1 parent 5f4f232 commit 956c7b5
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 10 deletions.
22 changes: 12 additions & 10 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2006,7 +2006,9 @@ def filter_alphanum(
repl = ""

return self._return_or_inplace(
libstrings.filter_alphanum(self._column, cudf.Scalar(repl), keep),
libstrings.filter_alphanum(
self._column, cudf.Scalar(repl, "str"), keep
),
)

def slice_from(
Expand Down Expand Up @@ -2141,7 +2143,7 @@ def slice_replace(

return self._return_or_inplace(
libstrings.slice_replace(
self._column, start, stop, cudf.Scalar(repl)
self._column, start, stop, cudf.Scalar(repl, "str")
),
)

Expand Down Expand Up @@ -2192,7 +2194,7 @@ def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex:
repl = ""

return self._return_or_inplace(
libstrings.insert(self._column, start, cudf.Scalar(repl)),
libstrings.insert(self._column, start, cudf.Scalar(repl, "str")),
)

def get(self, i: int = 0) -> SeriesOrIndex:
Expand Down Expand Up @@ -2643,7 +2645,7 @@ def rsplit(
)
else:
result_table = libstrings.rsplit_record(
self._column, cudf.Scalar(pat), n
self._column, cudf.Scalar(pat, "str"), n
)

return self._return_or_inplace(result_table, expand=expand)
Expand Down Expand Up @@ -2726,7 +2728,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex:

return self._return_or_inplace(
cudf.core.frame.Frame(
*libstrings.partition(self._column, cudf.Scalar(sep))
*libstrings.partition(self._column, cudf.Scalar(sep, "str"))
),
expand=expand,
)
Expand Down Expand Up @@ -2793,7 +2795,7 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex:

return self._return_or_inplace(
cudf.core.frame.Frame(
*libstrings.rpartition(self._column, cudf.Scalar(sep))
*libstrings.rpartition(self._column, cudf.Scalar(sep, "str"))
),
expand=expand,
)
Expand Down Expand Up @@ -3194,7 +3196,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex:
to_strip = ""

return self._return_or_inplace(
libstrings.strip(self._column, cudf.Scalar(to_strip))
libstrings.strip(self._column, cudf.Scalar(to_strip, "str"))
)

def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
Expand Down Expand Up @@ -3241,7 +3243,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
to_strip = ""

return self._return_or_inplace(
libstrings.lstrip(self._column, cudf.Scalar(to_strip))
libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str"))
)

def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
Expand Down Expand Up @@ -3296,7 +3298,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
to_strip = ""

return self._return_or_inplace(
libstrings.rstrip(self._column, cudf.Scalar(to_strip))
libstrings.rstrip(self._column, cudf.Scalar(to_strip, "str"))
)

def wrap(self, width: int, **kwargs) -> SeriesOrIndex:
Expand Down Expand Up @@ -4245,7 +4247,7 @@ def filter_characters(
table = str.maketrans(table)
return self._return_or_inplace(
libstrings.filter_characters(
self._column, table, keep, cudf.Scalar(repl)
self._column, table, keep, cudf.Scalar(repl, "str")
),
)

Expand Down
33 changes: 33 additions & 0 deletions python/cudf/cudf/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,12 @@ def test_string_slice_replace(string, number, diff, repr):
)


def test_string_slice_replace_fail():
    """Check that ``slice_replace`` rejects a list as the ``repl`` argument.

    The replacement is converted to a string ``cudf.Scalar``, so passing a
    list is expected to raise ``TypeError``.
    """
    strings = cudf.Series(["abc", "xyz", ""])
    # A one-element list is not a valid string scalar.
    bad_repl = ["_"]
    with pytest.raises(TypeError):
        strings.str.slice_replace(0, 1, bad_repl)


def test_string_insert():
gs = cudf.Series(["hello world", "holy accéntéd", "batman", None, ""])

Expand All @@ -1312,6 +1318,9 @@ def test_string_insert():
ps.str.slice(stop=5) + "---" + ps.str.slice(start=5),
)

with pytest.raises(TypeError):
gs.str.insert(0, ["+"])


_string_char_types_data = [
["abc", "xyz", "a", "ab", "123", "097"],
Expand Down Expand Up @@ -1404,6 +1413,9 @@ def test_string_filter_alphanum():
expected.append(rs)
assert_eq(gs.str.filter_alphanum("*", keep=False), cudf.Series(expected))

with pytest.raises(TypeError):
gs.str.filter_alphanum(["a"])


@pytest.mark.parametrize(
"case_op", ["title", "capitalize", "lower", "upper", "swapcase"]
Expand Down Expand Up @@ -1504,6 +1516,14 @@ def test_strings_partition(data):
assert_eq(pi.str.partition("-"), gi.str.partition("-"))


def test_string_partition_fail():
    """Check that ``partition``/``rpartition`` reject a list separator.

    Both methods convert ``sep`` to a string ``cudf.Scalar``, so a list is
    expected to raise ``TypeError`` for each of them.
    """
    strings = cudf.Series(["abc", "aa", "cba"])
    bad_sep = ["a"]
    # Each method is exercised in its own ``raises`` context so that both
    # must fail independently.
    for split_method in (strings.str.partition, strings.str.rpartition):
        with pytest.raises(TypeError):
            split_method(bad_sep)


@pytest.mark.parametrize(
"data",
[
Expand Down Expand Up @@ -1640,6 +1660,16 @@ def test_strings_strip_tests(data, to_strip):
)


def test_string_strip_fail():
    """Check that ``strip``/``lstrip``/``rstrip`` reject a list ``to_strip``.

    ``to_strip`` is converted to a string ``cudf.Scalar``, so a list is
    expected to raise ``TypeError`` for every strip variant.
    """
    strings = cudf.Series(["a", "aa", ""])
    bad_to_strip = ["a"]
    strip_methods = (
        strings.str.strip,
        strings.str.lstrip,
        strings.str.rstrip,
    )
    # Each variant gets its own ``raises`` context so all three must fail.
    for strip_method in strip_methods:
        with pytest.raises(TypeError):
            strip_method(bad_to_strip)


@pytest.mark.parametrize(
"data",
[
Expand Down Expand Up @@ -2364,6 +2394,9 @@ def test_string_str_filter_characters():
)
assert_eq(expected, gs.str.filter_characters(filter, True, " "))

with pytest.raises(TypeError):
gs.str.filter_characters(filter, True, ["a"])


def test_string_str_code_points():

Expand Down

0 comments on commit 956c7b5

Please sign in to comment.