From 65ef0ea4f7d1a3b91f30d24d87b1477b52412448 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Sat, 26 Mar 2022 17:21:35 -0700 Subject: [PATCH] Fix default value of str.split expand parameter. (#10457) This is a small fix to [match the pandas API](https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html) for the `expand` parameter of `Series.str.split`. Only boolean values are allowed. Currently the default is set to `None` and then replaced with the intended default of `False`. This PR changes it to have a default value of `False`. This is a tiny bit of an API break because users who explicitly passed `None` will now see an error instead of getting the intended default value, but the previous behavior was a bug with respect to pandas API compatibility. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/cudf/pull/10457 --- python/cudf/cudf/core/column/string.py | 10 ++-------- python/cudf/cudf/tests/test_string.py | 10 +++++----- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 95bb06ebb0c..d18bcaa84f4 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -2299,7 +2299,7 @@ def split( self, pat: str = None, n: int = -1, - expand: bool = None, + expand: bool = False, regex: bool = None, ) -> SeriesOrIndex: """ @@ -2420,9 +2420,6 @@ def split( 2 """ - if expand is None: - expand = False - if expand not in (True, False): raise ValueError( f"expand parameter accepts only : [True, False], " @@ -2470,7 +2467,7 @@ def rsplit( self, pat: str = None, n: int = -1, - expand: bool = None, + expand: bool = False, regex: bool = None, ) -> SeriesOrIndex: """ @@ -2599,9 +2596,6 @@ def rsplit( 2 """ - if expand is None: - expand = False - if expand not in (True, False): raise ValueError( f"expand parameter accepts only : [True, False], " diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index f237e5bf715..f5bfcd8c9d2 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -945,7 +945,7 @@ def test_string_upper(ps_gs): ) @pytest.mark.parametrize("pat", [None, " ", "-"]) @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) -@pytest.mark.parametrize("expand", [True, False, None]) +@pytest.mark.parametrize("expand", [True, False]) def test_string_split(data, pat, n, expand): ps = pd.Series(data, dtype="str") gs = cudf.Series(data, dtype="str") @@ -967,7 +967,7 @@ def test_string_split(data, pat, n, expand): ) @pytest.mark.parametrize("pat", [None, " ", "\\-+", "\\s+"]) @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) -@pytest.mark.parametrize("expand", [True, False, None]) +@pytest.mark.parametrize("expand", [True, False]) def test_string_split_re(data, pat, n, expand): ps = pd.Series(data, dtype="str") gs = cudf.Series(data, dtype="str") @@ -1510,7 +1510,7 @@ def test_strings_partition(data): ], ) @pytest.mark.parametrize("n", [-1, 2, 1, 9]) -@pytest.mark.parametrize("expand", [True, False, None]) +@pytest.mark.parametrize("expand", [True, False]) def test_strings_rsplit(data, n, expand): gs = cudf.Series(data) ps = pd.Series(data) @@ -1531,7 +1531,7 @@ def test_strings_rsplit(data, n, expand): @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10]) -@pytest.mark.parametrize("expand", [True, False, None]) +@pytest.mark.parametrize("expand", [True, False]) def test_string_rsplit_re(n, expand): data = ["a b", " c ", " d", "e ", "f"] ps = pd.Series(data, dtype="str") @@ -1566,7 +1566,7 @@ def test_string_rsplit_re(n, expand): ], ) @pytest.mark.parametrize("n", [-1, 2, 1, 9]) -@pytest.mark.parametrize("expand", [True, False, None]) +@pytest.mark.parametrize("expand", [True, False]) def test_strings_split(data, n, expand): gs = cudf.Series(data) ps = pd.Series(data)