Fix default value of str.split expand parameter. (#10457)

This is a small fix to [match the pandas API](https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html) for the `expand` parameter of `Series.str.split` and `Series.str.rsplit`. In pandas, `expand` accepts only boolean values. Previously, the default was declared as `None` and then replaced inside the method body with the intended default of `False`. This PR makes `False` the declared default and removes that substitution. This is a small API break: users who explicitly passed `expand=None` will now get a `ValueError` instead of the intended default value, but the previous behavior was a bug with respect to pandas API compatibility.

Authors:
- Bradley Dice (https://github.com/bdice)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)
- Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: #10457
rapidsai · Mar 27, 2022 · 65ef0ea · 65ef0ea
1 parent 19ab7d6
commit 65ef0ea
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 13 deletions.
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
@@ -2299,7 +2299,7 @@ def split(
         self,
         pat: str = None,
         n: int = -1,
-        expand: bool = None,
+        expand: bool = False,
         regex: bool = None,
     ) -> SeriesOrIndex:
         """
@@ -2420,9 +2420,6 @@ def split(
         2                                <NA>  <NA>  <NA>     <NA>      <NA>
         """
 
-        if expand is None:
-            expand = False
-
         if expand not in (True, False):
             raise ValueError(
                 f"expand parameter accepts only : [True, False], "
@@ -2470,7 +2467,7 @@ def rsplit(
         self,
         pat: str = None,
         n: int = -1,
-        expand: bool = None,
+        expand: bool = False,
         regex: bool = None,
     ) -> SeriesOrIndex:
         """
@@ -2599,9 +2596,6 @@ def rsplit(
         2                                <NA>        <NA>
         """
 
-        if expand is None:
-            expand = False
-
         if expand not in (True, False):
             raise ValueError(
                 f"expand parameter accepts only : [True, False], "

diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
@@ -945,7 +945,7 @@ def test_string_upper(ps_gs):
 )
 @pytest.mark.parametrize("pat", [None, " ", "-"])
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_split(data, pat, n, expand):
     ps = pd.Series(data, dtype="str")
     gs = cudf.Series(data, dtype="str")
@@ -967,7 +967,7 @@ def test_string_split(data, pat, n, expand):
 )
 @pytest.mark.parametrize("pat", [None, " ", "\\-+", "\\s+"])
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_split_re(data, pat, n, expand):
     ps = pd.Series(data, dtype="str")
     gs = cudf.Series(data, dtype="str")
@@ -1510,7 +1510,7 @@ def test_strings_partition(data):
     ],
 )
 @pytest.mark.parametrize("n", [-1, 2, 1, 9])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_strings_rsplit(data, n, expand):
     gs = cudf.Series(data)
     ps = pd.Series(data)
@@ -1531,7 +1531,7 @@ def test_strings_rsplit(data, n, expand):
 
 
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_rsplit_re(n, expand):
     data = ["a b", " c ", "   d", "e   ", "f"]
     ps = pd.Series(data, dtype="str")
@@ -1566,7 +1566,7 @@ def test_string_rsplit_re(n, expand):
     ],
 )
 @pytest.mark.parametrize("n", [-1, 2, 1, 9])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_strings_split(data, n, expand):
     gs = cudf.Series(data)
     ps = pd.Series(data)