From 65ef0ea4f7d1a3b91f30d24d87b1477b52412448 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Sat, 26 Mar 2022 17:21:35 -0700
Subject: [PATCH] Fix default value of str.split expand parameter. (#10457)

This is a small fix to [match the pandas API](https://pandas.pydata.org/docs/reference/api/pandas.Series.str.split.html) for the `expand` parameter of `Series.str.split` and `Series.str.rsplit`. Only boolean values are allowed. Currently the default is set to `None` and then replaced inside each method with the intended default of `False`. This PR changes the parameter to have a default value of `False` directly.

This is a minor API break: users who explicitly passed `expand=None` will now get a `ValueError` instead of silently receiving the intended default value. However, the previous behavior was a bug with respect to pandas API compatibility.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: https://github.com/rapidsai/cudf/pull/10457
---
 python/cudf/cudf/core/column/string.py | 10 ++--------
 python/cudf/cudf/tests/test_string.py  | 10 +++++-----
 2 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 95bb06ebb0c..d18bcaa84f4 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -2299,7 +2299,7 @@ def split(
         self,
         pat: str = None,
         n: int = -1,
-        expand: bool = None,
+        expand: bool = False,
         regex: bool = None,
     ) -> SeriesOrIndex:
         """
@@ -2420,9 +2420,6 @@ def split(
         2                                <NA>  <NA>  <NA>     <NA>      <NA>
         """
 
-        if expand is None:
-            expand = False
-
         if expand not in (True, False):
             raise ValueError(
                 f"expand parameter accepts only : [True, False], "
@@ -2470,7 +2467,7 @@ def rsplit(
         self,
         pat: str = None,
         n: int = -1,
-        expand: bool = None,
+        expand: bool = False,
         regex: bool = None,
     ) -> SeriesOrIndex:
         """
@@ -2599,9 +2596,6 @@ def rsplit(
         2                                <NA>        <NA>
         """
 
-        if expand is None:
-            expand = False
-
         if expand not in (True, False):
             raise ValueError(
                 f"expand parameter accepts only : [True, False], "
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index f237e5bf715..f5bfcd8c9d2 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -945,7 +945,7 @@ def test_string_upper(ps_gs):
 )
 @pytest.mark.parametrize("pat", [None, " ", "-"])
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_split(data, pat, n, expand):
     ps = pd.Series(data, dtype="str")
     gs = cudf.Series(data, dtype="str")
@@ -967,7 +967,7 @@ def test_string_split(data, pat, n, expand):
 )
 @pytest.mark.parametrize("pat", [None, " ", "\\-+", "\\s+"])
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_split_re(data, pat, n, expand):
     ps = pd.Series(data, dtype="str")
     gs = cudf.Series(data, dtype="str")
@@ -1510,7 +1510,7 @@ def test_strings_partition(data):
     ],
 )
 @pytest.mark.parametrize("n", [-1, 2, 1, 9])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_strings_rsplit(data, n, expand):
     gs = cudf.Series(data)
     ps = pd.Series(data)
@@ -1531,7 +1531,7 @@ def test_strings_rsplit(data, n, expand):
 
 
 @pytest.mark.parametrize("n", [-1, 0, 1, 3, 10])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_string_rsplit_re(n, expand):
     data = ["a b", " c ", "   d", "e   ", "f"]
     ps = pd.Series(data, dtype="str")
@@ -1566,7 +1566,7 @@ def test_string_rsplit_re(n, expand):
     ],
 )
 @pytest.mark.parametrize("n", [-1, 2, 1, 9])
-@pytest.mark.parametrize("expand", [True, False, None])
+@pytest.mark.parametrize("expand", [True, False])
 def test_strings_split(data, n, expand):
     gs = cudf.Series(data)
     ps = pd.Series(data)