From 0623200fe060df9ec324aa775cf9a7ffd3bdf429 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 12 Dec 2019 11:02:29 -0600
Subject: [PATCH 1/5] API: Return BoolArray for string ops

---
 doc/source/user_guide/text.rst |  9 ++++++++-
 pandas/core/strings.py         | 18 +++++++++++-------
 pandas/tests/test_strings.py   | 22 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index 072871f89bdae..032d51c5a388f 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from
 l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
    that return **numeric** output will always return a nullable integer dtype,
    rather than either int or float dtype, depending on the presence of NA values.
+   Methods returning **boolean** output will return a nullable boolean dtype.
 
    .. ipython:: python
 
@@ -89,7 +90,13 @@ l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
       s.astype(object).str.count("a")
       s.astype(object).dropna().str.count("a")
 
-   When NA values are present, the output dtype is float64.
+   When NA values are present, the output dtype is float64. Similarly for
+   methods returning boolean values.
+
+   .. ipython:: python
+
+      s.str.isdigit()
+      s.str.match("a")
 
 2. Some string methods, like :meth:`Series.str.decode` are not available
    on ``StringArray`` because ``StringArray`` only holds strings, not
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index d4d8be90402b7..724a85a83ef26 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -140,7 +140,7 @@ def _map_stringarray(
         The value to use for missing values. By default, this is
         the original value (NA).
     dtype : Dtype
-        The result dtype to use. Specifying this aviods an intermediate
+        The result dtype to use. Specifying this avoids an intermediate
         object-dtype allocation.
 
     Returns
@@ -150,14 +150,19 @@ def _map_stringarray(
         an ndarray.
 
     """
-    from pandas.arrays import IntegerArray, StringArray
+    from pandas.arrays import IntegerArray, StringArray, BooleanArray
 
     mask = isna(arr)
 
     assert isinstance(arr, StringArray)
     arr = np.asarray(arr)
 
-    if is_integer_dtype(dtype):
+    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+        if is_integer_dtype(dtype):
+            constructor = IntegerArray
+        else:
+            constructor = BooleanArray
+
         na_value_is_na = isna(na_value)
         if na_value_is_na:
             na_value = 1
@@ -167,13 +172,13 @@ def _map_stringarray(
             mask.view("uint8"),
             convert=False,
             na_value=na_value,
-            dtype=np.dtype("int64"),
+            dtype=np.dtype(dtype),
         )
 
         if not na_value_is_na:
             mask[:] = False
 
-        return IntegerArray(result, mask)
+        return constructor(result, mask)
 
     elif is_string_dtype(dtype) and not is_object_dtype(dtype):
         # i.e. StringDtype
@@ -181,7 +186,6 @@ def _map_stringarray(
             arr, func, mask.view("uint8"), convert=False, na_value=na_value
         )
         return StringArray(result)
-    # TODO: BooleanArray
     else:
         # This is when the result type is object. We reach this when
         # -> We know the result type is truly object (e.g. .encode returns bytes
@@ -3352,7 +3356,7 @@ def rindex(self, sub, start=0, end=None):
     Series.str.isalpha : Check whether all characters are alphabetic.
     Series.str.isnumeric : Check whether all characters are numeric.
     Series.str.isalnum : Check whether all characters are alphanumeric.
-    Series.str.isdigit : Check whether all characters are digits.
+    Series.str.isdigit : Check whether all characters are digits.h
     Series.str.isdecimal : Check whether all characters are decimal.
     Series.str.isspace : Check whether all characters are whitespace.
     Series.str.islower : Check whether all characters are lowercase.
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 584550d562b0d..0bd316f41cbe2 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -3524,6 +3524,12 @@ def test_string_array(any_string_method):
             assert result.dtype == "string"
             result = result.astype(object)
 
+        elif expected.dtype == "object" and lib.is_bool_array(
+            expected.values, skipna=True
+        ):
+            assert result.dtype == "boolean"
+            result = result.astype(object)
+
         elif expected.dtype == "float" and expected.isna().any():
             assert result.dtype == "Int64"
             result = result.astype("float")
@@ -3549,3 +3555,19 @@ def test_string_array_numeric_integer_array(method, expected):
     result = getattr(s.str, method)("a")
     expected = Series(expected, dtype="Int64")
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "method,expected",
+    [
+        ("isdigit", [False, None, True]),
+        ("isalpha", [True, None, False]),
+        ("isalnum", [True, None, True]),
+        ("isdigit", [False, None, True]),
+    ],
+)
+def test_string_array_boolean_array(method, expected):
+    s = Series(["a", None, "1"], dtype="string")
+    result = getattr(s.str, method)()
+    expected = Series(expected, dtype="boolean")
+    tm.assert_series_equal(result, expected)

From 1d9317eb60dcae86aa86fe622638bca95eff0f86 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 12 Dec 2019 11:38:34 -0600
Subject: [PATCH 2/5] int64

---
 pandas/core/strings.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 724a85a83ef26..295e7d6a74316 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -301,7 +301,7 @@ def str_count(arr, pat, flags=0):
     """
     regex = re.compile(pat, flags=flags)
     f = lambda x: len(regex.findall(x))
-    return _na_map(f, arr, dtype=int)
+    return _na_map(f, arr, dtype="int64")
 
 
 def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True):
@@ -1367,7 +1367,7 @@ def str_find(arr, sub, start=0, end=None, side="left"):
     else:
         f = lambda x: getattr(x, method)(sub, start, end)
 
-    return _na_map(f, arr, dtype=int)
+    return _na_map(f, arr, dtype="int64")
 
 
 def str_index(arr, sub, start=0, end=None, side="left"):
@@ -1387,7 +1387,7 @@ def str_index(arr, sub, start=0, end=None, side="left"):
     else:
         f = lambda x: getattr(x, method)(sub, start, end)
 
-    return _na_map(f, arr, dtype=int)
+    return _na_map(f, arr, dtype="int64")
 
 
 def str_pad(arr, width, side="left", fillchar=" "):
@@ -3212,7 +3212,7 @@ def rindex(self, sub, start=0, end=None):
         len,
         docstring=_shared_docs["len"],
         forbidden_types=None,
-        dtype=int,
+        dtype="int64",
         returns_string=False,
     )
 

From f6d9e4e1bc292037f5a53b9d73c3281ffadfcf23 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 12 Dec 2019 12:10:41 -0600
Subject: [PATCH 3/5] typo

---
 pandas/core/strings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 295e7d6a74316..acec80a028cb2 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -3356,7 +3356,7 @@ def rindex(self, sub, start=0, end=None):
     Series.str.isalpha : Check whether all characters are alphabetic.
     Series.str.isnumeric : Check whether all characters are numeric.
     Series.str.isalnum : Check whether all characters are alphanumeric.
-    Series.str.isdigit : Check whether all characters are digits.h
+    Series.str.isdigit : Check whether all characters are digits.
     Series.str.isdecimal : Check whether all characters are decimal.
     Series.str.isspace : Check whether all characters are whitespace.
     Series.str.islower : Check whether all characters are lowercase.

From 5f9dbe9a66daa027e11be3c7b21a845cd91fbf9a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 12 Dec 2019 12:11:49 -0600
Subject: [PATCH 4/5] fixup

---
 pandas/tests/test_strings.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 0bd316f41cbe2..d8b9c5983618e 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -1825,7 +1825,7 @@ def test_extractall_same_as_extract_subject_index(self):
 
     def test_empty_str_methods(self):
         empty_str = empty = Series(dtype=object)
-        empty_int = Series(dtype=int)
+        empty_int = Series(dtype="int64")
         empty_bool = Series(dtype=bool)
         empty_bytes = Series(dtype=object)
 

From b83d677048ec550dff52e61a51abf068feb226d4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 12 Dec 2019 15:02:29 -0600
Subject: [PATCH 5/5] types

---
 pandas/core/strings.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index acec80a028cb2..6ef42eb185e49 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -2,7 +2,7 @@
 from functools import wraps
 import re
 import textwrap
-from typing import TYPE_CHECKING, Any, Callable, Dict, List
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Type, Union
 import warnings
 
 import numpy as np
@@ -158,6 +158,7 @@ def _map_stringarray(
     arr = np.asarray(arr)
 
     if is_integer_dtype(dtype) or is_bool_dtype(dtype):
+        constructor: Union[Type[IntegerArray], Type[BooleanArray]]
         if is_integer_dtype(dtype):
             constructor = IntegerArray
         else: