Skip to content

Commit

Permalink
ENH: Added isascii() string method fixing issue #59091 (#60532)
Browse files Browse the repository at this point in the history
* first

* second

* Update object_array.py

* third

* ascii

* ascii2

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* ascii3

* style

* style

* style

* style

* docs

* reset

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update doc/source/whatsnew/v3.0.0.rst

---------

Co-authored-by: Abby VeCasey <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
4 people authored Jan 2, 2025
1 parent adb6689 commit 228627a
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Other enhancements
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ def _str_isalpha(self):
result = pc.utf8_is_alpha(self._pa_array)
return self._convert_bool_result(result)

def _str_isascii(self):
result = pc.string_is_ascii(self._pa_array)
return self._convert_bool_result(result)

def _str_isdecimal(self):
result = pc.utf8_is_decimal(self._pa_array)
return self._convert_bool_result(result)
Expand Down
46 changes: 45 additions & 1 deletion pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3415,7 +3415,8 @@ def len(self):
# cases:
# upper, lower, title, capitalize, swapcase, casefold
# boolean:
# isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle
# isalpha, isnumeric isalnum isdigit isdecimal isspace islower
# isupper istitle isascii
# _doc_args holds dict of strings to use in substituting casemethod docs
_doc_args: dict[str, dict[str, str]] = {}
_doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""}
Expand Down Expand Up @@ -3495,6 +3496,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3518,6 +3520,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3544,6 +3547,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand Down Expand Up @@ -3576,6 +3580,7 @@ def casefold(self):
Series.str.isdigit : Check whether all characters are digits.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3601,6 +3606,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3627,6 +3633,7 @@ def casefold(self):
Series.str.isdigit : Check whether all characters are digits.
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3649,6 +3656,7 @@ def casefold(self):
Series.str.isdigit : Check whether all characters are digits.
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Series.str.istitle : Check whether all characters are titlecase.
Expand All @@ -3674,6 +3682,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.istitle : Check whether all characters are titlecase.
Examples
Expand All @@ -3697,6 +3706,7 @@ def casefold(self):
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.isascii : Check whether all characters are ascii.
Series.str.isupper : Check whether all characters are uppercase.
Examples
Expand All @@ -3714,11 +3724,40 @@ def casefold(self):
3 False
dtype: bool
"""
_shared_docs["isascii"] = """
See Also
--------
Series.str.isalpha : Check whether all characters are alphabetic.
Series.str.isnumeric : Check whether all characters are numeric.
Series.str.isalnum : Check whether all characters are alphanumeric.
Series.str.isdigit : Check whether all characters are digits.
Series.str.isdecimal : Check whether all characters are decimal.
Series.str.isspace : Check whether all characters are whitespace.
Series.str.islower : Check whether all characters are lowercase.
Series.str.istitle : Check whether all characters are titlecase.
Series.str.isupper : Check whether all characters are uppercase.
Examples
--------
The ``s5.str.isascii`` method checks whether all characters are ASCII
characters, which include the digits 0-9, the uppercase and lowercase
letters A-Z, and some other special characters.
>>> s5 = pd.Series(['ö', 'see123', 'hello world', ''])
>>> s5.str.isascii()
0 False
1 True
2 True
3 True
dtype: bool
"""

_doc_args["isalnum"] = {"type": "alphanumeric", "method": "isalnum"}
_doc_args["isalpha"] = {"type": "alphabetic", "method": "isalpha"}
_doc_args["isdigit"] = {"type": "digits", "method": "isdigit"}
_doc_args["isspace"] = {"type": "whitespace", "method": "isspace"}
_doc_args["islower"] = {"type": "lowercase", "method": "islower"}
_doc_args["isascii"] = {"type": "ascii", "method": "isascii"}
_doc_args["isupper"] = {"type": "uppercase", "method": "isupper"}
_doc_args["istitle"] = {"type": "titlecase", "method": "istitle"}
_doc_args["isnumeric"] = {"type": "numeric", "method": "isnumeric"}
Expand Down Expand Up @@ -3750,6 +3789,11 @@ def casefold(self):
docstring=_shared_docs["ismethods"] % _doc_args["islower"]
+ _shared_docs["islower"],
)
isascii = _map_and_wrap(
"isascii",
docstring=_shared_docs["ismethods"] % _doc_args["isascii"]
+ _shared_docs["isascii"],
)
isupper = _map_and_wrap(
"isupper",
docstring=_shared_docs["ismethods"] % _doc_args["isupper"]
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/strings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ def _str_isalnum(self):
def _str_isalpha(self):
pass

@abc.abstractmethod
def _str_isascii(self):
pass

@abc.abstractmethod
def _str_isdecimal(self):
pass
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ def _str_isalnum(self):
def _str_isalpha(self):
return self._str_map(str.isalpha, dtype="bool")

def _str_isascii(self):
return self._str_map(str.isascii, dtype="bool")

def _str_isdecimal(self):
return self._str_map(str.isdecimal, dtype="bool")

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/strings/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"get_dummies",
"isalnum",
"isalpha",
"isascii",
"isdecimal",
"isdigit",
"islower",
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/strings/test_string_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def test_string_array_numeric_integer_array(nullable_string_dtype, method, expec
[
("isdigit", [False, None, True]),
("isalpha", [True, None, False]),
("isascii", [True, None, True]),
("isalnum", [True, None, True]),
("isnumeric", [False, None, True]),
],
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def test_empty_str_methods(any_string_dtype):
# ismethods should always return boolean (GH 29624)
tm.assert_series_equal(empty_bool, empty.str.isalnum())
tm.assert_series_equal(empty_bool, empty.str.isalpha())
tm.assert_series_equal(empty_bool, empty.str.isascii())
tm.assert_series_equal(empty_bool, empty.str.isdigit())
tm.assert_series_equal(empty_bool, empty.str.isspace())
tm.assert_series_equal(empty_bool, empty.str.islower())
Expand All @@ -177,6 +178,7 @@ def test_empty_str_methods(any_string_dtype):
@pytest.mark.parametrize(
"method, expected",
[
("isascii", [True, True, True, True, True, True, True, True, True, True]),
("isalnum", [True, True, True, True, True, False, True, True, False, False]),
("isalpha", [True, True, True, False, False, False, True, False, False, False]),
(
Expand Down

0 comments on commit 228627a

Please sign in to comment.