Skip to content

Commit

Permalink
ENH: Implement more string accessors through PyArrow (#54960)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Sep 6, 2023
1 parent 88683e9 commit faeedad
Showing 1 changed file with 23 additions and 8 deletions.
31 changes: 23 additions & 8 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@


if TYPE_CHECKING:
from collections.abc import Sequence

from pandas._typing import (
Dtype,
Scalar,
Expand Down Expand Up @@ -337,19 +339,13 @@ def _str_startswith(self, pat: str, na=None):
result = pc.starts_with(self._pa_array, pattern=pat)
if not isna(na):
result = result.fill_null(na)
result = self._result_converter(result)
if not isna(na):
result[isna(result)] = bool(na)
return result
return self._result_converter(result)

def _str_endswith(self, pat: str, na=None):
    """
    Test whether each element ends with ``pat`` using ``pc.ends_with``.

    Parameters
    ----------
    pat : str
        Suffix passed to ``pc.ends_with`` as ``pattern``.
    na : object, default None
        When this is not NA (per ``isna``), nulls in the Arrow result are
        replaced with it via ``fill_null``; otherwise nulls are left as-is.

    Returns
    -------
    The Arrow result passed through ``self._result_converter``.
    """
    result = pc.ends_with(self._pa_array, pattern=pat)
    if not isna(na):
        # Fill once, on the Arrow side. After this there are no nulls left,
        # so a second fill on the converted result would be a no-op.
        result = result.fill_null(na)
    return self._result_converter(result)

def _str_replace(
self,
Expand All @@ -368,6 +364,12 @@ def _str_replace(
result = func(self._pa_array, pattern=pat, replacement=repl, max_replacements=n)
return type(self)(result)

def _str_repeat(self, repeats: int | Sequence[int]):
    """
    Repeat each element ``repeats`` times.

    A single ``int`` is handled by ``pc.binary_repeat`` on the underlying
    Arrow array and wrapped back into ``type(self)``. Any other ``repeats``
    value is delegated to the parent class implementation.
    """
    if isinstance(repeats, int):
        repeated = pc.binary_repeat(self._pa_array, repeats)
        return type(self)(repeated)
    # Non-int input (e.g. a sequence of counts): defer to the parent class.
    return super()._str_repeat(repeats)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
Expand All @@ -382,6 +384,19 @@ def _str_fullmatch(
pat = f"{pat}$"
return self._str_match(pat, case, flags, na)

def _str_slice(
    self, start: int | None = None, stop: int | None = None, step: int | None = None
):
    """
    Slice each element as ``element[start:stop:step]``.

    Parameters
    ----------
    start : int or None, default None
        Start position; ``None`` means "from the beginning" for a positive
        step and "from the end" for a negative step (Python slice semantics).
    stop : int or None, default None
        Stop position. When ``None`` the parent class implementation is used.
    step : int or None, default None
        Step size; ``None`` is treated as 1.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the sliced Arrow array, or
    whatever the parent class ``_str_slice`` returns on the fallback paths.
    """
    if stop is None:
        return super()._str_slice(start, stop, step)
    if start is None and step is not None and step < 0:
        # With a negative step, an omitted start means "begin at the last
        # character" (e.g. "abcdef"[:2:-1] == "fed"). Defaulting start to 0
        # here would walk backwards from index 0 and yield empty strings,
        # so defer to the parent implementation for this case.
        return super()._str_slice(start, stop, step)
    if start is None:
        start = 0
    if step is None:
        step = 1
    return type(self)(
        pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
    )

def _str_isalnum(self):
    """
    Apply ``pc.utf8_is_alnum`` to the underlying Arrow array and return the
    outcome passed through ``self._result_converter``.
    """
    return self._result_converter(pc.utf8_is_alnum(self._pa_array))
Expand Down

0 comments on commit faeedad

Please sign in to comment.