Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add str.len_chars #1036

Merged
merged 5 commits into from
Sep 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/expr_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- contains
- ends_with
- head
- len_chars
- slice
- replace
- replace_all
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series_str.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- contains
- ends_with
- head
- len_chars
- replace
- replace_all
- slice
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,9 @@ class ArrowExprStringNamespace:
def __init__(self, expr: ArrowExpr) -> None:
    """Bind this string namespace to the expression it operates on."""
    self._expr = expr

def len_chars(self) -> ArrowExpr:
    """Return an expression for the length of each string.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the wrapped expression plus the `str` namespace and `len_chars`
    method names.
    """
    namespace, method = "str", "len_chars"
    return reuse_series_namespace_implementation(self._expr, namespace, method)

def replace(
self,
pattern: str,
Expand Down
7 changes: 7 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,13 @@ class ArrowSeriesStringNamespace:
def __init__(self: Self, series: ArrowSeries) -> None:
    """Bind this string namespace to the series it operates on."""
    self._arrow_series = series

def len_chars(self) -> ArrowSeries:
    """Return a new series holding the length of each string.

    Lengths are computed with `pyarrow.compute.utf8_length` on the native
    series and re-wrapped via `_from_native_series`.
    """
    import pyarrow.compute as pc  # ignore-banned-import()

    series = self._arrow_series
    wrap = series._from_native_series
    return wrap(pc.utf8_length(series._native_series))

def replace(
self, pattern: str, value: str, *, literal: bool = False, n: int = 1
) -> ArrowSeries:
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,11 @@ class DaskExprStringNamespace:
def __init__(self, expr: DaskExpr) -> None:
    """Bind this string namespace to the expression it operates on."""
    self._expr = expr

def len_chars(self) -> DaskExpr:
    """Return an expression that applies the native `.str.len()` to its input.

    The callable is registered through `_from_call` under the label `"len"`
    with `returns_scalar=False`.
    """
    return self._expr._from_call(
        lambda _input: _input.str.len(),
        "len",
        returns_scalar=False,
    )

def replace(
self,
pattern: str,
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,11 @@ class PandasLikeExprStringNamespace:
def __init__(self, expr: PandasLikeExpr) -> None:
    """Bind this string namespace to the expression it operates on."""
    self._expr = expr

def len_chars(self) -> PandasLikeExpr:
    """Return an expression for the length of each string.

    The work is handed to `reuse_series_namespace_implementation`, which is
    given the wrapped expression plus the `str` namespace and `len_chars`
    method names.
    """
    namespace, method = "str", "len_chars"
    return reuse_series_namespace_implementation(self._expr, namespace, method)

def replace(
self,
pattern: str,
Expand Down
5 changes: 5 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,11 @@ class PandasLikeSeriesStringNamespace:
def __init__(self, series: PandasLikeSeries) -> None:
    """Bind this string namespace to the series it operates on."""
    self._pandas_series = series

def len_chars(self) -> PandasLikeSeries:
    """Return a new series holding the length of each string.

    Lengths come from the native series' `.str.len()` accessor and are
    re-wrapped via `_from_native_series`.
    """
    series = self._pandas_series
    wrap = series._from_native_series
    return wrap(series._native_series.str.len())

def replace(
self, pattern: str, value: str, *, literal: bool = False, n: int = 1
) -> PandasLikeSeries:
Expand Down
44 changes: 44 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2021,6 +2021,50 @@ class ExprStringNamespace:
def __init__(self, expr: Expr) -> None:
    """Bind this string namespace to the expression it operates on."""
    self._expr = expr

def len_chars(self) -> Expr:
    r"""
    Return the length of each string as the number of characters.

    Missing values stay missing in the result, as the last row of each
    example below shows.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = {"words": ["foo", "Café", "345", "東京", None]}
        >>> df_pd = pd.DataFrame(data)
        >>> df_pl = pl.DataFrame(data)

        We define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(df):
        ...     return df.with_columns(words_len=nw.col("words").str.len_chars())

        We can then pass either pandas or Polars to `func`:

        >>> func(df_pd)
          words  words_len
        0   foo        3.0
        1  Café        4.0
        2   345        3.0
        3    東京        2.0
        4  None        NaN

        >>> func(df_pl)
        shape: (5, 2)
        ┌───────┬───────────┐
        │ words ┆ words_len │
        │ ---   ┆ ---       │
        │ str   ┆ u32       │
        ╞═══════╪═══════════╡
        │ foo   ┆ 3         │
        │ Café  ┆ 4         │
        │ 345   ┆ 3         │
        │ 東京  ┆ 2         │
        │ null  ┆ null      │
        └───────┴───────────┘
    """
    # Defer to the backend: resolve the wrapped expression against `plx`,
    # then call that backend's own `str.len_chars`.
    return self._expr.__class__(lambda plx: self._expr._call(plx).str.len_chars())

def replace(
self, pattern: str, value: str, *, literal: bool = False, n: int = 1
) -> Expr:
Expand Down
43 changes: 43 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2467,6 +2467,49 @@ class SeriesStringNamespace:
def __init__(self, series: Series) -> None:
    """Bind this string namespace to the series it operates on."""
    self._narwhals_series = series

def len_chars(self) -> Series:
    r"""
    Return the length of each string as the number of characters.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import narwhals as nw
        >>> data = ["foo", "Café", "345", "東京", None]
        >>> s_pd = pd.Series(data)
        >>> s_pl = pl.Series(data)

        We define a dataframe-agnostic function:

        >>> @nw.narwhalify
        ... def func(s):
        ...     return s.str.len_chars()

        We can then pass either pandas or Polars to `func`:

        >>> func(s_pd)
        0    3.0
        1    4.0
        2    3.0
        3    2.0
        4    NaN
        dtype: float64

        >>> func(s_pl)  # doctest: +NORMALIZE_WHITESPACE
        shape: (5,)
        Series: '' [u32]
        [
           3
           4
           3
           2
           null
        ]
    """
    owner = self._narwhals_series
    rewrap = owner._from_compliant_series
    # The compliant (backend) series does the counting; the result is
    # wrapped back into a narwhals Series.
    return rewrap(owner._compliant_series.str.len_chars())

def replace(
self, pattern: str, value: str, *, literal: bool = False, n: int = 1
) -> Series:
Expand Down
25 changes: 25 additions & 0 deletions tests/expr_and_series/str/len_chars_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from typing import Any

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import compare_dicts

# Sample strings shared by the tests in this module: plain ASCII, accented
# Latin, digits and CJK text.
data = {"a": ["foo", "foobar", "Café", "345", "東京"]}


def test_str_len_chars(constructor: Constructor) -> None:
    """Check `str.len_chars` used as an expression inside `select`."""
    frame = nw.from_native(constructor(data))
    lengths = frame.select(nw.col("a").str.len_chars())
    compare_dicts(lengths, {"a": [3, 6, 4, 3, 2]})


def test_str_len_chars_series(constructor_eager: Any) -> None:
    """Check `str.len_chars` called on a series taken from an eager frame."""
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    lengths = frame.select(frame["a"].str.len_chars())
    compare_dicts(lengths, {"a": [3, 6, 4, 3, 2]})
Loading