Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Expressify clip arguments #1709

Merged
merged 1 commit into from
Jan 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,13 +858,20 @@ def quantile(
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
return self._from_native_series(self._native_series[offset::n])

def clip(self: Self, lower_bound: Any | None, upper_bound: Any | None) -> Self:
import pyarrow as pa
def clip(
self: Self, lower_bound: Self | Any | None, upper_bound: Self | Any | None
) -> Self:
import pyarrow.compute as pc

arr = self._native_series
arr = pc.max_element_wise(arr, pa.scalar(lower_bound, type=arr.type))
arr = pc.min_element_wise(arr, pa.scalar(upper_bound, type=arr.type))
_, lower_bound = broadcast_and_extract_native(
self, lower_bound, self._backend_version
)
_, upper_bound = broadcast_and_extract_native(
self, upper_bound, self._backend_version
)
arr = pc.max_element_wise(arr, lower_bound)
arr = pc.min_element_wise(arr, upper_bound)

return self._from_native_series(arr)

Expand Down
5 changes: 5 additions & 0 deletions narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ def broadcast_and_extract_native(
from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.series import ArrowSeries

if rhs is None:
import pyarrow as pa

return lhs._native_series, pa.scalar(None, type=lhs._native_series.type)

# If `rhs` is the output of an expression evaluation, then it is
# a list of Series. So, we verify that that list is of length-1,
# and take the first (and only) element.
Expand Down
8 changes: 4 additions & 4 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,8 @@ def cum_prod(self: Self, *, reverse: bool) -> Self:

def is_between(
self,
lower_bound: Any,
upper_bound: Any,
lower_bound: Self | Any,
upper_bound: Self | Any,
closed: str = "both",
) -> Self:
if closed == "none":
Expand Down Expand Up @@ -672,8 +672,8 @@ def func(

def clip(
self: Self,
lower_bound: Any | None = None,
upper_bound: Any | None = None,
lower_bound: Self | Any | None,
upper_bound: Self | Any | None,
) -> Self:
return self._from_call(
lambda _input, lower_bound, upper_bound: _input.clip(
Expand Down
7 changes: 5 additions & 2 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,10 +988,13 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
return self._from_native_series(self._native_series.iloc[offset::n])

def clip(
self: Self, lower_bound: Any | None = None, upper_bound: Any | None = None
self: Self, lower_bound: Self | Any | None, upper_bound: Self | Any | None
) -> Self:
_, lower_bound = broadcast_align_and_extract_native(self, lower_bound)
_, upper_bound = broadcast_align_and_extract_native(self, upper_bound)
kwargs = {"axis": 0} if self._implementation is Implementation.MODIN else {}
return self._from_native_series(
self._native_series.clip(lower_bound, upper_bound)
self._native_series.clip(lower_bound, upper_bound, **kwargs)
)

def to_arrow(self: Self) -> Any:
Expand Down
9 changes: 6 additions & 3 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2938,8 +2938,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
# TODO @aivanoved: make type alias for numeric type
def clip(
self,
lower_bound: Any | None = None,
upper_bound: Any | None = None,
lower_bound: IntoExpr | Any | None = None,
upper_bound: IntoExpr | Any | None = None,
) -> Self:
r"""Clip values in the Series.

Expand Down Expand Up @@ -3066,7 +3066,10 @@ def clip(
s: [[-1,1,-1,3,-1,3]]
"""
return self.__class__(
lambda plx: self._to_compliant_expr(plx).clip(lower_bound, upper_bound)
lambda plx: self._to_compliant_expr(plx).clip(
extract_compliant(plx, lower_bound),
extract_compliant(plx, upper_bound),
)
)

def mode(self: Self) -> Self:
Expand Down
7 changes: 5 additions & 2 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,7 +1342,7 @@ def var(self, *, ddof: int = 1) -> Any:
return self._compliant_series.var(ddof=ddof)

def clip(
self, lower_bound: Any | None = None, upper_bound: Any | None = None
self, lower_bound: Self | Any | None = None, upper_bound: Self | Any | None = None
) -> Self:
r"""Clip values in the Series.

Expand Down Expand Up @@ -1484,7 +1484,10 @@ def clip(
]
"""
return self._from_compliant_series(
self._compliant_series.clip(lower_bound=lower_bound, upper_bound=upper_bound)
self._compliant_series.clip(
lower_bound=self._extract_native(lower_bound),
upper_bound=self._extract_native(upper_bound),
)
)

def is_in(self, other: Any) -> Self:
Expand Down
30 changes: 29 additions & 1 deletion tests/expr_and_series/clip_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from __future__ import annotations

import pytest

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data


def test_clip(constructor: Constructor) -> None:
def test_clip_expr(constructor: Constructor) -> None:
df = nw.from_native(constructor({"a": [1, 2, 3, -4, 5]}))
result = df.select(
lower_only=nw.col("a").clip(lower_bound=3),
Expand All @@ -21,6 +23,19 @@ def test_clip(constructor: Constructor) -> None:
assert_equal_data(result, expected)


def test_clip_expr_expressified(
request: pytest.FixtureRequest, constructor: Constructor
) -> None:
if "modin_pyarrow" in str(constructor):
request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3, -4, 5], "lb": [3, 2, 1, 1, 1], "ub": [4, 4, 2, 2, 2]}
df = nw.from_native(constructor(data))
result = df.select(nw.col("a").clip(nw.col("lb"), nw.col("ub") + 1))
expected_dict = {"a": [3, 2, 3, 1, 3]}
assert_equal_data(result, expected_dict)


def test_clip_series(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager({"a": [1, 2, 3, -4, 5]}), eager_only=True)
result = {
Expand All @@ -35,3 +50,16 @@ def test_clip_series(constructor_eager: ConstructorEager) -> None:
"both": [3, 3, 3, 3, 4],
}
assert_equal_data(result, expected)


def test_clip_series_expressified(
request: pytest.FixtureRequest, constructor_eager: ConstructorEager
) -> None:
if "modin_pyarrow" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
Comment on lines +58 to +59
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reported upstream: modin-project/modin#7415


data = {"a": [1, 2, 3, -4, 5], "lb": [3, 2, 1, 1, 1], "ub": [4, 4, 2, 2, 2]}
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df["a"].clip(df["lb"], df["ub"] + 1).to_frame()
expected_dict = {"a": [3, 2, 3, 1, 3]}
assert_equal_data(result, expected_dict)
Loading