Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking β€œSign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

patch: pandas-like and pyarrow scalar reduction fix #716

Merged
merged 10 commits into from
Aug 5, 2024
Prev Previous commit
Next Next commit
test refactor
FBruzzesi committed Aug 5, 2024
commit 7220f886bf2a2e001d95c43c2ae980fc80a00000
102 changes: 44 additions & 58 deletions tests/expr_and_series/reduction_test.py
Original file line number Diff line number Diff line change
@@ -8,73 +8,59 @@
from tests.utils import compare_dicts


def test_scalar_reduction_select(request: Any, constructor: Any) -> None:
if "dask" in str(constructor):
@pytest.mark.parametrize(
("expr", "expected"),
[
(
[nw.col("a").min().alias("min"), nw.col("a", "b").mean()],
{"min": [1], "a": [2], "b": [5]},
),
([(nw.col("a") + nw.col("b").max()).alias("x")], {"x": [7, 8, 9]}),
([nw.col("a"), nw.col("b").min()], {"a": [1, 2, 3], "b": [4, 4, 4]}),
([nw.col("a").max(), nw.col("b")], {"a": [3, 3, 3], "b": [4, 5, 6]}),
(
[nw.col("a"), nw.col("b").min().alias("min")],
{"a": [1, 2, 3], "min": [4, 4, 4]},
),
],
ids=range(5),
)
def test_scalar_reduction_select(
request: Any, constructor: Any, expr: list[Any], expected: dict[str, list[Any]]
) -> None:
if "dask" in str(constructor) and int(request.node.callspec.id[-1]) != 1:
Copy link
Member Author

@FBruzzesi FBruzzesi Aug 5, 2024 β€’

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't panic (just yet): this friendly request.node.callspec.id will look like the following: <constructor_name>-<id>. The id is specified in @pytest.mark.parametrize and ranges from 0 to 4 (number of test cases).

As dask passes the second test which corresponds to request.node.callspec.id = "dask_lazy_constructor-1", I am extracting the id value

request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3], "b": [4, 5, 6]}
df = nw.from_native(constructor(data))
result = df.select(
nw.col("a").min().alias("min"),
nw.col("b").max().alias("max"),
nw.col("a", "b").mean(),
)
expected = {"min": [1], "max": [6], "a": [2], "b": [5]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.select((nw.col("a") + nw.col("b").max()).alias("x"))
expected = {"x": [7, 8, 9]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.select(nw.col("a"), nw.col("b").min())
expected = {"a": [1, 2, 3], "b": [4, 4, 4]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.select(nw.col("a").max(), nw.col("b"))
expected = {"a": [3, 3, 3], "b": [4, 5, 6]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.select(nw.col("a"), nw.col("b").min().alias("min"))
expected = {"a": [1, 2, 3], "min": [4, 4, 4]}
result = df.select(*expr)
compare_dicts(result, expected)


def test_scalar_reduction_with_columns(request: Any, constructor: Any) -> None:
if "dask" in str(constructor):
@pytest.mark.parametrize(
("expr", "expected"),
[
(
[nw.col("a").min().alias("min"), nw.col("a", "b").mean()],
{"min": [1, 1, 1], "a": [2, 2, 2], "b": [5, 5, 5]},
),
([(nw.col("a") + nw.col("b").max()).alias("x")], {"x": [7, 8, 9]}),
([nw.col("a"), nw.col("b").min()], {"a": [1, 2, 3], "b": [4, 4, 4]}),
([nw.col("a").max(), nw.col("b")], {"a": [3, 3, 3], "b": [4, 5, 6]}),
(
[nw.col("a"), nw.col("b").min().alias("min")],
{"a": [1, 2, 3], "min": [4, 4, 4]},
),
],
ids=range(5),
)
def test_scalar_reduction_with_columns(
request: Any, constructor: Any, expr: list[Any], expected: dict[str, list[Any]]
) -> None:
if "dask" in str(constructor) and int(request.node.callspec.id[-1]) != 1:
request.applymarker(pytest.mark.xfail)

data = {"a": [1, 2, 3], "b": [4, 5, 6]}
df = nw.from_native(constructor(data))
result = df.with_columns(
nw.col("a").min().alias("min"),
nw.col("b").max().alias("max"),
nw.col("a", "b").mean(),
)
expected = {"min": [1, 1, 1], "max": [6, 6, 6], "a": [2, 2, 2], "b": [5, 5, 5]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.with_columns((nw.col("a") + nw.col("b").max()).alias("x")).select("x")
expected = {"x": [7, 8, 9]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.with_columns(nw.col("a"), nw.col("b").min())
expected = {"a": [1, 2, 3], "b": [4, 4, 4]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.with_columns(nw.col("a").max(), nw.col("b"))
expected = {"a": [3, 3, 3], "b": [4, 5, 6]}
compare_dicts(result, expected)

df = nw.from_native(constructor(data))
result = df.with_columns(nw.col("a"), nw.col("b").min().alias("min")).select(
"a", "min"
)
expected = {"a": [1, 2, 3], "min": [4, 4, 4]}
result = df.with_columns(*expr).select(*expected.keys())
compare_dicts(result, expected)