Skip to content

Commit

Permalink
start moving dask tests over (narwhals-dev#703)
Browse files: browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored and aivanoved committed Aug 6, 2024
1 parent 828b3ba commit aca43d1
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 317 deletions.
2 changes: 2 additions & 0 deletions narwhals/_dask/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ def is_between(
upper_bound: Any,
closed: str = "both",
) -> Self:
if closed == "none":
closed = "neither"
return self._from_call(
lambda _input, lower_bound, upper_bound, closed: _input.between(
lower_bound,
Expand Down
299 changes: 0 additions & 299 deletions tests/dask_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,111 +34,6 @@
pytest.skip("Dask tests require Python 3.9+", allow_module_level=True)


def test_with_columns() -> None:
    """Exercise ``with_columns`` on a Dask-backed frame.

    Mixes positional expressions (one overwriting ``a``, one aliased to
    ``c``) with keyword-named expressions ``d`` through ``i``.
    """
    import dask.dataframe as dd

    native = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
    frame = nw.from_native(native)

    positional = [
        nw.col("a") + 1,
        (nw.col("a") + nw.col("b").mean()).alias("c"),
    ]
    named = {
        "d": nw.col("a"),
        "e": nw.col("a") + nw.col("b"),
        "f": nw.col("b") - 1,
        "g": nw.col("a") - nw.col("b"),
        "h": nw.col("a") * 3,
        "i": nw.col("a") * nw.col("b"),
    }
    result = frame.with_columns(*positional, **named)

    expected = {
        "a": [2, 3, 4],
        "b": [4, 5, 6],
        "c": [6.0, 7.0, 8.0],
        "d": [1, 2, 3],
        "e": [5, 7, 9],
        "f": [3, 4, 5],
        "g": [-3, -3, -3],
        "h": [3, 6, 9],
        "i": [4, 10, 18],
    }
    compare_dicts(result, expected)


def test_shift() -> None:
    """Shift ``a`` by +1 and ``b`` by -1 and compare against NaN-padded columns."""
    import dask.dataframe as dd

    nan = float("nan")
    native = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
    shifted = nw.from_native(native).with_columns(
        nw.col("a").shift(1),
        nw.col("b").shift(-1),
    )
    compare_dicts(shifted, {"a": [nan, 1, 2], "b": [5, 6, nan]})


def test_min() -> None:
    """Add the minimum of ``b`` to every row of ``a`` and store it as ``c``."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    frame = nw.from_native(dd.from_pandas(source))
    total = nw.col("a") + nw.col("b").min()
    result = frame.with_columns(total.alias("c"))
    compare_dicts(result, {"a": [1, 2, 3], "b": [4, 5, 6], "c": [5, 6, 7]})


def test_max() -> None:
    """Add the maximum of ``b`` to every row of ``a`` and store it as ``c``."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    frame = nw.from_native(dd.from_pandas(source))
    total = nw.col("a") + nw.col("b").max()
    result = frame.with_columns(total.alias("c"))
    compare_dicts(result, {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})


def test_cum_sum() -> None:
    """Replace ``a`` and ``b`` with their running totals."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    frame = nw.from_native(dd.from_pandas(source))
    running = nw.col("a", "b").cum_sum()
    compare_dicts(frame.with_columns(running), {"a": [1, 3, 6], "b": [4, 9, 15]})


def test_sum() -> None:
    """Add the sum of ``b`` to every row of ``a`` and store it as ``c``."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    frame = nw.from_native(dd.from_pandas(source))
    total = nw.col("a") + nw.col("b").sum()
    result = frame.with_columns(total.alias("c"))
    compare_dicts(result, {"a": [1, 2, 3], "b": [4, 5, 6], "c": [16, 17, 18]})


@pytest.mark.parametrize(
    ("closed", "expected"),
    [
        ("left", [True, True, True, False]),
        ("right", [False, True, True, True]),
        ("both", [True, True, True, True]),
        ("neither", [False, True, True, False]),
    ],
)
def test_is_between(closed: str, expected: list[bool]) -> None:
    """Check ``is_between(1, 5)`` on ``[1, 4, 2, 5]`` for each ``closed`` mode."""
    import dask.dataframe as dd

    native = dd.from_pandas(pd.DataFrame({"a": [1, 4, 2, 5]}))
    in_range = nw.col("a").is_between(1, 5, closed=closed)
    result = nw.from_native(native).with_columns(in_range)
    compare_dicts(result, {"a": expected})


@pytest.mark.parametrize(
("prefix", "expected"),
[
Expand Down Expand Up @@ -177,23 +72,6 @@ def test_ends_with(suffix: str, expected: dict[str, list[bool]]) -> None:
compare_dicts(result, expected)


def test_contains() -> None:
    """Match ``pets`` against a regex carrying an inline ``(?i)`` flag."""
    import dask.dataframe as dd

    pets = ["cat", "dog", "rabbit and parrot", "dove"]
    frame = nw.from_native(dd.from_pandas(pd.DataFrame({"pets": pets})))

    pattern = "(?i)parrot|Dove"
    result = frame.with_columns(
        case_insensitive_match=nw.col("pets").str.contains(pattern)
    )
    compare_dicts(
        result,
        {"pets": pets, "case_insensitive_match": [False, False, True, True]},
    )


@pytest.mark.parametrize(
("offset", "length", "expected"),
[(1, 2, {"a": ["da", "df"]}), (-2, None, {"a": ["as", "as"]})],
Expand Down Expand Up @@ -291,61 +169,6 @@ def test_str_to_lowercase(
compare_dicts(result_frame, expected)


def test_columns() -> None:
    """The narwhals wrapper reports the column names of the Dask frame."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 2, 3], "b": ["cat", "bat", "mat"]})
    frame = nw.from_native(dd.from_pandas(source))

    # Compared as a set, so column order is not part of this check.
    assert set(frame.columns) == {"a", "b"}


def test_filter() -> None:
    """Keep only the rows where ``a`` is greater than 1."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
    frame = nw.from_native(dd.from_pandas(source))

    kept = frame.filter(nw.col("a") > 1)
    compare_dicts(kept, {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]})


def test_select() -> None:
    """Select a column by name, a derived column, and an aliased derived column."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
    frame = nw.from_native(dd.from_pandas(source))

    b_plus_one = nw.col("b") + 1
    z_doubled = (nw.col("z") * 2).alias("z*2")
    result = frame.select("a", b_plus_one, z_doubled)

    compare_dicts(
        result,
        {"a": [1, 3, 2], "b": [5, 5, 7], "z*2": [14.0, 16.0, 18.0]},
    )


def test_str_only_select() -> None:
    """Select columns using plain string names only."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]})
    frame = nw.from_native(dd.from_pandas(source))

    compare_dicts(frame.select("a", "b"), {"a": [1, 3, 2], "b": [4, 4, 6]})


def test_empty_select() -> None:
    """Selecting no columns and collecting yields a frame of shape ``(0, 0)``."""
    import dask.dataframe as dd

    lazy = nw.from_native(dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}))).lazy()
    collected = lazy.select().collect()
    assert collected.shape == (0, 0)


def test_dt_year() -> None:
import dask.dataframe as dd

Expand Down Expand Up @@ -469,125 +292,3 @@ def test_dt_ordinal_day() -> None:
result = df.with_columns(ordinal_day=nw.col("a").dt.ordinal_day())
expected = {"a": data["a"], "ordinal_day": [7, 32]}
compare_dicts(result, expected)


def test_drop_nulls() -> None:
    """Drop nulls from each column selected on its own.

    ``A`` loses its single null, ``C`` (all null) becomes empty, and ``B``
    and ``D`` (no nulls) come back unchanged.
    """
    import dask.dataframe as dd

    data = {
        "A": [1, 2, None, 4],
        "B": [5, 6, 7, 8],
        "C": [None, None, None, None],
        "D": [9, 10, 11, 12],
    }
    frame = nw.from_native(dd.from_pandas(pd.DataFrame(data)))

    cases = [
        ("A", [1.0, 2.0, 4.0]),
        ("B", [5, 6, 7, 8]),
        ("C", []),
        ("D", [9, 10, 11, 12]),
    ]
    for column, remaining in cases:
        result = frame.select(nw.col(column)).drop_nulls()
        compare_dicts(result, {column: remaining})


def test_fill_null_series() -> None:
    """Fill the nulls of every column with the constant 99."""
    import dask.dataframe as dd

    data = {
        "a": [0.0, None, 2, 3, 4],
        "b": [1.0, None, None, 5, 3],
        "c": [5.0, None, 3, 2, 1],
    }
    frame = nw.from_native(dd.from_pandas(pd.DataFrame(data)))

    filled = {name: nw.col(name).fill_null(99) for name in ("a", "b", "c")}
    result = frame.with_columns(**filled)

    compare_dicts(
        result,
        {
            "a": [0.0, 99, 2, 3, 4],
            "b": [1.0, 99, 99, 5, 3],
            "c": [5.0, 99, 3, 2, 1],
        },
    )


def test_comparison_operations() -> None:
    """Filter rows with ``>``, ``>=``, ``<`` and ``<=`` between two columns."""
    import dask.dataframe as dd

    frame = nw.from_native(
        dd.from_pandas(pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]}))
    )
    a, b = nw.col("a"), nw.col("b")

    # Each predicate is paired with the rows it is expected to keep.
    cases = [
        (a > b, {"a": [3], "b": [1]}),
        (a >= b, {"a": [2, 3], "b": [2, 1]}),
        (a < b, {"a": [1], "b": [3]}),
        (a <= b, {"a": [1, 2], "b": [3, 2]}),
    ]
    for predicate, expected in cases:
        compare_dicts(frame.filter(predicate), expected)


def test_and_operations() -> None:
    """Filter on the element-wise ``&`` of two boolean columns."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [True, True, False], "b": [True, False, True]})
    frame = nw.from_native(dd.from_pandas(source))

    both_true = nw.col("a") & nw.col("b")
    compare_dicts(frame.filter(both_true), {"a": [True], "b": [True]})


def test_allh() -> None:
    """Select ``all_horizontal`` over column ``a`` under the name ``all``."""
    import dask.dataframe as dd

    source = pd.DataFrame(
        {
            "a": [False, False, True],
            "b": [False, True, True],
        }
    )
    frame = nw.from_native(dd.from_pandas(source))

    # NOTE(review): only "a" is passed to all_horizontal; "b" is built but
    # unused. Confirm whether both columns were meant to be combined.
    reduced = nw.all_horizontal(nw.col("a"))
    compare_dicts(frame.select(all=reduced), {"all": [False, False, True]})


@pytest.mark.filterwarnings("ignore:Determining|Resolving.*")
def test_schema() -> None:
    """The ``schema`` property maps each column name to its narwhals dtype."""
    import dask.dataframe as dd

    df = nw.from_native(
        dd.from_pandas(pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]}))
    )
    expected = {"a": nw.Int64, "b": nw.Int64, "z": nw.Float64}

    result = df.schema
    assert result == expected


def test_collect_schema() -> None:
    """``collect_schema()`` maps each column name to its narwhals dtype."""
    import dask.dataframe as dd

    source = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8, 9]})
    frame = nw.from_native(dd.from_pandas(source))

    assert frame.collect_schema() == {
        "a": nw.Int64,
        "b": nw.Int64,
        "z": nw.Float64,
    }
8 changes: 2 additions & 6 deletions tests/expr_and_series/cum_sum_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from typing import Any

import pytest

import narwhals.stable.v1 as nw
from tests.utils import compare_dicts

Expand All @@ -12,11 +10,9 @@
}


def test_cum_sum_simple(constructor: Any, request: Any) -> None:
if "dask" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_cum_sum_simple(constructor: Any) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.all().cum_sum())
result = df.select(nw.col("a", "b", "c").cum_sum())
expected = {
"a": [0, 1, 3, 6, 10],
"b": [1, 3, 6, 11, 14],
Expand Down
6 changes: 1 addition & 5 deletions tests/expr_and_series/is_between_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,7 @@
("none", [False, True, True, False]),
],
)
def test_is_between(
constructor: Any, closed: str, expected: list[bool], request: Any
) -> None:
if "dask" in str(constructor) and closed == "none":
request.applymarker(pytest.mark.xfail)
def test_is_between(constructor: Any, closed: str, expected: list[bool]) -> None:
df = nw.from_native(constructor(data))
result = df.select(nw.col("a").is_between(1, 5, closed=closed))
expected_dict = {"a": expected}
Expand Down
16 changes: 12 additions & 4 deletions tests/expr_and_series/str/contains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,25 @@
df_polars = pl.DataFrame(data)


def test_contains(constructor_eager: Any) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
def test_contains(constructor: Any) -> None:
df = nw.from_native(constructor(data))
result = df.with_columns(
case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove")
nw.col("pets").str.contains("(?i)parrot|Dove").alias("result")
)
expected = {
"pets": ["cat", "dog", "rabbit and parrot", "dove"],
"case_insensitive_match": [False, False, True, True],
"result": [False, False, True, True],
}
compare_dicts(result, expected)


def test_contains_series(constructor_eager: Any) -> None:
    """Series-level ``str.contains`` with an inline ``(?i)`` regex flag."""
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    matches = frame["pets"].str.contains("(?i)parrot|Dove")
    result = frame.with_columns(case_insensitive_match=matches)
    compare_dicts(
        result,
        {
            "pets": ["cat", "dog", "rabbit and parrot", "dove"],
            "case_insensitive_match": [False, False, True, True],
        },
    )
Loading

0 comments on commit aca43d1

Please sign in to comment.