Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: ban dask filtering with boolean mask #939

Merged
merged 1 commit into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,15 @@ def filter(
             and isinstance(predicates[0], list)
             and all(isinstance(x, bool) for x in predicates[0])
         ):
-            mask = predicates[0]
-        else:
-            from narwhals._dask.namespace import DaskNamespace
+            msg = "Filtering with boolean mask is not supported for `DaskLazyFrame`"
+            raise NotImplementedError(msg)
+
+        from narwhals._dask.namespace import DaskNamespace

-            plx = DaskNamespace(backend_version=self._backend_version)
-            expr = plx.all_horizontal(*predicates)
-            # Safety: all_horizontal's expression only returns a single column.
-            mask = expr._call(self)[0]
+        plx = DaskNamespace(backend_version=self._backend_version)
+        expr = plx.all_horizontal(*predicates)
+        # Safety: all_horizontal's expression only returns a single column.
+        mask = expr._call(self)[0]
         return self._from_native_frame(self._native_frame.loc[mask])

def lazy(self) -> Self:
Expand Down
19 changes: 16 additions & 3 deletions tests/frame/filter_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
+from contextlib import nullcontext as does_not_raise
 from typing import Any

+import pytest
+
 import narwhals.stable.v1 as nw
 from tests.utils import compare_dicts

Expand All @@ -15,6 +18,16 @@ def test_filter(constructor: Any) -> None:
 def test_filter_with_boolean_list(constructor: Any) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
     df = nw.from_native(constructor(data))
-    result = df.filter([False, True, True])
-    expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]}
-    compare_dicts(result, expected)
+
+    context = (
+        pytest.raises(
+            NotImplementedError, match="Filtering with boolean mask is not supported"
+        )
+        if "dask" in str(constructor)
+        else does_not_raise()
+    )
+
+    with context:
+        result = df.filter([False, True, True])
+        expected = {"a": [3, 2], "b": [4, 6], "z": [8.0, 9.0]}
+        compare_dicts(result, expected)
Loading