Skip to content

Commit

Permalink
feat: add duckdb dataframe drop_nulls (#1811)
Browse files Browse the repository at this point in the history
* feat: add duckdb dataframe drop_nulls

* rollback

* test columns with spaces

* single double quote

* push list conversion to narwhals BaseFrame level
  • Loading branch information
FBruzzesi authored Jan 16, 2025
1 parent 0b75d85 commit e4e881b
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 21 deletions.
3 changes: 1 addition & 2 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,9 @@ def drop(self: Self, columns: list[str], strict: bool) -> Self: # noqa: FBT001
)
return self._from_native_frame(self._native_frame.drop(to_drop))

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.drop_null())
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
3 changes: 1 addition & 2 deletions narwhals/_dask/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,9 @@ def select(
)
return self._from_native_frame(df)

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.dropna())
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
9 changes: 9 additions & 0 deletions narwhals/_duckdb/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,12 @@ def sort(
)
)
return self._from_native_frame(result)

def drop_nulls(self: Self, subset: list[str] | None) -> Self:
    """Return a frame without the rows holding a null in any checked column.

    Arguments:
        subset: Names of the columns to inspect for nulls. `None` means
            every column of the underlying relation is inspected.

    Returns:
        A new frame wrapping the filtered native relation. When there is
        no column to inspect (empty `subset`, or a relation without
        columns) the relation is passed through unfiltered.
    """
    rel = self._native_frame
    subset_ = subset if subset is not None else rel.columns
    if not subset_:
        # No column to check: joining nothing would yield an empty SQL
        # predicate, which is a syntax error, so skip filtering entirely.
        return self._from_native_frame(rel)
    # Identifiers are double-quoted so names containing spaces work; any
    # embedded double quote is doubled so it cannot terminate the identifier.
    keep_condition = " and ".join(
        '"{}" is not null'.format(col.replace('"', '""')) for col in subset_
    )
    # Filter the relation directly instead of issuing `select * from rel`:
    # that query depended on DuckDB resolving the Python local named `rel`
    # on the default connection.
    return self._from_native_frame(rel.filter(keep_condition))
3 changes: 1 addition & 2 deletions narwhals/_pandas_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,10 +371,9 @@ def select(
)
return self._from_native_frame(df)

def drop_nulls(self, subset: str | list[str] | None) -> Self:
def drop_nulls(self, subset: list[str] | None) -> Self:
if subset is None:
return self._from_native_frame(self._native_frame.dropna(axis=0))
subset = [subset] if isinstance(subset, str) else subset
plx = self.__narwhals_namespace__()
return self.filter(~plx.any_horizontal(plx.col(*subset).is_null()))

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_spark_like/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def sort(
sort_cols = [sort_f(col) for col, sort_f in zip(flat_by, sort_funcs)]
return self._from_native_frame(self._native_frame.sort(*sort_cols))

def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
def drop_nulls(self: Self, subset: list[str] | None) -> Self:
return self._from_native_frame(self._native_frame.dropna(subset=subset))

def rename(self: Self, mapping: dict[str, str]) -> Self:
Expand Down
3 changes: 2 additions & 1 deletion narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def with_row_index(self, name: str = "index") -> Self:
self._compliant_frame.with_row_index(name),
)

def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
def drop_nulls(self: Self, subset: str | list[str] | None) -> Self:
subset = [subset] if isinstance(subset, str) else subset
return self._from_compliant_dataframe(
self._compliant_frame.drop_nulls(subset=subset),
)
Expand Down
21 changes: 8 additions & 13 deletions tests/frame/drop_nulls_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,32 @@
from tests.utils import assert_equal_data

data = {
"a": [1.0, 2.0, None, 4.0],
"b": [None, 3.0, None, 5.0],
"alpha": [1.0, 2.0, None, 4.0],
"beta gamma": [None, 3.0, None, 5.0],
}


def test_drop_nulls(constructor: Constructor, request: pytest.FixtureRequest) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
def test_drop_nulls(constructor: Constructor) -> None:
result = nw.from_native(constructor(data)).drop_nulls()
expected = {
"a": [2.0, 4.0],
"b": [3.0, 5.0],
"alpha": [2.0, 4.0],
"beta gamma": [3.0, 5.0],
}
assert_equal_data(result, expected)


@pytest.mark.parametrize(
("subset", "expected"),
[
("a", {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}),
(["a"], {"a": [1, 2.0, 4.0], "b": [None, 3.0, 5.0]}),
(["a", "b"], {"a": [2.0, 4.0], "b": [3.0, 5.0]}),
("alpha", {"alpha": [1, 2.0, 4.0], "beta gamma": [None, 3.0, 5.0]}),
(["alpha"], {"alpha": [1, 2.0, 4.0], "beta gamma": [None, 3.0, 5.0]}),
(["alpha", "beta gamma"], {"alpha": [2.0, 4.0], "beta gamma": [3.0, 5.0]}),
],
)
def test_drop_nulls_subset(
constructor: Constructor,
subset: str | list[str],
expected: dict[str, float],
request: pytest.FixtureRequest,
) -> None:
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
result = nw.from_native(constructor(data)).drop_nulls(subset=subset)
assert_equal_data(result, expected)

0 comments on commit e4e881b

Please sign in to comment.