Skip to content

Commit

Permalink
Add stream.iter_polars #1503 (#1504)
Browse files Browse the repository at this point in the history
  • Loading branch information
niccolopetti authored Feb 14, 2024
1 parent 1c98e9e commit af6d7c5
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 2 deletions.
3 changes: 2 additions & 1 deletion docs/unreleased.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
## drift

- Added `FHDDM` drift detector.
- Added `FHDDM` drift detector.
- Added an `iter_polars` function to iterate over the rows of a polars DataFrame.
40 changes: 39 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ python = ">=3.9,<3.13"
numpy = "^1.23.0"
scipy = "^1.8.1"
pandas = "^2.1"
polars = "^0.20.8"

[tool.poetry.group.dev.dependencies]
graphviz = "^0.20.1"
Expand Down
7 changes: 7 additions & 0 deletions river/stream/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@
"TwitchChatStream",
]

# polars is an optional dependency: `iter_polars` is only exported when its
# module can be imported; otherwise the name is simply left out of `__all__`.
try:
    from .iter_polars import iter_polars
except ImportError:
    pass
else:
    __all__ += ["iter_polars"]

try:
from .iter_pandas import iter_pandas

Expand Down
49 changes: 49 additions & 0 deletions river/stream/iter_polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

import polars as pl

from river import base, stream


def iter_polars(
    X: pl.DataFrame, y: pl.Series | pl.DataFrame | None = None, **kwargs
) -> base.typing.Stream:
    """Stream the rows of a `polars.DataFrame`, one observation at a time.

    The dataframe (and the optional target) is converted to NumPy and handed over to
    `stream.iter_array`, which performs the actual iteration.

    Parameters
    ----------
    X
        A dataframe of features. Its column names are used as the feature names.
    y
        A series or a dataframe with one column per target. When a dataframe is given,
        its column names are used as the target names.
    kwargs
        Extra keyword arguments are passed to the underlying call to `stream.iter_array`.
        A `feature_names` entry is always replaced by the columns of `X`, and a
        `target_names` entry is replaced by the columns of `y` when `y` is a dataframe.

    Examples
    --------

    >>> import polars as pl
    >>> from river import stream

    >>> X = pl.DataFrame({
    ...     'x1': [1, 2, 3, 4],
    ...     'x2': ['blue', 'yellow', 'yellow', 'blue'],
    ...     'y': [True, False, False, True]
    ... })
    >>> y = X.get_column('y')
    >>> X = X.drop("y")

    >>> for xi, yi in stream.iter_polars(X, y):
    ...     print(xi, yi)
    {'x1': 1, 'x2': 'blue'} True
    {'x1': 2, 'x2': 'yellow'} False
    {'x1': 3, 'x2': 'yellow'} False
    {'x1': 4, 'x2': 'blue'} True

    """
    # The column names come from the dataframes themselves, so they take precedence
    # over any same-named keyword arguments supplied by the caller.
    array_kwargs = {**kwargs, "feature_names": X.columns}
    if isinstance(y, pl.DataFrame):
        array_kwargs["target_names"] = y.columns

    features = X.to_numpy()
    # A missing target is forwarded as `None`; anything else is converted to NumPy.
    targets = None if y is None else y.to_numpy()

    yield from stream.iter_array(X=features, y=targets, **array_kwargs)

0 comments on commit af6d7c5

Please sign in to comment.