Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "feat(python): Add replace_all expression to complement replace" #16630

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ Manipulation/selection
Expr.reinterpret
Expr.repeat_by
Expr.replace
Expr.replace_all
Expr.reshape
Expr.reverse
Expr.rle
Expand Down
1 change: 0 additions & 1 deletion py-polars/docs/source/reference/series/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ Computation
Series.peak_min
Series.rank
Series.replace
Series.replace_all
Series.rolling_apply
Series.rolling_map
Series.rolling_max
Expand Down
5 changes: 4 additions & 1 deletion py-polars/polars/_utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,10 @@ def str_duration_(td: str | None) -> int | None:
.cast(tp)
)
elif tp == Boolean:
cast_cols[c] = F.col(c).replace_all({"true": True, "false": False})
cast_cols[c] = F.col(c).replace(
{"true": True, "false": False},
default=None,
)
elif tp in INTEGER_DTYPES:
int_string = F.col(c).str.replace_all(r"[^\d+-]", "")
cast_cols[c] = (
Expand Down
192 changes: 14 additions & 178 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11582,26 +11582,16 @@ def replace(
Accepts expression input. Sequences are parsed as Series,
other non-expression inputs are parsed as literals.
Length must match the length of `old` or have length 1.

default
Set values that were not replaced to this value.
Defaults to keeping the original value.
Accepts expression input. Non-expression inputs are parsed as literals.

.. deprecated:: 0.20.31
Use :meth:`replace_all` instead to set a default while replacing values.

return_dtype
The data type of the resulting expression. If set to `None` (default),
the data type is determined automatically based on the other inputs.

.. deprecated:: 0.20.31
Use :meth:`replace_all` instead to set a return data type while
replacing values.

See Also
--------
replace_all
str.replace

Notes
Expand Down Expand Up @@ -11643,23 +11633,25 @@ def replace(
└─────┴──────────┘

Passing a mapping with replacements is also supported as syntactic sugar.
Specify a default to set all values that were not matched.

>>> mapping = {2: 100, 3: 200}
>>> df.with_columns(replaced=pl.col("a").replace(mapping))
>>> df.with_columns(replaced=pl.col("a").replace(mapping, default=-1))
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ 1 │
│ 1 ┆ -1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘

Replacing by values of a different data type sets the return type based on
a combination of the `new` data type and the original data type.
a combination of the `new` data type and either the original data type or the
default data type if it was set.

>>> df = pl.DataFrame({"a": ["x", "y", "z"]})
>>> mapping = {"x": 1, "y": 2, "z": 3}
Expand All @@ -11674,156 +11666,7 @@ def replace(
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘

Expression input is supported.

>>> df = pl.DataFrame({"a": [1, 2, 2, 3], "b": [1.5, 2.5, 5.0, 1.0]})
>>> df.with_columns(
... replaced=pl.col("a").replace(
... old=pl.col("a").max(),
... new=pl.col("b").sum(),
... )
... )
shape: (4, 3)
┌─────┬─────┬──────────┐
│ a ┆ b ┆ replaced │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ f64 │
╞═════╪═════╪══════════╡
│ 1 ┆ 1.5 ┆ 1.0 │
│ 2 ┆ 2.5 ┆ 2.0 │
│ 2 ┆ 5.0 ┆ 2.0 │
│ 3 ┆ 1.0 ┆ 10.0 │
└─────┴─────┴──────────┘
"""
if new is no_default and isinstance(old, Mapping):
new = pl.Series(old.values())
old = pl.Series(old.keys())
else:
if isinstance(old, Sequence) and not isinstance(old, (str, pl.Series)):
old = pl.Series(old)
if isinstance(new, Sequence) and not isinstance(new, (str, pl.Series)):
new = pl.Series(new)

old = parse_as_expression(old, str_as_lit=True) # type: ignore[arg-type]
new = parse_as_expression(new, str_as_lit=True) # type: ignore[arg-type]

if default is no_default:
default = None
else:
issue_deprecation_warning(
"The `default` parameter for `replace` is deprecated."
" Use `replace_all` instead to set a default while replacing values.",
version="0.20.31",
)
default = parse_as_expression(default, str_as_lit=True)

if return_dtype is not None:
issue_deprecation_warning(
"The `return_dtype` parameter for `replace` is deprecated."
" Use `replace_all` instead to set a return data type while replacing values.",
version="0.20.31",
)

return self._from_pyexpr(self._pyexpr.replace(old, new, default, return_dtype))

def replace_all(
self,
old: IntoExpr | Sequence[Any] | Mapping[Any, Any],
new: IntoExpr | Sequence[Any] | NoDefault = no_default,
*,
default: IntoExpr = None,
return_dtype: PolarsDataType | None = None,
) -> Self:
"""
Replace all values by different values.

Parameters
----------
old
Value or sequence of values to replace.
Accepts expression input. Sequences are parsed as Series,
other non-expression inputs are parsed as literals.
Also accepts a mapping of values to their replacement as syntactic sugar for
`replace_all(old=Series(mapping.keys()), new=Series(mapping.values()))`.
new
Value or sequence of values to replace by.
Accepts expression input. Sequences are parsed as Series,
other non-expression inputs are parsed as literals.
Length must match the length of `old` or have length 1.
default
Set values that were not replaced to this value. Defaults to null.
Accepts expression input. Non-expression inputs are parsed as literals.
return_dtype
The data type of the resulting expression. If set to `None` (default),
the data type is determined automatically based on the other inputs.

See Also
--------
replace
str.replace

Notes
-----
The global string cache must be enabled when replacing categorical values.

Examples
--------
Replace a single value by another value. Values that were not replaced are set
to null.

>>> df = pl.DataFrame({"a": [1, 2, 2, 3]})
>>> df.with_columns(replaced=pl.col("a").replace_all(2, 100))
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i32 │
╞═════╪══════════╡
│ 1 ┆ null │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ null │
└─────┴──────────┘

Replace multiple values by passing sequences to the `old` and `new` parameters.

>>> df.with_columns(replaced=pl.col("a").replace_all([2, 3], [100, 200]))
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ null │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘

Passing a mapping with replacements is also supported as syntactic sugar.
Specify a default to set all values that were not matched.

>>> mapping = {2: 100, 3: 200}
>>> df.with_columns(replaced=pl.col("a").replace_all(mapping, default=-1))
shape: (4, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪══════════╡
│ 1 ┆ -1 │
│ 2 ┆ 100 │
│ 2 ┆ 100 │
│ 3 ┆ 200 │
└─────┴──────────┘

Replacing by values of a different data type sets the return type based on
a combination of the `new` data type and the `default` data type.

>>> df = pl.DataFrame({"a": ["x", "y", "z"]})
>>> mapping = {"x": 1, "y": 2, "z": 3}
>>> df.with_columns(replaced=pl.col("a").replace_all(mapping))
>>> df.with_columns(replaced=pl.col("a").replace(mapping, default=None))
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
Expand All @@ -11834,22 +11677,11 @@ def replace_all(
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘
>>> df.with_columns(replaced=pl.col("a").replace_all(mapping, default="x"))
shape: (3, 2)
┌─────┬──────────┐
│ a ┆ replaced │
│ --- ┆ --- │
│ str ┆ str │
╞═════╪══════════╡
│ x ┆ 1 │
│ y ┆ 2 │
│ z ┆ 3 │
└─────┴──────────┘

Set the `return_dtype` parameter to control the resulting data type directly.

>>> df.with_columns(
... replaced=pl.col("a").replace_all(mapping, return_dtype=pl.UInt8)
... replaced=pl.col("a").replace(mapping, return_dtype=pl.UInt8)
... )
shape: (3, 2)
┌─────┬──────────┐
Expand All @@ -11866,7 +11698,7 @@ def replace_all(

>>> df = pl.DataFrame({"a": [1, 2, 2, 3], "b": [1.5, 2.5, 5.0, 1.0]})
>>> df.with_columns(
... replaced=pl.col("a").replace_all(
... replaced=pl.col("a").replace(
... old=pl.col("a").max(),
... new=pl.col("b").sum(),
... default=pl.col("b"),
Expand Down Expand Up @@ -11896,7 +11728,11 @@ def replace_all(
old = parse_as_expression(old, str_as_lit=True) # type: ignore[arg-type]
new = parse_as_expression(new, str_as_lit=True) # type: ignore[arg-type]

default = parse_as_expression(default, str_as_lit=True)
default = (
None
if default is no_default
else parse_as_expression(default, str_as_lit=True)
)

return self._from_pyexpr(self._pyexpr.replace(old, new, default, return_dtype))

Expand Down Expand Up @@ -12349,7 +12185,7 @@ def map_dict(
return_dtype
Set return dtype to override automatic return dtype determination.
"""
return self.replace_all(mapping, default=default, return_dtype=return_dtype)
return self.replace(mapping, default=default, return_dtype=return_dtype)

@classmethod
def from_json(cls, value: str) -> Self:
Expand Down
Loading