Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: groupby.fillna #55719

Merged
merged 8 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ Other Deprecations
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
- Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill`, :meth:`.DataFrameGroupBy.bfill` for forward and backward filling or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`)
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,13 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
("Series.idxmin", "The behavior of Series.idxmin"),
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
# Docstring divides by zero to show behavior difference
Expand Down
54 changes: 28 additions & 26 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,12 @@ def fillna(
"""
Fill NA/NaN values using the specified method within groups.

.. deprecated:: 2.2.0
This method is deprecated and will be removed in a future version.
Use :meth:`.SeriesGroupBy.ffill` or :meth:`.SeriesGroupBy.bfill`
for forward or backward filling instead. If you want to fill with a
single value, use :meth:`Series.fillna` instead.

Parameters
----------
value : scalar, dict, Series, or DataFrame
Expand All @@ -915,17 +921,8 @@ def fillna(
Method to use for filling holes. ``'ffill'`` will propagate
the last valid observation forward within a group.
``'bfill'`` will use next valid observation to fill the gap.

.. deprecated:: 2.1.0
Use obj.ffill or obj.bfill instead.

axis : {0 or 'index', 1 or 'columns'}
Unused, only for compatibility with :meth:`DataFrameGroupBy.fillna`.

.. deprecated:: 2.1.0
For axis=1, operate on the underlying object instead. Otherwise
the axis keyword is not necessary.

inplace : bool, default False
Broken. Do not set to True.
limit : int, default None
Expand All @@ -940,8 +937,6 @@ def fillna(
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).

.. deprecated:: 2.1.0

Returns
-------
Series
Expand Down Expand Up @@ -973,6 +968,14 @@ def fillna(
mouse 0.0
dtype: float64
"""
warnings.warn(
mroeschke marked this conversation as resolved.
Show resolved Hide resolved
f"{type(self).__name__}.fillna is deprecated and "
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
"for forward or backward filling instead. If you want to fill with a "
f"single value, use {type(self.obj).__name__}.fillna instead",
FutureWarning,
stacklevel=find_stack_level(),
)
result = self._op_via_apply(
"fillna",
value=value,
Expand Down Expand Up @@ -2401,6 +2404,12 @@ def fillna(
"""
Fill NA/NaN values using the specified method within groups.

.. deprecated:: 2.2.0
This method is deprecated and will be removed in a future version.
Use :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill`
for forward or backward filling instead. If you want to fill with a
single value, use :meth:`DataFrame.fillna` instead.

Parameters
----------
value : scalar, dict, Series, or DataFrame
Expand All @@ -2421,11 +2430,6 @@ def fillna(
the same results as :meth:`.DataFrame.fillna`. When the
:class:`DataFrameGroupBy` ``axis`` argument is ``1``, using ``axis=0``
or ``axis=1`` here will produce the same results.

.. deprecated:: 2.1.0
For axis=1, operate on the underlying object instead. Otherwise
the axis keyword is not necessary.

inplace : bool, default False
Broken. Do not set to True.
limit : int, default None
Expand All @@ -2440,8 +2444,6 @@ def fillna(
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).

.. deprecated:: 2.1.0

Returns
-------
DataFrame
Expand Down Expand Up @@ -2516,14 +2518,14 @@ def fillna(
3 3.0 NaN 2.0
4 3.0 NaN NaN
"""
if method is not None:
warnings.warn(
f"{type(self).__name__}.fillna with 'method' is deprecated and "
"will raise in a future version. Use obj.ffill() or obj.bfill() "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
warnings.warn(
f"{type(self).__name__}.fillna is deprecated and "
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
"for forward or backward filling instead. If you want to fill with a "
f"single value, use {type(self.obj).__name__}.fillna instead",
FutureWarning,
stacklevel=find_stack_level(),
)

result = self._op_via_apply(
"fillna",
Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,11 @@ def test_transform_groupby_kernel_series(request, string_series, op):
)
args = [0.0] if op == "fillna" else []
ones = np.ones(string_series.shape[0])
expected = string_series.groupby(ones).transform(op, *args)

warn = FutureWarning if op == "fillna" else None
msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
expected = string_series.groupby(ones).transform(op, *args)
result = string_series.transform(op, 0, *args)
tm.assert_series_equal(result, expected)

Expand All @@ -285,7 +289,12 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):

with tm.assert_produces_warning(FutureWarning, match=msg):
gb = float_frame.groupby(ones, axis=axis)
expected = gb.transform(op, *args)

warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected = gb.transform(op, *args)

result = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result, expected)

Expand All @@ -300,7 +309,10 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
ones = np.ones(float_frame.shape[1])
with tm.assert_produces_warning(FutureWarning, match=msg):
gb2 = float_frame.groupby(ones, axis=axis)
expected2 = gb2.transform(op, *args)
warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected2 = gb2.transform(op, *args)
result2 = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result2, expected2)

Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1975,7 +1975,10 @@ def test_category_order_transformer(
df = df.set_index(keys)
args = get_groupby_method_args(transformation_func, df)
gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
op_result = getattr(gb, transformation_func)(*args)
warn = FutureWarning if transformation_func == "fillna" else None
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
op_result = getattr(gb, transformation_func)(*args)
result = op_result.index.get_level_values("a").categories
expected = Index([1, 4, 3, 2])
tm.assert_index_equal(result, expected)
Expand Down
35 changes: 27 additions & 8 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,10 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
and numeric_only is lib.no_default
)
):
result = method(*args, **kwargs)
warn = FutureWarning if kernel == "fillna" else None
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
result = method(*args, **kwargs)
assert "b" in result.columns
elif has_arg:
assert numeric_only is not True
Expand Down Expand Up @@ -725,11 +728,18 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
msg = "cannot be performed against 'object' dtypes"
else:
msg = "is not supported for object dtype"
with pytest.raises(TypeError, match=msg):
method(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
with pytest.raises(TypeError, match=msg):
method(*args)
elif dtype is object:
result = method(*args)
expected = expected_method(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = method(*args)
with tm.assert_produces_warning(warn, match=warn_msg):
expected = expected_method(*args)
if groupby_func in obj_result:
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -813,7 +823,10 @@ def test_multiindex_group_all_columns_when_empty(groupby_func):
method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

result = method(*args).index
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = method(*args).index
expected = df.index
tm.assert_index_equal(result, expected)

Expand All @@ -826,12 +839,18 @@ def test_duplicate_columns(request, groupby_func, as_index):
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
args = get_groupby_method_args(groupby_func, df)
gb = df.groupby("a", as_index=as_index)
result = getattr(gb, groupby_func)(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(gb, groupby_func)(*args)

expected_df = df.set_axis(["a", "b", "c"], axis=1)
expected_args = get_groupby_method_args(groupby_func, expected_df)
expected_gb = expected_df.groupby("a", as_index=as_index)
expected = getattr(expected_gb, groupby_func)(*expected_args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
expected = getattr(expected_gb, groupby_func)(*expected_args)
if groupby_func not in ("size", "ngroup", "cumcount"):
expected = expected.rename(columns={"c": "b"})
tm.assert_equal(result, expected)
Expand Down
39 changes: 29 additions & 10 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2370,18 +2370,32 @@ def test_group_on_empty_multiindex(transformation_func, request):
args = ("ffill",)
else:
args = ()
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
warn = FutureWarning if transformation_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
with tm.assert_produces_warning(warn, match=warn_msg):
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
if transformation_func in ("diff", "shift"):
expected = expected.astype(int)
tm.assert_equal(result, expected)

result = (
df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
)
expected = (
df["col_3"].groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
)
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = (
df["col_3"]
.iloc[:0]
.groupby(["col_1"])
.transform(transformation_func, *args)
)
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
expected = (
df["col_3"]
.groupby(["col_1"])
.transform(transformation_func, *args)
.iloc[:0]
)
if transformation_func in ("diff", "shift"):
expected = expected.astype(int)
tm.assert_equal(result, expected)
Expand All @@ -2402,7 +2416,10 @@ def test_dup_labels_output_shape(groupby_func, idx):
grp_by = df.groupby([0])

args = get_groupby_method_args(groupby_func, df)
result = getattr(grp_by, groupby_func)(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(grp_by, groupby_func)(*args)

assert result.shape == (1, 2)
tm.assert_index_equal(result.columns, idx)
Expand Down Expand Up @@ -3158,7 +3175,9 @@ def test_groupby_selection_other_methods(df):
g_exp = df[["C"]].groupby(df["A"])

# methods which aren't just .foo()
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
msg = "DataFrameGroupBy.dtypes is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/groupby/test_groupby_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ def test_groupby_preserves_subclass(obj, groupby_func):

args = get_groupby_method_args(groupby_func, obj)

result1 = getattr(grouped, groupby_func)(*args)
result2 = grouped.agg(groupby_func, *args)
warn = FutureWarning if groupby_func == "fillna" else None
msg = f"{type(grouped).__name__}.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
result1 = getattr(grouped, groupby_func)(*args)
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
result2 = grouped.agg(groupby_func, *args)

# Reduction or transformation kernels should preserve type
slices = {"ngroup", "cumcount", "size"}
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/groupby/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ def test_groupby_fill_duplicate_column_names(func):
def test_ffill_missing_arguments():
# GH 14955
df = DataFrame({"a": [1, 2], "b": [1, 1]})
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()


@pytest.mark.parametrize(
Expand All @@ -50,7 +52,7 @@ def test_fillna_with_string_dtype(method, expected):
# GH 40250
df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]})
grp = df.groupby("b")
msg = "DataFrameGroupBy.fillna with 'method' is deprecated"
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = grp.fillna(method=method)
expected = DataFrame({"a": pd.array(expected, dtype="string")})
Expand Down
Loading
Loading