Skip to content

Commit

Permalink
DEPR: groupby.fillna (#55719)
Browse files Browse the repository at this point in the history
* DEPR: groupby.fillna

* fixup

* Ignore doctest warnings

* Add deprecation to docstrings

* Update doc/source/whatsnew/v2.2.0.rst

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
rhshadrach and mroeschke authored Nov 16, 2023
1 parent 713c4dc commit 5457e59
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 79 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ Other Deprecations
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
- Deprecated downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
- Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill` for forward or backward filling, or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`)
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,13 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
("Series.idxmin", "The behavior of Series.idxmin"),
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
# Docstring divides by zero to show behavior difference
Expand Down
54 changes: 28 additions & 26 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,12 @@ def fillna(
"""
Fill NA/NaN values using the specified method within groups.
.. deprecated:: 2.2.0
This method is deprecated and will be removed in a future version.
Use :meth:`.SeriesGroupBy.ffill` or :meth:`.SeriesGroupBy.bfill`
for forward or backward filling instead. If you want to fill with a
single value, use :meth:`Series.fillna` instead.
Parameters
----------
value : scalar, dict, Series, or DataFrame
Expand All @@ -915,17 +921,8 @@ def fillna(
Method to use for filling holes. ``'ffill'`` will propagate
the last valid observation forward within a group.
``'bfill'`` will use next valid observation to fill the gap.
.. deprecated:: 2.1.0
Use obj.ffill or obj.bfill instead.
axis : {0 or 'index', 1 or 'columns'}
Unused, only for compatibility with :meth:`DataFrameGroupBy.fillna`.
.. deprecated:: 2.1.0
For axis=1, operate on the underlying object instead. Otherwise
the axis keyword is not necessary.
inplace : bool, default False
Broken. Do not set to True.
limit : int, default None
Expand All @@ -940,8 +937,6 @@ def fillna(
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).
.. deprecated:: 2.1.0
Returns
-------
Series
Expand Down Expand Up @@ -973,6 +968,14 @@ def fillna(
mouse 0.0
dtype: float64
"""
warnings.warn(
f"{type(self).__name__}.fillna is deprecated and "
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
"for forward or backward filling instead. If you want to fill with a "
f"single value, use {type(self.obj).__name__}.fillna instead",
FutureWarning,
stacklevel=find_stack_level(),
)
result = self._op_via_apply(
"fillna",
value=value,
Expand Down Expand Up @@ -2401,6 +2404,12 @@ def fillna(
"""
Fill NA/NaN values using the specified method within groups.
.. deprecated:: 2.2.0
This method is deprecated and will be removed in a future version.
Use :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill`
for forward or backward filling instead. If you want to fill with a
single value, use :meth:`DataFrame.fillna` instead.
Parameters
----------
value : scalar, dict, Series, or DataFrame
Expand All @@ -2421,11 +2430,6 @@ def fillna(
the same results as :meth:`.DataFrame.fillna`. When the
:class:`DataFrameGroupBy` ``axis`` argument is ``1``, using ``axis=0``
or ``axis=1`` here will produce the same results.
.. deprecated:: 2.1.0
For axis=1, operate on the underlying object instead. Otherwise
the axis keyword is not necessary.
inplace : bool, default False
Broken. Do not set to True.
limit : int, default None
Expand All @@ -2440,8 +2444,6 @@ def fillna(
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).
.. deprecated:: 2.1.0
Returns
-------
DataFrame
Expand Down Expand Up @@ -2516,14 +2518,14 @@ def fillna(
3 3.0 NaN 2.0
4 3.0 NaN NaN
"""
if method is not None:
warnings.warn(
f"{type(self).__name__}.fillna with 'method' is deprecated and "
"will raise in a future version. Use obj.ffill() or obj.bfill() "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
warnings.warn(
f"{type(self).__name__}.fillna is deprecated and "
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
"for forward or backward filling instead. If you want to fill with a "
f"single value, use {type(self.obj).__name__}.fillna instead",
FutureWarning,
stacklevel=find_stack_level(),
)

result = self._op_via_apply(
"fillna",
Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,11 @@ def test_transform_groupby_kernel_series(request, string_series, op):
)
args = [0.0] if op == "fillna" else []
ones = np.ones(string_series.shape[0])
expected = string_series.groupby(ones).transform(op, *args)

warn = FutureWarning if op == "fillna" else None
msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
expected = string_series.groupby(ones).transform(op, *args)
result = string_series.transform(op, 0, *args)
tm.assert_series_equal(result, expected)

Expand All @@ -285,7 +289,12 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):

with tm.assert_produces_warning(FutureWarning, match=msg):
gb = float_frame.groupby(ones, axis=axis)
expected = gb.transform(op, *args)

warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected = gb.transform(op, *args)

result = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result, expected)

Expand All @@ -300,7 +309,10 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
ones = np.ones(float_frame.shape[1])
with tm.assert_produces_warning(FutureWarning, match=msg):
gb2 = float_frame.groupby(ones, axis=axis)
expected2 = gb2.transform(op, *args)
warn = FutureWarning if op == "fillna" else None
op_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=op_msg):
expected2 = gb2.transform(op, *args)
result2 = float_frame.transform(op, axis, *args)
tm.assert_frame_equal(result2, expected2)

Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1975,7 +1975,10 @@ def test_category_order_transformer(
df = df.set_index(keys)
args = get_groupby_method_args(transformation_func, df)
gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
op_result = getattr(gb, transformation_func)(*args)
warn = FutureWarning if transformation_func == "fillna" else None
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
op_result = getattr(gb, transformation_func)(*args)
result = op_result.index.get_level_values("a").categories
expected = Index([1, 4, 3, 2])
tm.assert_index_equal(result, expected)
Expand Down
35 changes: 27 additions & 8 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,10 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
and numeric_only is lib.no_default
)
):
result = method(*args, **kwargs)
warn = FutureWarning if kernel == "fillna" else None
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg):
result = method(*args, **kwargs)
assert "b" in result.columns
elif has_arg:
assert numeric_only is not True
Expand Down Expand Up @@ -725,11 +728,18 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
msg = "cannot be performed against 'object' dtypes"
else:
msg = "is not supported for object dtype"
with pytest.raises(TypeError, match=msg):
method(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
with pytest.raises(TypeError, match=msg):
method(*args)
elif dtype is object:
result = method(*args)
expected = expected_method(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = method(*args)
with tm.assert_produces_warning(warn, match=warn_msg):
expected = expected_method(*args)
if groupby_func in obj_result:
expected = expected.astype(object)
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -813,7 +823,10 @@ def test_multiindex_group_all_columns_when_empty(groupby_func):
method = getattr(gb, groupby_func)
args = get_groupby_method_args(groupby_func, df)

result = method(*args).index
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = method(*args).index
expected = df.index
tm.assert_index_equal(result, expected)

Expand All @@ -826,12 +839,18 @@ def test_duplicate_columns(request, groupby_func, as_index):
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
args = get_groupby_method_args(groupby_func, df)
gb = df.groupby("a", as_index=as_index)
result = getattr(gb, groupby_func)(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(gb, groupby_func)(*args)

expected_df = df.set_axis(["a", "b", "c"], axis=1)
expected_args = get_groupby_method_args(groupby_func, expected_df)
expected_gb = expected_df.groupby("a", as_index=as_index)
expected = getattr(expected_gb, groupby_func)(*expected_args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
expected = getattr(expected_gb, groupby_func)(*expected_args)
if groupby_func not in ("size", "ngroup", "cumcount"):
expected = expected.rename(columns={"c": "b"})
tm.assert_equal(result, expected)
Expand Down
39 changes: 29 additions & 10 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2370,18 +2370,32 @@ def test_group_on_empty_multiindex(transformation_func, request):
args = ("ffill",)
else:
args = ()
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
warn = FutureWarning if transformation_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
with tm.assert_produces_warning(warn, match=warn_msg):
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
if transformation_func in ("diff", "shift"):
expected = expected.astype(int)
tm.assert_equal(result, expected)

result = (
df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
)
expected = (
df["col_3"].groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
)
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = (
df["col_3"]
.iloc[:0]
.groupby(["col_1"])
.transform(transformation_func, *args)
)
warn_msg = "SeriesGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
expected = (
df["col_3"]
.groupby(["col_1"])
.transform(transformation_func, *args)
.iloc[:0]
)
if transformation_func in ("diff", "shift"):
expected = expected.astype(int)
tm.assert_equal(result, expected)
Expand All @@ -2402,7 +2416,10 @@ def test_dup_labels_output_shape(groupby_func, idx):
grp_by = df.groupby([0])

args = get_groupby_method_args(groupby_func, df)
result = getattr(grp_by, groupby_func)(*args)
warn = FutureWarning if groupby_func == "fillna" else None
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(warn, match=warn_msg):
result = getattr(grp_by, groupby_func)(*args)

assert result.shape == (1, 2)
tm.assert_index_equal(result.columns, idx)
Expand Down Expand Up @@ -3158,7 +3175,9 @@ def test_groupby_selection_other_methods(df):
g_exp = df[["C"]].groupby(df["A"])

# methods which aren't just .foo()
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
warn_msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
msg = "DataFrameGroupBy.dtypes is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/groupby/test_groupby_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,12 @@ def test_groupby_preserves_subclass(obj, groupby_func):

args = get_groupby_method_args(groupby_func, obj)

result1 = getattr(grouped, groupby_func)(*args)
result2 = grouped.agg(groupby_func, *args)
warn = FutureWarning if groupby_func == "fillna" else None
msg = f"{type(grouped).__name__}.fillna is deprecated"
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
result1 = getattr(grouped, groupby_func)(*args)
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
result2 = grouped.agg(groupby_func, *args)

# Reduction or transformation kernels should preserve type
slices = {"ngroup", "cumcount", "size"}
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/groupby/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@ def test_groupby_fill_duplicate_column_names(func):
def test_ffill_missing_arguments():
# GH 14955
df = DataFrame({"a": [1, 2], "b": [1, 1]})
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pytest.raises(ValueError, match="Must specify a fill"):
df.groupby("b").fillna()


@pytest.mark.parametrize(
Expand All @@ -50,7 +52,7 @@ def test_fillna_with_string_dtype(method, expected):
# GH 40250
df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]})
grp = df.groupby("b")
msg = "DataFrameGroupBy.fillna with 'method' is deprecated"
msg = "DataFrameGroupBy.fillna is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = grp.fillna(method=method)
expected = DataFrame({"a": pd.array(expected, dtype="string")})
Expand Down
Loading

0 comments on commit 5457e59

Please sign in to comment.