From 56b75ae6a9613f0237c774a1700a96c86f23c409 Mon Sep 17 00:00:00 2001 From: Adam Date: Thu, 18 Apr 2019 23:47:05 +0100 Subject: [PATCH] BUG: groupby ffill adds labels as extra column (#21521) --- doc/source/whatsnew/v0.25.0.rst | 34 ++++++++++++++++++++++++++ pandas/core/groupby/generic.py | 9 ------- pandas/core/groupby/groupby.py | 1 - pandas/tests/groupby/test_transform.py | 17 ++++++++++--- 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 485a0a06fc2476..3e00c1a7440b67 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -150,6 +150,40 @@ Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. +``DataFrame`` groupby ffill/bfill no longer return group labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of +:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>` +previously included the group labels in the return value, which was +inconsistent with other groupby transforms. + +(:issue:`21521`) + +Now only the filled values are returned. + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) + df + +*Previous Behaviour*: + +.. code-block:: python + + In [3]: df.groupby("a").ffill() + Out[3]: + a b + 0 x 1 + 1 y 2 + +*New Behaviour*: + +.. ipython:: python + + df.groupby("a").ffill() + + .. 
_whatsnew_0250.api_breaking.deps: Increased minimum versions for dependencies diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8b9ddce6000eb..b47d271f9a6fcf 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1469,15 +1469,6 @@ def _apply_to_column_groupbys(self, func): in self._iterate_column_groupbys()), keys=self._selected_obj.columns, axis=1) - def _fill(self, direction, limit=None): - """Overridden method to join grouped columns in output""" - res = super()._fill(direction, limit=limit) - output = OrderedDict( - (grp.name, grp.grouper) for grp in self.grouper.groupings) - - from pandas import concat - return concat((self._wrap_transformed_output(output), res), axis=1) - def count(self): """ Compute count of group, excluding missing values """ from pandas.core.dtypes.missing import _isna_ndarraylike as _isna diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bd8a8852964e3b..e6b71a70cb2da1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2127,7 +2127,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, limit=limit, freq=freq, axis=axis)) filled = getattr(self, fill_method)(limit=limit) - filled = filled.drop(self.grouper.names, axis=1) fill_grp = filled.groupby(self.grouper.labels) shifted = fill_grp.shift(periods=periods, freq=freq) return (filled / shifted) - 1 diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index e330329644269d..2cc70ac5361290 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -749,7 +749,7 @@ def interweave(list_obj): assert_series_equal(result, exp) else: result = getattr(df.groupby('key'), fill_method)(limit=limit) - exp = DataFrame({'key': keys, 'val': _exp_vals}) + exp = DataFrame({'val': _exp_vals}) assert_frame_equal(result, exp) @@ -763,7 +763,7 @@ def 
test_pad_stable_sorting(fill_method): y = y[::-1] df = pd.DataFrame({'x': x, 'y': y}) - expected = df.copy() + expected = df.drop('x', 1) result = getattr(df.groupby('x'), fill_method)() @@ -789,7 +789,7 @@ def test_pct_change(test_series, freq, periods, fill_method, limit): df = DataFrame({'key': key_v, 'vals': vals * 2}) df_g = getattr(df.groupby('key'), fill_method)(limit=limit) - grp = df_g.groupby('key') + grp = df_g.groupby(df.key) expected = grp['vals'].obj / grp['vals'].shift(periods) - 1 @@ -880,3 +880,14 @@ def test_transform_absent_categories(func): result = getattr(df.y.groupby(df.x), func)() expected = df.y assert_series_equal(result, expected) + + +@pytest.mark.parametrize('func', ['ffill', 'bfill']) +@pytest.mark.parametrize('key, val', [('level', 0), ('by', Series([0]))]) +def test_ffill_not_in_axis(func, key, val): + # GH 21521 + df = pd.DataFrame([[0]]) + result = getattr(df.groupby(**{key: val}), func)() + expected = df + + assert_frame_equal(result, expected)