From 100fa27aec6aa291997a60a6e55bdb03f8018028 Mon Sep 17 00:00:00 2001
From: Adam
Date: Thu, 18 Apr 2019 23:47:05 +0100
Subject: [PATCH] BUG: groupby ffill adds labels as extra column (#21521)

---
 doc/source/whatsnew/v0.25.0.rst        |  1 +
 pandas/core/groupby/generic.py         |  3 ++-
 pandas/tests/groupby/test_transform.py | 11 +++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 353af98f5b64db..b4b72f08a23e12 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -389,6 +389,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`)
 - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
 - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.ffill` and :meth:`pandas.core.groupby.DataFrameGroupBy.bfill` when group labels are not in frame, would concat them with the return value. (:issue:`21521`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 01784513704b4a..6dd7af2ce23cf3 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1473,7 +1473,8 @@ def _fill(self, direction, limit=None):
         """Overridden method to join grouped columns in output"""
         res = super(DataFrameGroupBy, self)._fill(direction, limit=limit)
         output = OrderedDict(
-            (grp.name, grp.grouper) for grp in self.grouper.groupings)
+            (grp.name, grp.grouper) for grp in self.grouper.groupings
+            if grp.in_axis)
 
         from pandas import concat
         return concat((self._wrap_transformed_output(output), res), axis=1)
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index e330329644269d..d497b945d94df5 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -880,3 +880,14 @@ def test_transform_absent_categories(func):
     result = getattr(df.y.groupby(df.x), func)()
     expected = df.y
     assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('func', ['ffill', 'bfill'])
+@pytest.mark.parametrize('key, val', [('level', 0), ('by', Series([0]))])
+def test_ffill_not_in_axis(func, key, val):
+    # GH 21521
+    df = pd.DataFrame([[0]])
+    result = getattr(df.groupby(**{key: val}), func)()
+    expected = df
+
+    assert_frame_equal(result, expected)