Skip to content

Commit

Permalink
BUG: groupby ffill adds labels as extra column (pandas-dev#21521)
Browse files Browse the repository at this point in the history
  • Loading branch information
Adam committed Apr 27, 2019
1 parent 64104ec commit 56b75ae
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 13 deletions.
34 changes: 34 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,40 @@ Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.


``DataFrame`` groupby ffill/bfill no longer return group labels
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of
:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>`
previously included the group labels in the return value, which was
inconsistent with other groupby transforms.

Now only the filled values are returned (:issue:`21521`).

.. ipython:: python

    df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
    df

*Previous Behaviour*:

.. code-block:: python

    In [3]: df.groupby("a").ffill()
    Out[3]:
       a  b
    0  x  1
    1  y  2

*New Behaviour*:

.. ipython:: python

    df.groupby("a").ffill()

.. _whatsnew_0250.api_breaking.deps:

Increased minimum versions for dependencies
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1469,15 +1469,6 @@ def _apply_to_column_groupbys(self, func):
in self._iterate_column_groupbys()),
keys=self._selected_obj.columns, axis=1)

def _fill(self, direction, limit=None):
    """Run the parent fill and put the grouping columns in front of it.

    ``direction`` and ``limit`` are forwarded unchanged to the parent
    class's ``_fill``. The returned object is the column-wise
    concatenation of the wrapped grouping columns followed by the
    parent's result.
    """
    filled = super()._fill(direction, limit=limit)

    # One entry per grouping, keyed by the grouping's name, in the order
    # the groupings are listed on the grouper.
    label_columns = OrderedDict()
    for grouping in self.grouper.groupings:
        label_columns[grouping.name] = grouping.grouper

    from pandas import concat
    label_frame = self._wrap_transformed_output(label_columns)
    return concat((label_frame, filled), axis=1)

def count(self):
""" Compute count of group, excluding missing values """
from pandas.core.dtypes.missing import _isna_ndarraylike as _isna
Expand Down
1 change: 0 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2127,7 +2127,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
limit=limit, freq=freq,
axis=axis))
filled = getattr(self, fill_method)(limit=limit)
filled = filled.drop(self.grouper.names, axis=1)
fill_grp = filled.groupby(self.grouper.labels)
shifted = fill_grp.shift(periods=periods, freq=freq)
return (filled / shifted) - 1
Expand Down
17 changes: 14 additions & 3 deletions pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def interweave(list_obj):
assert_series_equal(result, exp)
else:
result = getattr(df.groupby('key'), fill_method)(limit=limit)
exp = DataFrame({'key': keys, 'val': _exp_vals})
exp = DataFrame({'val': _exp_vals})
assert_frame_equal(result, exp)


Expand All @@ -763,7 +763,7 @@ def test_pad_stable_sorting(fill_method):
y = y[::-1]

df = pd.DataFrame({'x': x, 'y': y})
expected = df.copy()
expected = df.drop('x', 1)

result = getattr(df.groupby('x'), fill_method)()

Expand All @@ -789,7 +789,7 @@ def test_pct_change(test_series, freq, periods, fill_method, limit):
df = DataFrame({'key': key_v, 'vals': vals * 2})

df_g = getattr(df.groupby('key'), fill_method)(limit=limit)
grp = df_g.groupby('key')
grp = df_g.groupby(df.key)

expected = grp['vals'].obj / grp['vals'].shift(periods) - 1

Expand Down Expand Up @@ -880,3 +880,14 @@ def test_transform_absent_categories(func):
result = getattr(df.y.groupby(df.x), func)()
expected = df.y
assert_series_equal(result, expected)


@pytest.mark.parametrize('func', ['ffill', 'bfill'])
@pytest.mark.parametrize('key, val', [('level', 0), ('by', Series([0]))])
def test_ffill_not_in_axis(func, key, val):
    """Check ffill/bfill when the grouping key is not a column (GH 21521).

    The single-cell frame is grouped either by index level 0 or by a
    separate Series; the filled result must compare equal to the input
    frame itself.
    """
    frame = pd.DataFrame([[0]])

    # ``key`` selects which groupby keyword receives ``val``.
    grouped = frame.groupby(**{key: val})
    fill_method = getattr(grouped, func)
    result = fill_method()

    assert_frame_equal(result, frame)

0 comments on commit 56b75ae

Please sign in to comment.