From ff4437eb3b27440ea2304a8613369ebec0b8b72a Mon Sep 17 00:00:00 2001 From: HHest <3169669+HHest@users.noreply.github.com> Date: Wed, 15 May 2019 15:01:27 +0200 Subject: [PATCH] Fix .transform crash when SeriesGroupBy is empty (#26208) (#26228) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/generic.py | 8 ++++++-- pandas/tests/groupby/test_grouping.py | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 49518c57fc846..7924a029e72c3 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -440,6 +440,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) Reshaping diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b5b6553d2ae69..2f665975f96bd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -916,8 +916,12 @@ def transform(self, func, *args, **kwargs): s = klass(res, indexer) results.append(s) - from pandas.core.reshape.concat import concat - result = concat(results).sort_index() + # check for empty "results" to avoid concat ValueError + if results: + from pandas.core.reshape.concat import concat + result = concat(results).sort_index() + else: + result = 
Series() # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* udfs diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 5edc2c13673bc..4c84c29ff98cb 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -552,6 +552,23 @@ def test_list_grouper_with_nat(self): expected = {pd.Timestamp('2011-01-01'): 365} tm.assert_dict_equal(result.groups, expected) + @pytest.mark.parametrize( + 'func,expected', + [ + ('transform', pd.Series(name=2, index=pd.RangeIndex(0, 0, 1))), + ('agg', pd.Series(name=2, index=pd.Float64Index([], name=1))), + ('apply', pd.Series(name=2, index=pd.Float64Index([], name=1))), + ]) + def test_evaluate_with_empty_groups(self, func, expected): + # GH 26208 + # test transform/agg/apply on an empty groupby + # (expected is parametrized per method because they return + # different index objects). + df = pd.DataFrame({1: [], 2: []}) + g = df.groupby(1) + result = getattr(g[2], func)(lambda x: x) + assert_series_equal(result, expected) + # get_group # --------------------------------