From a68c94767faa7155d5cb1e9954a5539e3d55bd2e Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Thu, 19 Jan 2023 15:42:46 -0600 Subject: [PATCH 1/7] Add failing test reproducing groupby-resample KeyError (#50840) --- .../tests/resample/test_resampler_grouper.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 0c8e303b4ac56..63e0ddd9bfc8c 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -537,3 +537,33 @@ def test_groupby_resample_size_all_index_same(): ), ) tm.assert_series_equal(result, expected) + + +def test_groupby_resample_on_index_with_list_of_keys(): + # GH 50840 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "val": [3, 1, 4, 1, 5, 9, 2, 6], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + result = df.groupby("group").resample("2D")[["val"]].mean() + expected = DataFrame( + data={ + "val": [2.0, 2.5, 7.0, 4.0], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) From b204958c7fd2442f4a0205b27f324fda62c167d5 Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Thu, 19 Jan 2023 15:46:45 -0600 Subject: [PATCH 2/7] Fix groupby-resample KeyError (#50840) by adding None check. --- pandas/core/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3c26299c93b70..907d6522d3236 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None): # Try to select from a DataFrame, falling back to a Series try: - if isinstance(key, list) and self.key not in key: + if isinstance(key, list) and self.key not in key and self.key is not None: key.append(self.key) groupby = self._groupby[key] except IndexError: From 44e3863f47ffb969858d90675707d032bff5c026 Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Thu, 19 Jan 2023 15:49:32 -0600 Subject: [PATCH 3/7] Update whatsnew for #50840 --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1a071ab978de9..f9cbe1af757ef 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1056,6 +1056,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) - Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`) - Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`) +- Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`) - Reshaping From 645cf1fa4433830c9bf193b14849986a6caec8e5 Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Fri, 20 Jan 2023 14:08:04 -0600 Subject: [PATCH 4/7] Improve coverage with multi and missing column groupby-resample tests. --- .../tests/resample/test_resampler_grouper.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b668a95f10bfc..c3717cee05f2b 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -566,3 +566,52 @@ def test_groupby_resample_on_index_with_list_of_keys(): ), ) tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): + # GH 50876 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "first_val": [3, 1, 4, 1, 5, 9, 2, 6], + "second_val": [2, 7, 1, 8, 2, 8, 1, 8], + "third_val": [1, 4, 1, 4, 2, 1, 3, 5], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() + expected = DataFrame( + data={ + "first_val": [2.0, 2.5, 7.0, 4.0], + "second_val": [4.5, 4.5, 5.0, 4.5], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_index_with_list_of_keys_missing_column(): + # GH 50876 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "val": [3, 1, 4, 1, 5, 9, 2, 6], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + with pytest.raises(KeyError, match="Columns not found"): + df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() From d5226067a889e5a2326707c9ef4c27934a035fab Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Fri, 20 Jan 2023 14:18:48 -0600 Subject: [PATCH 5/7] Refactor groupby-resample tests via TestCase to remove duplicate code. --- .../tests/resample/test_resampler_grouper.py | 138 ++++++++---------- 1 file changed, 61 insertions(+), 77 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index c3717cee05f2b..c9d9ec3c8a6d6 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -1,3 +1,4 @@ +import unittest from textwrap import dedent import numpy as np @@ -538,80 +539,63 @@ def test_groupby_resample_size_all_index_same(): tm.assert_series_equal(result, expected) -def test_groupby_resample_on_index_with_list_of_keys(): - # GH 50840 - df = DataFrame( - data={ - "group": [0, 0, 0, 0, 1, 1, 1, 1], - "val": [3, 1, 4, 1, 5, 9, 2, 6], - }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), - ) - result = df.groupby("group").resample("2D")[["val"]].mean() - expected = DataFrame( - data={ - "val": [2.0, 2.5, 7.0, 4.0], - }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), - ) - tm.assert_frame_equal(result, expected) - - -def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): - # GH 50876 - df = DataFrame( - data={ - "group": [0, 0, 0, 0, 1, 1, 1, 1], - "first_val": [3, 1, 4, 1, 5, 9, 2, 6], - "second_val": [2, 7, 1, 8, 2, 8, 1, 8], - "third_val": [1, 4, 1, 4, 2, 1, 3, 5], - }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), - ) - result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() - expected = DataFrame( - data={ - "first_val": [2.0, 2.5, 7.0, 4.0], - "second_val": [4.5, 4.5, 5.0, 4.5], - }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), - ) - tm.assert_frame_equal(result, expected) - - -def test_groupby_resample_on_index_with_list_of_keys_missing_column(): - # GH 50876 - df = DataFrame( - data={ - "group": [0, 0, 0, 0, 1, 1, 1, 1], - "val": [3, 1, 4, 1, 5, 9, 2, 6], - }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), - ) - with pytest.raises(KeyError, match="Columns not found"): - df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() +class TestGroupByResampleTimeIndex(unittest.TestCase): + """Test groupby resample with a time index where a list of columns is given.""" + def setUp(self) -> None: + self.df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "first_val": [3, 1, 4, 1, 5, 9, 2, 6], + "second_val": [2, 7, 1, 8, 2, 8, 1, 8], + "third_val": [1, 4, 1, 4, 2, 1, 3, 5], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + + def test_list_of_one_key(self): + # GH 50840 + result = self.df.groupby("group").resample("2D")[["first_val"]].mean() + expected = DataFrame( + data={ + "first_val": [2.0, 2.5, 7.0, 4.0], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) + + def test_list_of_multiple_keys(self): + # GH 50876 + result = self.df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() + expected = DataFrame( + data={ + "first_val": [2.0, 2.5, 7.0, 4.0], + "second_val": [4.5, 4.5, 5.0, 4.5], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) + + def test_missing_key_raises_KeyError(self): + """Test a key that is not in the list of columns.""" + # GH 50876 + with pytest.raises(KeyError, match="Columns not found"): + self.df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() From d55d7645c91b9a874cbb3bf545a8a764c4ad69b1 Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Fri, 20 Jan 2023 14:22:19 -0600 Subject: [PATCH 6/7] Revert "Refactor groupby-resample tests via TestCase to remove duplicate code." This reverts commit d5226067a889e5a2326707c9ef4c27934a035fab. --- .../tests/resample/test_resampler_grouper.py | 138 ++++++++++-------- 1 file changed, 77 insertions(+), 61 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index c9d9ec3c8a6d6..c3717cee05f2b 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -1,4 +1,3 @@ -import unittest from textwrap import dedent import numpy as np @@ -539,63 +538,80 @@ def test_groupby_resample_size_all_index_same(): tm.assert_series_equal(result, expected) -class TestGroupByResampleTimeIndex(unittest.TestCase): - """Test groupby resample with a time index where a list of columns is given.""" - def setUp(self) -> None: - self.df = DataFrame( - data={ - "group": [0, 0, 0, 0, 1, 1, 1, 1], - "first_val": [3, 1, 4, 1, 5, 9, 2, 6], - "second_val": [2, 7, 1, 8, 2, 8, 1, 8], - "third_val": [1, 4, 1, 4, 2, 1, 3, 5], - }, - index=Series( - date_range(start="2016-01-01", periods=8), - name="date", - ), - ) - - def test_list_of_one_key(self): - # GH 50840 - result = self.df.groupby("group").resample("2D")[["first_val"]].mean() - expected = DataFrame( - data={ - "first_val": [2.0, 2.5, 7.0, 4.0], - }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), - ) - tm.assert_frame_equal(result, expected) - - def test_list_of_multiple_keys(self): - # GH 50876 - result = self.df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() - expected = DataFrame( - data={ - "first_val": [2.0, 2.5, 7.0, 4.0], - "second_val": [4.5, 4.5, 5.0, 4.5], - }, - index=Index( - data=[ - (0, Timestamp("2016-01-01")), - (0, Timestamp("2016-01-03")), - (1, Timestamp("2016-01-05")), - (1, Timestamp("2016-01-07")), - ], - name=("group", "date"), - ), - ) - tm.assert_frame_equal(result, expected) - - def test_missing_key_raises_KeyError(self): - """Test a key that is not in the list of columns.""" - # GH 50876 - with pytest.raises(KeyError, match="Columns not found"): - self.df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() +def test_groupby_resample_on_index_with_list_of_keys(): + # GH 50840 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "val": [3, 1, 4, 1, 5, 9, 2, 6], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + result = df.groupby("group").resample("2D")[["val"]].mean() + expected = DataFrame( + data={ + "val": [2.0, 2.5, 7.0, 4.0], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): + # GH 50876 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "first_val": [3, 1, 4, 1, 5, 9, 2, 6], + "second_val": [2, 7, 1, 8, 2, 8, 1, 8], + "third_val": [1, 4, 1, 4, 2, 1, 3, 5], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean() + expected = DataFrame( + data={ + "first_val": [2.0, 2.5, 7.0, 4.0], + "second_val": [4.5, 4.5, 5.0, 4.5], + }, + index=Index( + data=[ + (0, Timestamp("2016-01-01")), + (0, Timestamp("2016-01-03")), + (1, Timestamp("2016-01-05")), + (1, Timestamp("2016-01-07")), + ], + name=("group", "date"), + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_index_with_list_of_keys_missing_column(): + # GH 50876 + df = DataFrame( + data={ + "group": [0, 0, 0, 0, 1, 1, 1, 1], + "val": [3, 1, 4, 1, 5, 9, 2, 6], + }, + index=Series( + date_range(start="2016-01-01", periods=8), + name="date", + ), + ) + with pytest.raises(KeyError, match="Columns not found"): + df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean() From 4295db5fa44b68617bbe8133db2b124be9e4afef Mon Sep 17 00:00:00 2001 From: Chris Roth Date: Fri, 20 Jan 2023 14:29:11 -0600 Subject: [PATCH 7/7] Fix typo in bug fix entry for #50840 in doc/source/whatsnew/v2.0.0.rst --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a5ee8c887c517..986e5891033a1 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1096,7 +1096,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) - Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`) - Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`) -- Bug in :meth:`DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`) +- Bug in :meth:`.DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`) - Reshaping