Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix groupby-resample KeyError when resampling on Index and giving explicit list of columns. #50876

Merged
merged 8 commits into from
Jan 23, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
- Bug in :meth:`.DataFrameGroupBy.resample` raising ``KeyError`` when selecting a list of columns from the result while resampling on the time index (:issue:`50840`)
-

Reshaping
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None):

# Try to select from a DataFrame, falling back to a Series
try:
if isinstance(key, list) and self.key not in key:
if isinstance(key, list) and self.key not in key and self.key is not None:
key.append(self.key)
groupby = self._groupby[key]
except IndexError:
Expand Down
79 changes: 79 additions & 0 deletions pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,3 +536,82 @@ def test_groupby_resample_size_all_index_same():
),
)
tm.assert_series_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys():
    # GH 50840
    # Selecting a one-element list of columns from a groupby-resample that
    # bins on the frame's index (no ``on=`` column) must return a DataFrame
    # instead of raising KeyError.
    dates = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=dates,
    )

    result = df.groupby("group").resample("2D")[["val"]].mean()

    # Each group spans four consecutive days, i.e. two 2-day bins per group;
    # every expected value is the mean of the two rows falling in that bin.
    expected_index = Index(
        data=[
            (0, Timestamp("2016-01-01")),
            (0, Timestamp("2016-01-03")),
            (1, Timestamp("2016-01-05")),
            (1, Timestamp("2016-01-07")),
        ],
        name=("group", "date"),
    )
    expected = DataFrame(
        data={
            "val": [2.0, 2.5, 7.0, 4.0],
        },
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    # GH 50876
    # Same scenario as the single-column case, but the selection list names
    # two of the three value columns; only those two may appear in the result.
    dates = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
            "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
            "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
        },
        index=dates,
    )

    selected = ["first_val", "second_val"]
    result = df.groupby("group").resample("2D")[selected].mean()

    # Two 2-day bins per group; each value is the mean of the two rows in
    # the bin. "third_val" is deliberately absent from the expected frame.
    expected_index = Index(
        data=[
            (0, Timestamp("2016-01-01")),
            (0, Timestamp("2016-01-03")),
            (1, Timestamp("2016-01-05")),
            (1, Timestamp("2016-01-07")),
        ],
        name=("group", "date"),
    )
    expected = DataFrame(
        data={
            "first_val": [2.0, 2.5, 7.0, 4.0],
            "second_val": [4.5, 4.5, 5.0, 4.5],
        },
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)


def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    # GH 50876
    # Asking for a column that does not exist must still raise KeyError with
    # the "Columns not found" message when resampling on the frame's index.
    dates = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=dates,
    )
    resampler = df.groupby("group").resample("2D")

    # Keep both the selection and the aggregation inside the context manager,
    # so the check passes regardless of which of the two steps raises.
    with pytest.raises(KeyError, match="Columns not found"):
        resampler[["val_not_in_dataframe"]].mean()