Skip to content

Commit

Permalink
Backport PR #35639: BUG: RollingGroupby with closed and column selection no longer raises ValueError (#35651)
Browse files Browse the repository at this point in the history

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
meeseeksmachine and mroeschke authored Aug 10, 2020
1 parent 396ba93 commit a057e74
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 8 deletions.
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ Categorical
-
-

**Groupby/resample/rolling**

- Bug in :class:`pandas.core.groupby.RollingGroupby` where passing ``closed`` with column selection would raise a ``ValueError`` (:issue:`35549`)

**Plotting**

-
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, obj, *args, **kwargs):
kwargs.pop("parent", None)
groupby = kwargs.pop("groupby", None)
if groupby is None:
groupby, obj = obj, obj.obj
groupby, obj = obj, obj._selected_obj
self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
Expand Down
10 changes: 3 additions & 7 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,7 +2196,7 @@ def _apply(
# Cannot use _wrap_outputs because we calculate the result all at once
# Compose MultiIndex result from grouping levels then rolling level
# Aggregate the MultiIndex data as tuples then the level names
grouped_object_index = self._groupby._selected_obj.index
grouped_object_index = self.obj.index
grouped_index_name = [grouped_object_index.name]
groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
result_index_names = groupby_keys + grouped_index_name
Expand All @@ -2220,10 +2220,6 @@ def _apply(
def _constructor(self):
    """Return the ``Rolling`` class."""
    return Rolling

@cache_readonly
def _selected_obj(self):
    """Return the ``_selected_obj`` of the wrapped groupby object."""
    grouped = self._groupby
    return grouped._selected_obj

def _create_blocks(self, obj: FrameOrSeries):
"""
Split data into blocks & return conformed data.
Expand Down Expand Up @@ -2262,7 +2258,7 @@ def _get_window_indexer(self, window: int) -> GroupbyRollingIndexer:
rolling_indexer: Union[Type[FixedWindowIndexer], Type[VariableWindowIndexer]]
if self.is_freq_type:
rolling_indexer = VariableWindowIndexer
index_array = self._groupby._selected_obj.index.asi8
index_array = self.obj.index.asi8
else:
rolling_indexer = FixedWindowIndexer
index_array = None
Expand All @@ -2279,7 +2275,7 @@ def _gotitem(self, key, ndim, subset=None):
# here so our index is carried thru to the selected obj
# when we do the splitting for the groupby
if self.on is not None:
self._groupby.obj = self._groupby.obj.set_index(self._on)
self.obj = self.obj.set_index(self._on)
self.on = None
return super()._gotitem(key, ndim, subset=subset)

Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,54 @@ def test_groupby_subselect_rolling(self):
name="b",
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_subset_with_closed(self):
    """
    Column selection after ``groupby().rolling(..., closed=...)`` works.

    Regression test for GH 35549.
    """
    stamp = pd.Timestamp("2019-01-01")
    frame = pd.DataFrame(
        {
            "column1": range(6),
            "column2": range(6),
            "group": 3 * ["A", "B"],
            "date": [stamp] * 6,
        }
    )

    roller = frame.groupby("group").rolling("1D", on="date", closed="left")
    result = roller["column1"].sum()

    # One (group, date) entry per input row, with the rows of each group
    # kept together: three for "A" followed by three for "B".
    index_tuples = [(key, stamp) for key in ("A", "B") for _ in range(3)]
    expected_index = pd.MultiIndex.from_tuples(
        index_tuples, names=["group", "date"]
    )
    expected = Series(
        [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
        index=expected_index,
        name="column1",
    )
    tm.assert_series_equal(result, expected)

def test_groupby_subset_rolling_subset_with_closed(self):
    """
    Column selection works on both the groupby and the rolling object.

    Same scenario as GH 35549, except that the groupby is narrowed to
    ``["column1", "date"]`` before ``rolling(..., closed=...)`` is applied.
    """
    stamp = pd.Timestamp("2019-01-01")
    frame = pd.DataFrame(
        {
            "column1": range(6),
            "column2": range(6),
            "group": 3 * ["A", "B"],
            "date": [stamp] * 6,
        }
    )

    narrowed = frame.groupby("group")[["column1", "date"]]
    roller = narrowed.rolling("1D", on="date", closed="left")
    result = roller["column1"].sum()

    # One (group, date) entry per input row, with the rows of each group
    # kept together: three for "A" followed by three for "B".
    index_tuples = [(key, stamp) for key in ("A", "B") for _ in range(3)]
    expected_index = pd.MultiIndex.from_tuples(
        index_tuples, names=["group", "date"]
    )
    expected = Series(
        [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
        index=expected_index,
        name="column1",
    )
    tm.assert_series_equal(result, expected)

0 comments on commit a057e74

Please sign in to comment.