From f1d136744e0c9a91f7c78e7fcc442dcc5e2989d5 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 30 Jun 2021 02:19:45 -0700 Subject: [PATCH] Backport PR #42317: Revert "REF: move shift logic from BlockManager to DataFrame" (#42321) Co-authored-by: jbrockmendel --- pandas/core/frame.py | 51 ++++++++-------------- pandas/core/internals/managers.py | 19 ++++++++ pandas/tests/apply/test_frame_transform.py | 9 +++- 3 files changed, 44 insertions(+), 35 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cd127d9c9ec63..954ea24d0d8fc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5285,45 +5285,28 @@ def shift( axis = self._get_axis_number(axis) ncols = len(self.columns) + if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0: + # We will infer fill_value to match the closest column - if ( - axis == 1 - and periods != 0 - and ncols > 0 - and (fill_value is lib.no_default or len(self._mgr.arrays) > 1) - ): - # Exclude single-array-with-fill_value case so we issue a FutureWarning - # if an integer is passed with datetimelike dtype GH#31971 - from pandas import concat + # Use a column that we know is valid for our column's dtype GH#38434 + label = self.columns[0] - # tail: the data that is still in our shifted DataFrame if periods > 0: - tail = self.iloc[:, :-periods] - else: - tail = self.iloc[:, -periods:] - # pin a simple Index to avoid costly casting - tail.columns = range(len(tail.columns)) - - if fill_value is not lib.no_default: - # GH#35488 - # TODO(EA2D): with 2D EAs we could construct other directly - ser = Series(fill_value, index=self.index) + result = self.iloc[:, :-periods] + for col in range(min(ncols, abs(periods))): + # TODO(EA2D): doing this in a loop unnecessary with 2D EAs + # Define filler inside loop so we get a copy + filler = self.iloc[:, 0].shift(len(self)) + result.insert(0, label, filler, allow_duplicates=True) else: - # We infer fill_value to match the closest column - if periods > 0: - ser = self.iloc[:, 0].shift(len(self)) - else: - ser = self.iloc[:, -1].shift(len(self)) - - width = min(abs(periods), ncols) - other = concat([ser] * width, axis=1) - - if periods > 0: - result = concat([other, tail], axis=1) - else: - result = concat([tail, other], axis=1) + result = self.iloc[:, -periods:] + for col in range(min(ncols, abs(periods))): + # Define filler inside loop so we get a copy + filler = self.iloc[:, -1].shift(len(self)) + result.insert( + len(result.columns), label, filler, allow_duplicates=True + ) - result = cast(DataFrame, result) result.columns = self.columns.copy() return result diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 48f0b7f7f964b..cc07caac31c0c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -381,6 +381,25 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None + if axis == 0 and self.ndim == 2 and self.nblocks > 1: + # GH#35488 we need to watch out for multi-block cases + # We only get here with fill_value not-lib.no_default + ncols = self.shape[0] + if periods > 0: + indexer = [-1] * periods + list(range(ncols - periods)) + else: + nper = abs(periods) + indexer = list(range(nper, ncols)) + [-1] * nper + result = self.reindex_indexer( + self.items, + indexer, + axis=0, + fill_value=fill_value, + allow_dups=True, + consolidate=False, + ) + return result + return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 9050fab702881..0d3d4eecf92aa 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -39,8 +39,15 @@ def test_transform_ufunc(axis, float_frame, frame_or_series): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel(axis, float_frame, op, request): +def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request): # GH 35964 + if using_array_manager and op == "pct_change" and axis in (1, "columns"): + # TODO(ArrayManager) shift with axis=1 + request.node.add_marker( + pytest.mark.xfail( + reason="shift axis=1 not yet implemented for ArrayManager" + ) + ) args = [0.0] if op == "fillna" else [] if axis == 0 or axis == "index":