Skip to content

Commit

Permalink
Backport PR #42579: BUG: DataFrame.copy not consolidating (#42679)
Browse files Browse the repository at this point in the history
Co-authored-by: jbrockmendel <[email protected]>
  • Loading branch information
meeseeksmachine and jbrockmendel authored Jul 23, 2021
1 parent a96cd88 commit d71ca72
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Bug fixes
~~~~~~~~~
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
-
- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`)

.. ---------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,9 @@ def copy_func(ax):

res = self.apply("copy", deep=deep)
res.axes = new_axes

if deep:
res._consolidate_inplace()
return res

def consolidate(self: T) -> T:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,8 @@ def _unstack_multiple(data, clocs, fill_value=None):

return result

dummy = data.copy()
# GH#42579 deep=False to avoid consolidating
dummy = data.copy(deep=False)
dummy.index = dummy_index

unstacked = dummy.unstack("__placeholder__", fill_value=fill_value)
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_copy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import DataFrame
import pandas._testing as tm

Expand Down Expand Up @@ -41,3 +44,20 @@ def test_copy(self, float_frame, float_string_frame):
# copy objects
copy = float_string_frame.copy()
assert copy._mgr is not float_string_frame._mgr

# NOTE(review): the decorator presumably skips this test when the
# ArrayManager backend is active, since it inspects ``_mgr.blocks`` -- confirm.
@td.skip_array_manager_invalid_test
def test_copy_consolidates(self):
# GH#42477
# Regression test: DataFrame.copy() should hand back a frame whose manager
# holds a single block, even when the source frame's manager holds many.
df = DataFrame(
{
"a": np.random.randint(0, 100, size=55),
"b": np.random.randint(0, 100, size=55),
}
)

# Add ten more integer columns one at a time via .loc; the assertion
# below pins that this leaves the frame with 11 separate blocks.
for i in range(0, 10):
df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55)

# Precondition: the source frame is not consolidated.
assert len(df._mgr.blocks) == 11
result = df.copy()
# The copy is expected to come back as one block.
assert len(result._mgr.blocks) == 1
3 changes: 3 additions & 0 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@ def test_copy(self, mgr):
# DatetimeTZBlock has DatetimeIndex values
assert cp_blk.values._data.base is blk.values._data.base

# copy(deep=True) consolidates, so the block-wise assertions will
# fail if mgr is not consolidated
mgr._consolidate_inplace()
cp = mgr.copy(deep=True)
for blk, cp_blk in zip(mgr.blocks, cp.blocks):

Expand Down

0 comments on commit d71ca72

Please sign in to comment.