diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index b5f4b5f5375e8..33f286fccccef 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -37,7 +37,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`) - Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`) -- +- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cc07caac31c0c..1c4fb98ee16bb 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -594,6 +594,9 @@ def copy_func(ax): res = self.apply("copy", deep=deep) res.axes = new_axes + + if deep: + res._consolidate_inplace() return res def consolidate(self: T) -> T: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0edb150bdc273..9203cf625d43a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -399,7 +399,8 @@ def _unstack_multiple(data, clocs, fill_value=None): return result - dummy = data.copy() + # GH#42579 deep=False to avoid consolidating + dummy = data.copy(deep=False) dummy.index = dummy_index unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index be52cf55fccb2..1c0b0755e7d94 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -1,5 +1,8 @@ +import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import DataFrame import pandas._testing as tm @@ -41,3 +44,20 @@ def test_copy(self, float_frame, float_string_frame): # copy objects copy = float_string_frame.copy() assert copy._mgr is not float_string_frame._mgr + + @td.skip_array_manager_invalid_test + def test_copy_consolidates(self): + # GH#42477 + df = DataFrame( + { + "a": np.random.randint(0, 100, size=55), + "b": np.random.randint(0, 100, size=55), + } + ) + + for i in range(0, 10): + df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55) + + assert len(df._mgr.blocks) == 11 + result = df.copy() + assert len(result._mgr.blocks) == 1 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0f4a30cfa9cf9..38a6209283080 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -461,6 +461,9 @@ def test_copy(self, mgr): # DatetimeTZBlock has DatetimeIndex values assert cp_blk.values._data.base is blk.values._data.base + # copy(deep=True) consolidates, so the block-wise assertions will + # fail is mgr is not consolidated + mgr._consolidate_inplace() cp = mgr.copy(deep=True) for blk, cp_blk in zip(mgr.blocks, cp.blocks):