Skip to content

Commit

Permalink
BUG: ArrayManager not respecting copy keyword (#44889)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Jan 14, 2022
1 parent e53bda9 commit 83ea173
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 44 deletions.
23 changes: 7 additions & 16 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
Expand All @@ -44,7 +43,6 @@
is_named_tuple,
is_object_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCSeries,
Expand Down Expand Up @@ -475,23 +473,16 @@ def dict_to_mgr(
keys = list(data.keys())
columns = Index(keys)
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
arrays = [
arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
]

if copy:
# arrays_to_mgr (via form_blocks) won't make copies for EAs
# dtype attr check to exclude EADtype-castable strs
arrays = [
x
if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype)
else x.copy()
for x in arrays
]
# TODO: can we get rid of the dt64tz special case above?
if typ == "block":
# We only need to copy arrays that will not get consolidated, i.e.
# only EA arrays
arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays]
else:
# dtype check to exclude e.g. range objects, scalars
arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]

return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)

Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,10 +421,6 @@ def test_setitem_frame_duplicate_columns(self, using_array_manager, request):
# TODO(ArrayManager) .loc still overwrites
expected["B"] = expected["B"].astype("int64")

mark = pytest.mark.xfail(
reason="Both 'A' columns get set with 3 instead of 0 and 3"
)
request.node.add_marker(mark)
else:
# set these with unique columns to be extra-unambiguous
expected[2] = expected[2].astype(np.int64)
Expand Down
13 changes: 4 additions & 9 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2157,8 +2157,6 @@ def test_constructor_ndarray_copy(self, float_frame, using_array_manager):
arr[0, 0] = 1000
assert df.iloc[0, 0] == 1000

# TODO(ArrayManager) keep view on Series?
@td.skip_array_manager_not_yet_implemented
def test_constructor_series_copy(self, float_frame):
series = float_frame._series

Expand Down Expand Up @@ -2507,13 +2505,10 @@ def test_constructor_list_str_na(self, string_dtype):
def test_dict_nocopy(
self, request, copy, any_numeric_ea_dtype, any_numpy_dtype, using_array_manager
):
if using_array_manager and not (
(any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES))
or (
any_numpy_dtype
in (tm.DATETIME64_DTYPES + tm.TIMEDELTA64_DTYPES + tm.BOOL_DTYPES)
and copy
)
if (
using_array_manager
and not copy
and not (any_numpy_dtype in (tm.STRING_DTYPES + tm.BYTES_DTYPES))
):
# TODO(ArrayManager) properly honor copy keyword for dict input
td.mark_array_manager_not_yet_implemented(request)
Expand Down
12 changes: 2 additions & 10 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,11 +773,7 @@ def test_operators_timedelta64(self):
def test_std_timedelta64_skipna_false(self):
# GH#37392
tdi = pd.timedelta_range("1 Day", periods=10)
df = DataFrame({"A": tdi, "B": tdi})
# Copy is needed for ArrayManager case, otherwise setting df.iloc
# below edits tdi, alterting both df['A'] and df['B']
# FIXME: passing copy=True to constructor does not fix this
df = df.copy()
df = DataFrame({"A": tdi, "B": tdi}, copy=True)
df.iloc[-2, -1] = pd.NaT

result = df.std(skipna=False)
Expand Down Expand Up @@ -1064,11 +1060,7 @@ def test_idxmax_idxmin_convert_dtypes(self, op, expected_value):

def test_idxmax_dt64_multicolumn_axis1(self):
dti = date_range("2016-01-01", periods=3)
df = DataFrame({3: dti, 4: dti[::-1]})
# FIXME: copy needed for ArrayManager, otherwise setting with iloc
# below also sets df.iloc[-1, 1]; passing copy=True to DataFrame
# does not solve this.
df = df.copy()
df = DataFrame({3: dti, 4: dti[::-1]}, copy=True)
df.iloc[0, 0] = pd.NaT

df._consolidate_inplace()
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,9 @@ def test_multiindex_assignment_single_dtype(self, using_array_manager):
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
result = df.loc[4, "c"]
tm.assert_series_equal(result, exp)
if not using_array_manager:
# FIXME(ArrayManager): this correctly preserves dtype,
# but incorrectly is not inplace.
# extra check for inplace-ness
tm.assert_numpy_array_equal(view, exp.values)

# extra check for inplace-ness
tm.assert_numpy_array_equal(view, exp.values)

# arr + 0.5 cannot be cast losslessly to int, so we upcast
df.loc[4, "c"] = arr + 0.5
Expand Down

0 comments on commit 83ea173

Please sign in to comment.