Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ArrayManager] TST: run (+fix/skip) pandas/tests/indexing tests #40325

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ jobs:
pytest pandas/tests/frame/indexing/test_where.py
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column

pytest pandas/tests/indexing/

pytest pandas/tests/api/
pytest pandas/tests/arrays/
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@ def length_of_indexer(indexer, target=None) -> int:
# GH#25774
return indexer.sum()
return len(indexer)
elif isinstance(indexer, range):
return (indexer.stop - indexer.start) // indexer.step
elif not is_list_like_indexer(indexer):
return 1
raise AssertionError("cannot find the length of the indexer")
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def get_dtypes(self):
def __repr__(self) -> str:
output = type(self).__name__
output += f"\nIndex: {self._axes[0]}"
if self.ndim == 1:
if self.ndim == 2:
output += f"\nColumns: {self._axes[1]}"
output += f"\n{len(self.arrays)} arrays:"
for arr in self.arrays:
Expand All @@ -227,6 +227,11 @@ def _verify_integrity(self) -> None:
"Passed arrays should be np.ndarray or ExtensionArray instances, "
f"got {type(arr)} instead"
)
if not arr.ndim == 1:
raise ValueError(
"Passed arrays should be 1-dimensional, got array with "
f"{arr.ndim} dimensions instead."
)

def reduce(
self: T, func: Callable, ignore_failures: bool = False
Expand Down Expand Up @@ -1143,7 +1148,13 @@ def __init__(
def _verify_integrity(self) -> None:
(n_rows,) = self.shape
assert len(self.arrays) == 1
assert len(self.arrays[0]) == n_rows
arr = self.arrays[0]
assert len(arr) == n_rows
if not arr.ndim == 1:
raise ValueError(
"Passed array should be 1-dimensional, got array with "
f"{arr.ndim} dimensions instead."
)

@staticmethod
def _normalize_axis(axis):
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
DataFrame,
MultiIndex,
Expand Down Expand Up @@ -32,6 +34,7 @@ def test_detect_chained_assignment():
zed["eyes"]["right"].fillna(value=555, inplace=True)


@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view
def test_cache_updating():
# 5216
# make sure that we don't try to set a dead cache
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/indexing/multiindex/test_partial.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
DataFrame,
Float64Index,
Expand Down Expand Up @@ -114,6 +116,9 @@ def test_getitem_partial_column_select(self):
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
df.loc[("a", "foo"), :]

# TODO(ArrayManager) rewrite test to not use .values
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -119,6 +121,9 @@ def test_setitem_multiindex3(self):
expected=copy,
)

# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
@td.skip_array_manager_not_yet_implemented
def test_multiindex_setitem(self):

# GH 3738
Expand Down Expand Up @@ -457,6 +462,8 @@ def test_setitem_new_column_all_na(self):
assert df["new"].isna().all()


@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values
# is not a view
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
Expand Down
52 changes: 39 additions & 13 deletions pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -169,7 +171,7 @@ def test_detect_chained_assignment(self):
tm.assert_frame_equal(df, expected)

@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(self):
def test_detect_chained_assignment_raises(self, using_array_manager):

# test with the chaining
df = DataFrame(
Expand All @@ -180,13 +182,23 @@ def test_detect_chained_assignment_raises(self):
)
assert df._is_copy is None

with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][0] = -5
if not using_array_manager:
with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][0] = -5

with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][1] = np.nan
with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][1] = np.nan

assert df["A"]._is_copy is None

assert df["A"]._is_copy is None
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
# a mixed dataframe
df["A"][0] = -5
df["A"][1] = -6
expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
expected["B"] = expected["B"].astype("float64")
tm.assert_frame_equal(df, expected)

@pytest.mark.arm_slow
def test_detect_chained_assignment_fails(self):
Expand Down Expand Up @@ -219,18 +231,24 @@ def test_detect_chained_assignment_doc_example(self):
df[indexer]["c"] = 42

@pytest.mark.arm_slow
def test_detect_chained_assignment_object_dtype(self):
def test_detect_chained_assignment_object_dtype(self, using_array_manager):

expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][0] = 111

with pytest.raises(com.SettingWithCopyError, match=msg):
df.loc[0]["A"] = 111

df.loc[0, "A"] = 111
if not using_array_manager:
with pytest.raises(com.SettingWithCopyError, match=msg):
df["A"][0] = 111

df.loc[0, "A"] = 111
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
# a mixed dataframe
df["A"][0] = 111

tm.assert_frame_equal(df, expected)

@pytest.mark.arm_slow
Expand Down Expand Up @@ -347,7 +365,7 @@ def test_detect_chained_assignment_undefined_column(self):
df.iloc[0:5]["group"] = "a"

@pytest.mark.arm_slow
def test_detect_chained_assignment_changing_dtype(self):
def test_detect_chained_assignment_changing_dtype(self, using_array_manager):

# Mixed type setting but same dtype & changing dtype
df = DataFrame(
Expand All @@ -365,8 +383,14 @@ def test_detect_chained_assignment_changing_dtype(self):
with pytest.raises(com.SettingWithCopyError, match=msg):
df.loc[2]["C"] = "foo"

with pytest.raises(com.SettingWithCopyError, match=msg):
if not using_array_manager:
with pytest.raises(com.SettingWithCopyError, match=msg):
df["C"][2] = "foo"
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter if it's
# changing the dtype or not
df["C"][2] = "foo"
assert df.loc[2, "C"] == "foo"

def test_setting_with_copy_bug(self):

Expand Down Expand Up @@ -411,6 +435,8 @@ def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
)
tm.assert_frame_equal(df, expected)

# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
@td.skip_array_manager_not_yet_implemented
def test_chained_getitem_with_lists(self):

# GH6394
Expand Down
63 changes: 50 additions & 13 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
Categorical,
CategoricalDtype,
Expand Down Expand Up @@ -63,26 +65,30 @@ class TestiLocBaseIndependent:
],
)
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
def test_iloc_setitem_fullcol_categorical(self, indexer, key):
def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager):
frame = DataFrame({0: range(3)}, dtype=object)

cat = Categorical(["alpha", "beta", "gamma"])

assert frame._mgr.blocks[0]._can_hold_element(cat)
if not using_array_manager:
assert frame._mgr.blocks[0]._can_hold_element(cat)

df = frame.copy()
orig_vals = df.values
indexer(df)[key, 0] = cat

overwrite = isinstance(key, slice) and key == slice(None)

if overwrite:
if overwrite or using_array_manager:
# TODO(ArrayManager) we always overwrite because ArrayManager takes
# the "split" path, which still overwrites
# TODO: GH#39986 this probably shouldn't behave differently
expected = DataFrame({0: cat})
assert not np.shares_memory(df.values, orig_vals)
else:
expected = DataFrame({0: cat}).astype(object)
assert np.shares_memory(df.values, orig_vals)
if not using_array_manager:
assert np.shares_memory(df[0].values, orig_vals)

tm.assert_frame_equal(df, expected)

Expand All @@ -93,13 +99,27 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key):
else:
assert cat[0] != "gamma"

# TODO with mixed dataframe ("split" path), we always overwrite the column
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
df = frame.copy()
orig_vals = df.values
indexer(df)[key, 0] = cat
expected = DataFrame({0: cat, 1: range(3)})
tm.assert_frame_equal(df, expected)

# TODO(ArrayManager) does not yet update parent
@td.skip_array_manager_not_yet_implemented
@pytest.mark.parametrize("box", [array, Series])
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why both the skip and the fixture? I'd expect one or the other.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I already rewrote the test so that it can pass with ArrayManager (accessing `.values` on a DataFrame will never return a view of the data with ArrayManager, so that part had to be rewritten); it is now only still failing for other reasons.

# GH#38952 Case with not setting a full column
# IntegerArray without NAs
arr = array([1, 2, 3, 4])
obj = frame_or_series(arr.to_numpy("i8"))
values = obj.values

if frame_or_series is Series or not using_array_manager:
values = obj.values
else:
values = obj[0].values

obj.iloc[:2] = box(arr[2:])
expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8"))
Expand All @@ -109,7 +129,10 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
if frame_or_series is Series:
assert obj.values is values
else:
assert obj.values.base is values.base and values.base is not None
if using_array_manager:
assert obj[0].values is values
else:
assert obj.values.base is values.base and values.base is not None

def test_is_scalar_access(self):
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
Expand Down Expand Up @@ -481,13 +504,16 @@ def test_iloc_setitem_dups(self):
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self):
def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
self, using_array_manager
):
# Same as the "assign back to self" check in test_iloc_setitem_dups
# but on a DataFrame with multiple blocks
df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])

df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
assert len(df._mgr.blocks) == 2
if not using_array_manager:
assert len(df._mgr.blocks) == 2
expected = df.copy()

# assign back to self
Expand Down Expand Up @@ -577,7 +603,7 @@ def test_iloc_getitem_labelled_frame(self):
with pytest.raises(ValueError, match=msg):
df.iloc["j", "D"]

def test_iloc_getitem_doc_issue(self):
def test_iloc_getitem_doc_issue(self, using_array_manager):

# multi axis slicing issue with single block
# surfaced in GH 6059
Expand Down Expand Up @@ -612,7 +638,8 @@ def test_iloc_getitem_doc_issue(self):
columns = list(range(0, 8, 2))
df = DataFrame(arr, index=index, columns=columns)

df._mgr.blocks[0].mgr_locs
if not using_array_manager:
df._mgr.blocks[0].mgr_locs
result = df.iloc[1:5, 2:4]
str(result)
result.dtypes
Expand Down Expand Up @@ -793,15 +820,20 @@ def test_iloc_empty_list_indexer_is_ok(self):
df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
)

def test_identity_slice_returns_new_object(self):
def test_identity_slice_returns_new_object(self, using_array_manager):
# GH13873
original_df = DataFrame({"a": [1, 2, 3]})
sliced_df = original_df.iloc[:]
assert sliced_df is not original_df

# should be a shallow copy
original_df["a"] = [4, 4, 4]
assert (sliced_df["a"] == 4).all()
if using_array_manager:
# TODO(ArrayManager) verify it is expected that the original didn't change
# setitem is replacing full column, so doesn't update "viewing" dataframe
assert not (sliced_df["a"] == 4).all()
else:
assert (sliced_df["a"] == 4).all()

original_series = Series([1, 2, 3, 4, 5, 6])
sliced_series = original_series.iloc[:]
Expand Down Expand Up @@ -932,6 +964,9 @@ def test_iloc_getitem_readonly_key(self):
expected = df["data"].loc[[1, 3, 6]]
tm.assert_series_equal(result, expected)

# TODO(ArrayManager) setting single item with an iterable doesn't work yet
# in the "split" path
@td.skip_array_manager_not_yet_implemented
def test_iloc_assign_series_to_df_cell(self):
# GH 37593
df = DataFrame(columns=["a"], index=[0])
Expand Down Expand Up @@ -1088,6 +1123,8 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
# GH#32257 we let numpy do validation, get their exception
float_frame.iloc[:, :, :] = 1

# TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
@td.skip_array_manager_not_yet_implemented
def test_iloc_frame_indexer(self):
# GH#39004
df = DataFrame({"a": [1, 2, 3]})
Expand Down
Loading