Skip to content

Commit

Permalink
A bit more coverage and better error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
wence- committed May 24, 2023
1 parent ae4188d commit 163b3fc
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 10 deletions.
18 changes: 13 additions & 5 deletions python/cudf/cudf/core/column_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,12 +373,16 @@ def get_labels_by_index(self, index: Any) -> tuple:
return self.names[start:stop:step]
elif pd.api.types.is_integer(index):
return (self.names[index],)
elif index and all(map(is_bool, index)):
if len(index) != len(self.names):
raise IndexError("Invalid boolean mask for column selection")
elif (bn := len(index)) > 0 and all(map(is_bool, index)):
if bn != (n := len(self.names)):
raise IndexError(
f"Boolean mask has wrong length: {bn} not {n}"
)
if isinstance(index, (pd.Series, cudf.Series)):
# Don't allow iloc indexing with series
raise IndexError("Cannot use Series object for iloc indexing")
raise NotImplementedError(
"Cannot use Series object for mask iloc indexing"
)
# TODO: Doesn't handle on-device columns
return tuple(n for n, keep in zip(self.names, index) if keep)
else:
Expand Down Expand Up @@ -476,7 +480,11 @@ def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:
# Might be a generator
key = tuple(key)
# Special-casing for boolean mask
if key and all(map(is_bool, key)):
if (bn := len(key)) > 0 and all(map(is_bool, key)):
if bn != (n := len(self.names)):
raise IndexError(
f"Boolean mask has wrong length: {bn} not {n}"
)
data = dict(
item
for item, keep in zip(self._grouped_data.items(), key)
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2905,7 +2905,7 @@ def _apply_boolean_mask(self, boolean_mask):
if not is_bool_dtype(boolean_mask.dtype):
raise ValueError("boolean_mask is not boolean type.")
if (bn := len(boolean_mask)) != (n := len(self)):
raise IndexError(f"Boolean index has wrong length: {bn} not {n}")
raise IndexError(f"Boolean mask has wrong length: {bn} not {n}")
return self._from_columns_like_self(
libcudf.stream_compaction.apply_boolean_mask(
list(self._index._columns + self._columns), boolean_mask
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def _get_elements_from_column(self, arg) -> Union[ScalarLike, ColumnBase]:
if is_bool_dtype(arg.dtype):
if (bn := len(arg)) != (n := len(self)):
raise IndexError(
f"Boolean index has wrong length: {bn} not {n}"
f"Boolean mask has wrong length: {bn} not {n}"
)
return self._column.apply_boolean_mask(arg)
raise NotImplementedError(f"Unknown indexer {type(arg)}")
Expand Down
34 changes: 31 additions & 3 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,8 +1708,36 @@ def test_boolean_mask_wrong_length(indexer, mask):
def test_boolean_mask_columns(indexer):
df = pd.DataFrame(np.zeros((3, 3)))
cdf = cudf.from_pandas(df)

expect = getattr(df, indexer)[:, [True, False, True]]
got = getattr(cdf, indexer)[:, [True, False, True]]
mask = [True, False, True]
expect = getattr(df, indexer)[:, mask]
got = getattr(cdf, indexer)[:, mask]

assert_eq(expect, got)


@pytest.mark.parametrize("indexer", ["loc", "iloc"])
@pytest.mark.parametrize(
    "mask",
    [[False, True], [False, False, True, True, True]],
    ids=["too-short", "too-long"],
)
def test_boolean_mask_columns_wrong_length(indexer, mask):
    """Check that a mis-sized boolean column mask raises ``IndexError``.

    A 3x3 frame is indexed along the column axis with a boolean list
    that is either shorter or longer than the number of columns. The
    same expectation is applied to the pandas frame first and then to
    its cudf counterpart, for both the ``loc`` and ``iloc`` indexers.
    """
    pandas_frame = pd.DataFrame(np.zeros((3, 3)))
    cudf_frame = cudf.from_pandas(pandas_frame)

    # pandas is exercised first so the reference behaviour is confirmed
    # before the cudf frame is checked against the same expectation.
    for frame in (pandas_frame, cudf_frame):
        accessor = getattr(frame, indexer)
        with pytest.raises(IndexError):
            accessor[:, mask]


def test_boolean_mask_columns_iloc_series():
    """Check that ``iloc`` rejects a boolean Series as a column mask.

    A correctly sized boolean ``pd.Series`` is used as the column
    indexer of ``iloc`` on a 3x3 frame. Both the pandas frame and the
    cudf frame built from it are expected to raise
    ``NotImplementedError``.
    """
    pandas_frame = pd.DataFrame(np.zeros((3, 3)))
    cudf_frame = cudf.from_pandas(pandas_frame)
    series_mask = pd.Series([True, False, True], dtype=bool)

    # Same expectation for the reference (pandas) and for cudf, in that order.
    for frame in (pandas_frame, cudf_frame):
        with pytest.raises(NotImplementedError):
            frame.iloc[:, series_mask]

0 comments on commit 163b3fc

Please sign in to comment.