From 151bdfecb5dfcf3994b43e427e6fae1915eee938 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 19 Dec 2019 10:15:46 -0600 Subject: [PATCH] updates --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/arrays/datetimelike.py | 4 +++- pandas/core/common.py | 19 ++++++------------- pandas/core/frame.py | 4 ---- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/datetimes.py | 2 ++ pandas/core/indexes/multi.py | 3 ++- pandas/core/indexing.py | 3 +++ pandas/core/series.py | 1 + pandas/tests/indexes/common.py | 5 +++++ 10 files changed, 26 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e44fec112c5c1..06f2e95316c62 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -706,6 +706,7 @@ Datetimelike - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in datetimelike indexes and arrays not validating that the length of a boolean mask matches the array (:issue:`30308`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f5d1e62f44fd0..90e56930bf13b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -416,7 +416,9 @@ def __getitem__(self, key): return self._box_func(val) if com.is_bool_indexer(key): - key = np.asarray(key, dtype=bool) + from pandas.core.indexing import check_bool_indexer + + key = check_bool_indexer(self, key) if key.all(): key = slice(0, None, None) else: 
diff --git a/pandas/core/common.py b/pandas/core/common.py index 186bfb573a6b2..dbf0339c5b009 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -112,18 +112,18 @@ def is_bool_indexer(key: Any) -> bool: bool Whether `key` is a valid boolean indexer. - Raises - ------ - ValueError - When the array is an object-dtype ndarray or ExtensionArray - and contains missing values. + Notes + ----- + This function is inexpensive for `bool` and `BooleanDtype`. + It is expensive for object-dtype backed arrays. In this case + a scan of the data to check that all the values are bool is + needed. See Also -------- api.extensions.check_bool_array_indexer : Check that `key` is a valid mask for an array, and convert to an ndarary. """ - na_msg = "cannot index with vector containing NA / NaN values" if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): @@ -131,16 +131,9 @@ def is_bool_indexer(key: Any) -> bool: key = np.asarray(values_from_object(key)) if not lib.is_bool_array(key): - if isna(key).any(): - raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): - # an ndarray with bool-dtype by definition has no missing values. - # So we only need to check for NAs in ExtensionArrays - if is_extension_array_dtype(key.dtype): - if np.any(key.isna()): - raise ValueError(na_msg) return True elif isinstance(key, list): try: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b699961cf07e8..fd3a67fa73620 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2799,10 +2799,6 @@ def _setitem_slice(self, key, value): def _setitem_array(self, key, value): # also raises Exception if object array with NA values if com.is_bool_indexer(key): - if len(key) != len(self.index): - raise ValueError( - f"Item wrong length {len(key)} instead of {len(self.index)}!" 
- ) key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5abd049b9564c..e370417c8a0fe 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3998,6 +3998,8 @@ def __getitem__(self, key): corresponding `Index` subclass. """ + from pandas.core.indexing import check_bool_indexer + # There's no custom logic to be implemented in __getslice__, so it's # not overloaded intentionally. getitem = self._data.__getitem__ @@ -4013,7 +4015,7 @@ def __getitem__(self, key): return promote(getitem(key)) if com.is_bool_indexer(key): - key = np.asarray(key, dtype=bool) + key = check_bool_indexer(self, key) key = com.values_from_object(key) result = getitem(key) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 523c434cb7377..1fb2ec181605b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1115,6 +1115,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): _has_same_tz = ea_passthrough(DatetimeArray._has_same_tz) def __getitem__(self, key): + # if com.is_bool_indexer(key): + # breakpoint() result = self._data.__getitem__(key) if is_scalar(result): return result diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9e434d0f5f704..58e227f7364fc 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -42,6 +42,7 @@ ensure_index, ) from pandas.core.indexes.frozen import FrozenList +from pandas.core.indexing import check_bool_indexer import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -1934,7 +1935,7 @@ def __getitem__(self, key): return tuple(retval) else: if com.is_bool_indexer(key): - key = np.asarray(key, dtype=bool) + key = check_bool_indexer(self, key) sortorder = self.sortorder else: # cannot be sure whether the result will be sorted diff --git 
a/pandas/core/indexing.py b/pandas/core/indexing.py index f8c698bee98c1..c0edd5e9dffaa 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1595,6 +1595,7 @@ def _validate_key(self, key, axis: int): return if com.is_bool_indexer(key): + # XXX: do we need to verify no NA here? return if not is_list_like_indexer(key): @@ -1681,6 +1682,7 @@ def _getitem_axis(self, key, axis: int): self._validate_key(key, axis) return self._get_slice_axis(key, axis=axis) elif com.is_bool_indexer(key): + # check_bool_indexer is called in _getbool_axis return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): @@ -2030,6 +2032,7 @@ def _getitem_axis(self, key, axis: int): key = np.asarray(key) if com.is_bool_indexer(key): + # check_bool_indexer is called in _getbool_axis self._validate_key(key, axis) return self._getbool_axis(key, axis=axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index 54c163330e6ee..c634fc52bee29 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -879,6 +879,7 @@ def __getitem__(self, key): elif key is Ellipsis: return self elif com.is_bool_indexer(key): + # We check later on. pass else: diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 102949fe3f05e..a6103cff1f016 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -220,6 +220,11 @@ def test_get_indexer_consistency(self, indices): assert isinstance(indexer, np.ndarray) assert indexer.dtype == np.intp + def test_getitem_mask_wrong_length(self, indices): + mask = np.array([True]) + with pytest.raises(IndexError, match="Item wrong length 1"): + indices[mask] + def test_ndarray_compat_properties(self): idx = self.create_index() assert idx.T.equals(idx)