From 901fc641e7bcdaf285fb582b3e450498f4552fba Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 1 May 2018 16:13:06 +0200 Subject: [PATCH] Emit warning for missing labels in Multiindex.loc[[...]] (and more) (#20770) * API: emit warning to raise KeyError in the future for missing keys also for MultiIndex closes #17758 closes #20748 closes #20753 --- doc/source/whatsnew/v0.23.0.txt | 3 + pandas/core/indexes/base.py | 3 + pandas/core/indexing.py | 342 +++++++++++-------- pandas/core/sparse/frame.py | 3 + pandas/tests/frame/test_indexing.py | 8 +- pandas/tests/indexing/common.py | 36 +- pandas/tests/indexing/test_floats.py | 18 +- pandas/tests/indexing/test_indexing.py | 15 +- pandas/tests/indexing/test_loc.py | 30 +- pandas/tests/series/indexing/test_numeric.py | 25 +- 10 files changed, 309 insertions(+), 174 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6c8a89ed9a80c..7ea10deb65cef 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -874,6 +874,7 @@ Deprecations - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) +- Indexing a :class:`MultiIndex` or a :class:`FloatIndex` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). - The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) - The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) @@ -1124,6 +1125,8 @@ Indexing - :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) - Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) +- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) +- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) - Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) - Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) - Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f208687a0cf1b..90ef80c958610 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4926,6 +4926,9 @@ def _ensure_index(index_like, copy=False): if hasattr(index_like, 'name'): return Index(index_like, name=index_like.name, copy=copy) + if is_iterator(index_like): + index_like = list(index_like) + # must check for exactly list here because of strict type # check in clean_index_list if isinstance(index_like, list): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fe6d6775c4e0b..d23beba1c534d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -16,6 +16,8 @@ _is_unorderable_exception, _ensure_platform_int) from pandas.core.dtypes.missing import isna, _infer_fill_value +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender from pandas.core.index import Index, MultiIndex @@ -186,33 +188,43 @@ def __setitem__(self, key, value): indexer = self._get_setitem_indexer(key) self._setitem_with_indexer(indexer, value) - def _has_valid_type(self, k, axis): - raise NotImplementedError() + def _validate_key(self, key, axis): + """ + Ensure that key is valid for current indexer. + + Parameters + ---------- + key : scalar, slice or list-like + The key requested + + axis : int + Dimension on which the indexing is being made + + Raises + ------ + TypeError + If the key (or some element of it) has wrong type + + IndexError + If the key (or some element of it) is out of bounds + + KeyError + If the key was not found + """ + raise AbstractMethodError() def _has_valid_tuple(self, key): """ check the key for valid keys across my indexer """ for i, k in enumerate(key): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if not self._has_valid_type(k, i): + try: + self._validate_key(k, i) + except ValueError: raise ValueError("Location based indexing can only have " "[{types}] types" .format(types=self._valid_types)) - def _should_validate_iterable(self, axis=None): - """ return a boolean whether this axes needs validation for a passed - iterable - """ - if axis is None: - axis = self.axis or 0 - ax = self.obj._get_axis(axis) - if isinstance(ax, MultiIndex): - return False - elif ax.is_floating(): - return False - - return True - def _is_nested_tuple_indexer(self, tup): if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) @@ -906,38 +918,34 @@ def _multi_take(self, tup): """ try: o = self.obj - d = {a: self._convert_for_reindex(t, axis=o._get_axis_number(a)) - for t, a in zip(tup, o._AXIS_ORDERS)} - return o.reindex(**d) - except(KeyError, IndexingError): - raise self._exception + d = {} + for key, axis in zip(tup, o._AXIS_ORDERS): + ax = o._get_axis(axis) + # Have the index compute an indexer or return None + # if it cannot handle: + indexer, keyarr = ax._convert_listlike_indexer(key, + kind=self.name) + # We only act on all found values: + if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis) + d[axis] = (ax[indexer], indexer) + continue + + # If we are trying to get actual keys from empty Series, we + # patiently wait for a KeyError later on - otherwise, convert + if len(ax) or not len(key): + key = self._convert_for_reindex(key, axis) + indexer = ax.get_indexer_for(key) + keyarr = ax.reindex(keyarr)[0] + self._validate_read_indexer(keyarr, indexer, + o._get_axis_number(axis)) + d[axis] = (keyarr, indexer) + return o._reindex_with_indexers(d, copy=True, allow_dups=True) + except (KeyError, IndexingError) as detail: + raise self._exception(detail) def _convert_for_reindex(self, key, axis=None): - if axis is None: - axis = self.axis or 0 - labels = self.obj._get_axis(axis) - - if com.is_bool_indexer(key): - key = check_bool_indexer(labels, key) - return labels[key] - else: - if isinstance(key, Index): - keyarr = labels._convert_index_indexer(key) - else: - # asarray can be unsafe, NumPy strings are weird - keyarr = com._asarray_tuplesafe(key) - - if is_integer_dtype(keyarr): - # Cast the indexer to uint64 if possible so - # that the values returned from indexing are - # also uint64. - keyarr = labels._convert_arr_indexer(keyarr) - - if not labels.is_integer(): - keyarr = _ensure_platform_int(keyarr) - return labels.take(keyarr) - - return keyarr + return key def _handle_lowerdim_multi_index_axis0(self, tup): # we have an axis0 multi-index, handle or raise @@ -1073,8 +1081,9 @@ def _getitem_axis(self, key, axis=None): if axis is None: axis = self.axis or 0 - if self._should_validate_iterable(axis): - self._has_valid_type(key, axis) + if is_iterator(key): + key = list(key) + self._validate_key(key, axis) labels = self.obj._get_axis(axis) if isinstance(key, slice): @@ -1110,8 +1119,7 @@ def _getitem_iterable(self, key, axis=None): if axis is None: axis = self.axis or 0 - if self._should_validate_iterable(axis): - self._has_valid_type(key, axis) + self._validate_key(key, axis) labels = self.obj._get_axis(axis) @@ -1125,19 +1133,18 @@ def _getitem_iterable(self, key, axis=None): indexer, keyarr = labels._convert_listlike_indexer( key, kind=self.name) if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis) return self.obj.take(indexer, axis=axis) + ax = self.obj._get_axis(axis) # existing labels are unique and indexer are unique if labels.is_unique and Index(keyarr).is_unique: + indexer = ax.get_indexer_for(key) + self._validate_read_indexer(key, indexer, axis) - try: - return self.obj.reindex(keyarr, axis=axis) - except AttributeError: - - # Series - if axis != 0: - raise AssertionError('axis must be 0') - return self.obj.reindex(keyarr) + d = {axis: [ax.reindex(keyarr)[0], indexer]} + return self.obj._reindex_with_indexers(d, copy=True, + allow_dups=True) # existing labels are non-unique else: @@ -1154,15 +1161,68 @@ def _getitem_iterable(self, key, axis=None): result = self.obj._take(indexer[indexer != -1], axis=axis, convert=False) + self._validate_read_indexer(key, new_indexer, axis) result = result._reindex_with_indexers( {axis: [new_target, new_indexer]}, copy=True, allow_dups=True) else: + self._validate_read_indexer(key, indexer, axis) result = self.obj._take(indexer, axis=axis) return result + def _validate_read_indexer(self, key, indexer, axis): + """ + Check that indexer can be used to return a result (e.g. at least one + element was found, unless the list of keys was actually empty). + + Parameters + ---------- + key : list-like + Target labels (only used to show correct error message) + indexer: array-like of booleans + Indices corresponding to the key (with -1 indicating not found) + axis: int + Dimension on which the indexing is being made + + Raises + ------ + KeyError + If at least one key was requested none was found. + """ + + ax = self.obj._get_axis(axis) + + if len(key) == 0: + return + + # Count missing values: + missing = (indexer < 0).sum() + + if missing: + if missing == len(indexer): + raise KeyError( + u"None of [{key}] are in the [{axis}]".format( + key=key, axis=self.obj._get_axis_name(axis))) + + # we skip the warning on Categorical/Interval + # as this check is actually done (check for + # non-missing values), but a bit later in the + # code, so we want to avoid warning & then + # just raising + + _missing_key_warning = textwrap.dedent(""" + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + + if not (ax.is_categorical() or ax.is_interval()): + warnings.warn(_missing_key_warning, + FutureWarning, stacklevel=5) + def _convert_to_indexer(self, obj, axis=None, is_setter=False): """ Convert indexing key into something we can use to do actual fancy @@ -1337,7 +1397,8 @@ def __init__(self, name, obj): DeprecationWarning, stacklevel=2) super(_IXIndexer, self).__init__(name, obj) - def _has_valid_type(self, key, axis): + @Appender(_NDFrameIndexer._validate_key.__doc__) + def _validate_key(self, key, axis): if isinstance(key, slice): return True @@ -1353,6 +1414,49 @@ def _has_valid_type(self, key, axis): return True + def _convert_for_reindex(self, key, axis=None): + """ + Transform a list of keys into a new array ready to be used as axis of + the object we return (e.g. including NaNs). + + Parameters + ---------- + key : list-like + Target labels + axis: int + Where the indexing is being made + + Returns + ------- + list-like of labels + """ + + if axis is None: + axis = self.axis or 0 + labels = self.obj._get_axis(axis) + + if com.is_bool_indexer(key): + key = check_bool_indexer(labels, key) + return labels[key] + + if isinstance(key, Index): + keyarr = labels._convert_index_indexer(key) + else: + # asarray can be unsafe, NumPy strings are weird + keyarr = com._asarray_tuplesafe(key) + + if is_integer_dtype(keyarr): + # Cast the indexer to uint64 if possible so + # that the values returned from indexing are + # also uint64. + keyarr = labels._convert_arr_indexer(keyarr) + + if not labels.is_integer(): + keyarr = _ensure_platform_int(keyarr) + return labels.take(keyarr) + + return keyarr + class _LocationIndexer(_NDFrameIndexer): _exception = Exception @@ -1656,7 +1760,8 @@ class _LocIndexer(_LocationIndexer): "index is integers), listlike of labels, boolean") _exception = KeyError - def _has_valid_type(self, key, axis): + @Appender(_NDFrameIndexer._validate_key.__doc__) + def _validate_key(self, key, axis): ax = self.obj._get_axis(axis) # valid for a label where all labels are in the index @@ -1665,48 +1770,12 @@ def _has_valid_type(self, key, axis): # boolean if isinstance(key, slice): - return True + return elif com.is_bool_indexer(key): - return True - - elif is_list_like_indexer(key): - - # mi is just a passthru - if isinstance(key, tuple) and isinstance(ax, MultiIndex): - return True - - if not is_iterator(key) and len(key): - - # True indicates missing values - missing = ax.get_indexer_for(key) < 0 - - if np.any(missing): - if len(key) == 1 or np.all(missing): - raise KeyError( - u"None of [{key}] are in the [{axis}]".format( - key=key, axis=self.obj._get_axis_name(axis))) - else: - - # we skip the warning on Categorical/Interval - # as this check is actually done (check for - # non-missing values), but a bit later in the - # code, so we want to avoid warning & then - # just raising - _missing_key_warning = textwrap.dedent(""" - Passing list-likes to .loc or [] with any missing label will raise - KeyError in the future, you can use .reindex() as an alternative. - - See the documentation here: - https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""") # noqa + return - if not (ax.is_categorical() or ax.is_interval()): - warnings.warn(_missing_key_warning, - FutureWarning, stacklevel=5) - - return True - - else: + elif not is_list_like_indexer(key): def error(): if isna(key): @@ -1729,8 +1798,6 @@ def error(): except: error() - return True - def _is_scalar_access(self, key): # this is a shortcut accessor to both .loc and .iloc # that provide the equivalent access of .at and .iat @@ -1789,11 +1856,14 @@ def _getitem_axis(self, key, axis=None): if axis is None: axis = self.axis or 0 + if is_iterator(key): + key = list(key) + labels = self.obj._get_axis(axis) key = self._get_partial_string_timestamp_match_key(key, labels) if isinstance(key, slice): - self._has_valid_type(key, axis) + self._validate_key(key, axis) return self._get_slice_axis(key, axis=axis) elif com.is_bool_indexer(key): return self._getbool_axis(key, axis=axis) @@ -1839,7 +1909,7 @@ def _getitem_axis(self, key, axis=None): return self.obj.iloc[tuple(indexer)] # fall thru to straight lookup - self._has_valid_type(key, axis) + self._validate_key(key, axis) return self._get_label(key, axis=axis) @@ -1871,7 +1941,7 @@ class _iLocIndexer(_LocationIndexer): "point is EXCLUDED), listlike of integers, boolean array") _exception = IndexError - def _has_valid_type(self, key, axis): + def _validate_key(self, key, axis): if com.is_bool_indexer(key): if hasattr(key, 'index') and isinstance(key.index, Index): if key.index.inferred_type == 'integer': @@ -1880,15 +1950,26 @@ def _has_valid_type(self, key, axis): "is not available") raise ValueError("iLocation based boolean indexing cannot use " "an indexable as a mask") - return True + return if isinstance(key, slice): - return True + return elif is_integer(key): - return self._is_valid_integer(key, axis) + self._validate_integer(key, axis) + elif isinstance(key, tuple): + # a tuple should already have been caught by this point + # so don't treat a tuple as a valid indexer + raise IndexingError('Too many indexers') elif is_list_like_indexer(key): - return self._is_valid_list_like(key, axis) - return False + # check that the key does not exceed the maximum size of the index + arr = np.array(key) + l = len(self.obj._get_axis(axis)) + + if len(arr) and (arr.max() >= l or arr.min() < -l): + raise IndexError("positional indexers are out-of-bounds") + else: + raise ValueError("Can only index by location with " + "a [{types}]".format(types=self._valid_types)) def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) @@ -1920,33 +2001,13 @@ def _getitem_scalar(self, key): values = self.obj._get_value(*key, takeable=True) return values - def _is_valid_integer(self, key, axis): + def _validate_integer(self, key, axis): # return a boolean if we have a valid integer indexer ax = self.obj._get_axis(axis) l = len(ax) if key >= l or key < -l: raise IndexError("single positional indexer is out-of-bounds") - return True - - def _is_valid_list_like(self, key, axis): - # return a boolean if we are a valid list-like (e.g. that we don't - # have out-of-bounds values) - - # a tuple should already have been caught by this point - # so don't treat a tuple as a valid indexer - if isinstance(key, tuple): - raise IndexingError('Too many indexers') - - # coerce the key to not exceed the maximum size of the index - arr = np.array(key) - ax = self.obj._get_axis(axis) - l = len(ax) - if (hasattr(arr, '__len__') and len(arr) and - (arr.max() >= l or arr.min() < -l)): - raise IndexError("positional indexers are out-of-bounds") - - return True def _getitem_tuple(self, tup): @@ -2017,17 +2078,13 @@ def _getitem_axis(self, key, axis=None): axis = self.axis or 0 if isinstance(key, slice): - self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) if isinstance(key, list): - try: - key = np.asarray(key) - except TypeError: # pragma: no cover - pass + key = np.asarray(key) if com.is_bool_indexer(key): - self._has_valid_type(key, axis) + self._validate_key(key, axis) return self._getbool_axis(key, axis=axis) # a list of integers @@ -2043,7 +2100,7 @@ def _getitem_axis(self, key, axis=None): "non-integer key") # validate the location - self._is_valid_integer(key, axis) + self._validate_integer(key, axis) return self._get_loc(key, axis=axis) @@ -2059,11 +2116,12 @@ def _convert_to_indexer(self, obj, axis=None, is_setter=False): elif is_float(obj): return self._convert_scalar_indexer(obj, axis) - elif self._has_valid_type(obj, axis): + try: + self._validate_key(obj, axis) return obj - - raise ValueError("Can only index by location with " - "a [{types}]".format(types=self._valid_types)) + except ValueError: + raise ValueError("Can only index by location with " + "a [{types}]".format(types=self._valid_types)) class _ScalarAccessIndexer(_NDFrameIndexer): diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 2cefbea722098..28299fbe61daf 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -715,6 +715,9 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, if fill_value is None: fill_value = np.nan + reindexers = {self._get_axis_number(a): val + for (a, val) in compat.iteritems(reindexers)} + index, row_indexer = reindexers.get(0, (None, None)) columns, col_indexer = reindexers.get(1, (None, None)) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 8e10e4c4fbc65..b887b1c9f1218 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -396,8 +396,12 @@ def test_getitem_setitem_ix_negative_integers(self): assert (self.frame['D'] == 0).all() df = DataFrame(np.random.randn(8, 4)) - with catch_warnings(record=True): - assert isna(df.ix[:, [-1]].values).all() + # ix does label-based indexing when having an integer index + with pytest.raises(KeyError): + df.ix[[-1]] + + with pytest.raises(KeyError): + df.ix[:, [-1]] # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index ded16224aedf2..f0ba1851b28dd 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -1,12 +1,13 @@ """ common utilities """ import itertools -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings import numpy as np from pandas.compat import lrange from pandas.core.dtypes.common import is_scalar -from pandas import Series, DataFrame, Panel, date_range, UInt64Index +from pandas import (Series, DataFrame, Panel, date_range, UInt64Index, + Float64Index, MultiIndex) from pandas.util import testing as tm from pandas.io.formats.printing import pprint_thing @@ -29,7 +30,7 @@ class Base(object): _objs = set(['series', 'frame', 'panel']) _typs = set(['ints', 'uints', 'labels', 'mixed', - 'ts', 'floats', 'empty', 'ts_rev']) + 'ts', 'floats', 'empty', 'ts_rev', 'multi']) def setup_method(self, method): @@ -54,6 +55,32 @@ def setup_method(self, method): major_axis=UInt64Index(lrange(0, 12, 3)), minor_axis=UInt64Index(lrange(0, 16, 4))) + self.series_floats = Series(np.random.rand(4), + index=Float64Index(range(0, 8, 2))) + self.frame_floats = DataFrame(np.random.randn(4, 4), + index=Float64Index(range(0, 8, 2)), + columns=Float64Index(range(0, 12, 3))) + with catch_warnings(record=True): + self.panel_floats = Panel(np.random.rand(4, 4, 4), + items=Float64Index(range(0, 8, 2)), + major_axis=Float64Index(range(0, 12, 3)), + minor_axis=Float64Index(range(0, 16, 4))) + + m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]), + MultiIndex.from_product([[5, 6], [7, 8]]), + MultiIndex.from_product([[9, 10], [11, 12]])] + + self.series_multi = Series(np.random.rand(4), + index=m_idces[0]) + self.frame_multi = DataFrame(np.random.randn(4, 4), + index=m_idces[0], + columns=m_idces[1]) + with catch_warnings(record=True): + self.panel_multi = Panel(np.random.rand(4, 4, 4), + items=m_idces[0], + major_axis=m_idces[1], + minor_axis=m_idces[2]) + self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) @@ -273,7 +300,8 @@ def _call(obj=obj): # Panel deprecations if isinstance(obj, Panel): - with catch_warnings(record=True): + with catch_warnings(): + filterwarnings("ignore", "\nPanel*", FutureWarning) _call() else: _call() diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index e3f93924aca0d..32a56aeafc6ad 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -685,17 +685,23 @@ def test_floating_misc(self): assert_series_equal(result1, result3) assert_series_equal(result1, result4) - result1 = s[[1.6, 5, 10]] - result2 = s.loc[[1.6, 5, 10]] - result3 = s.loc[[1.6, 5, 10]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result1 = s[[1.6, 5, 10]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result2 = s.loc[[1.6, 5, 10]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result3 = s.loc[[1.6, 5, 10]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) assert_series_equal(result1, Series( [np.nan, 2, 4], index=[1.6, 5, 10])) - result1 = s[[0, 1, 2]] - result2 = s.loc[[0, 1, 2]] - result3 = s.loc[[0, 1, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result1 = s[[0, 1, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result2 = s.loc[[0, 1, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result3 = s.loc[[0, 1, 2]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) assert_series_equal(result1, Series( diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 04225454f61f9..7a17408d4468f 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -20,6 +20,7 @@ validate_indices) from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm +from pandas.compat import PY2 from pandas.tests.indexing.common import Base, _mklbl @@ -193,15 +194,10 @@ def test_dups_fancy_indexing(self): result = df.loc[rows] tm.assert_frame_equal(result, expected) - # inconsistent returns for unique/duplicate indices when values are - # missing - df = DataFrame(np.random.randn(4, 3), index=list('ABCD')) - expected = df.reindex(['E']) - + # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) - with catch_warnings(record=True): - result = dfnu.ix[['E']] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError): + dfnu.ix[['E']] # ToDo: check_index_type can be True after GH 11497 @@ -226,6 +222,9 @@ def test_dups_fancy_indexing(self): result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(PY2, + reason="GH-20770. Py2 unreliable warnings catching.") + def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 39f4d2b7bd395..6ccff7e898a6a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -12,6 +12,7 @@ from pandas.util import testing as tm from pandas.tests.indexing.common import Base from pandas.api.types import is_scalar +from pandas.compat import PY2 class TestLoc(Base): @@ -120,7 +121,7 @@ def test_loc_getitem_label_out_of_range(self): typs=['ints', 'uints', 'labels', 'mixed', 'ts'], fails=KeyError) self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['floats'], fails=TypeError) + typs=['floats'], fails=KeyError) self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ints', 'uints', 'mixed'], fails=KeyError) self.check_result('label range', 'loc', 20, 'ix', 20, @@ -128,7 +129,7 @@ def test_loc_getitem_label_out_of_range(self): self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], axes=0, fails=TypeError) self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], - axes=0, fails=TypeError) + axes=0, fails=KeyError) def test_loc_getitem_label_list(self): @@ -152,17 +153,32 @@ def test_loc_getitem_label_list(self): [Timestamp('20130102'), Timestamp('20130103')], typs=['ts'], axes=0) + @pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with " + "Python 2 (GH #20770)")) def test_loc_getitem_label_list_with_missing(self): self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], typs=['empty'], fails=KeyError) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) + self.check_result('list lbl', 'loc', [0, 2, 10], 'ix', [0, 2, 10], + typs=['ints', 'uints', 'floats'], + axes=0, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) - self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], - typs=['ints', 'uints'], axes=2, fails=KeyError) + typs=['ints', 'uints', 'floats'], + axes=1, fails=KeyError) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], + typs=['ints', 'uints', 'floats'], + axes=2, fails=KeyError) + + # GH 17758 - MultiIndex and missing keys + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)], + 'ix', [(1, 3), (1, 4), (2, 5)], + typs=['multi'], + axes=0) def test_getitem_label_list_with_missing(self): s = Series(range(3), index=['a', 'b', 'c']) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index b964ec3874998..6df63c3981af3 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -57,14 +57,29 @@ def test_get_nan(): assert s.get(np.nan) is None assert s.get(np.nan, default='Missing') == 'Missing' - # ensure that fixing the above hasn't broken get + +def test_get_nan_multiple(): + # GH 8569 + # ensure that fixing "test_get_nan" above hasn't broken get # with multiple elements + s = pd.Float64Index(range(10)).to_series() + + idx = [2, 30] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert_series_equal(s.get(idx), + Series([2, np.nan], index=idx)) + + idx = [2, np.nan] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + assert_series_equal(s.get(idx), + Series([2, np.nan], index=idx)) + + # GH 17295 - all missing keys idx = [20, 30] - assert_series_equal(s.get(idx), - Series([np.nan] * 2, index=idx)) + assert(s.get(idx) is None) + idx = [np.nan, np.nan] - assert_series_equal(s.get(idx), - Series([np.nan] * 2, index=idx)) + assert(s.get(idx) is None) def test_delitem():