From 74b306a8723fb1a4475e067e06b5520f5c1b409f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 10 Nov 2022 14:53:35 -0800 Subject: [PATCH 1/7] DEPR: Remove method and tolerance in Index.get_loc --- ci/deps/actions-38-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/compat/_optional.py | 2 +- pandas/core/indexes/base.py | 67 +++----------- pandas/core/indexes/datetimes.py | 14 +-- pandas/core/indexes/multi.py | 9 +- pandas/core/indexes/period.py | 10 +-- pandas/core/indexes/range.py | 20 ++--- pandas/core/indexes/timedeltas.py | 4 +- .../tests/indexes/datetimes/test_indexing.py | 88 ------------------- pandas/tests/indexes/numeric/test_indexing.py | 88 ++----------------- pandas/tests/indexes/object/test_indexing.py | 14 --- pandas/tests/indexes/period/test_indexing.py | 61 +------------ .../tests/indexes/timedeltas/test_indexing.py | 31 +------ pandas/tests/test_downstream.py | 7 +- setup.cfg | 2 +- 17 files changed, 47 insertions(+), 375 deletions(-) diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 8e0ccd77b19a6..ff8af9fab7f0d 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -51,7 +51,7 @@ dependencies: - sqlalchemy=1.4.16 - tabulate=0.8.9 - tzdata=2022a - - xarray=0.19.0 + - xarray=0.21.0 - xlrd=2.0.1 - xlsxwriter=1.4.3 - zstandard=0.15.2 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 11c419c399877..cdcd06967dd39 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -324,7 +324,7 @@ Can be managed as optional_extra with ``pandas[computation]``. Dependency Minimum Version optional_extra Notes ========================= ================== =============== ============================================================= SciPy 1.7.1 computation Miscellaneous statistical functions -xarray 0.19.0 computation pandas-like API for N-dimensional data +xarray 0.21.0 computation pandas-like API for N-dimensional data ========================= ================== =============== ============================================================= Excel files diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f4a6a6277b6a1..75e40cae1f8c4 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -440,6 +440,7 @@ Removal of prior version deprecations/changes - Removed the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument (:issue:`40245`) - Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`) - Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`) +- Removed the ``method`` and ``tolerance`` arguments in :meth:`Index.get_loc`. Use ``index.get_indexer([label], method=..., tolerance=...)`` instead (:issue:`42269`) - Removed the ``pandas.datetime`` submodule (:issue:`30489`) - Removed the ``pandas.np`` submodule (:issue:`30296`) - Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`) diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index abad188f06720..9bd4b384fadb0 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -41,7 +41,7 @@ "sqlalchemy": "1.4.16", "tables": "3.6.1", "tabulate": "0.8.9", - "xarray": "0.19.0", + "xarray": "0.21.0", "xlrd": "2.0.1", "xlsxwriter": "1.4.3", "zstandard": "0.15.2", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 27672c82fdf15..674a69ae7c339 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3507,27 +3507,13 @@ def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: # -------------------------------------------------------------------- # Indexing Methods - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location, slice or boolean mask for requested label. Parameters ---------- key : label - method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional - * default: exact matches only. - * pad / ffill: find the PREVIOUS index value if no exact match. - * backfill / bfill: use NEXT index value if no exact match - * nearest: use the NEAREST index value if no exact match. Tied - distances are broken by preferring the larger index value. - - .. deprecated:: 1.4 - Use index.get_indexer([item], method=...) instead. - - tolerance : int or float, optional - Maximum distance from index value for inexact matches. The value of - the index at the matching location must satisfy the equation - ``abs(index[loc] - key) <= tolerance``. Returns ------- @@ -3547,46 +3533,17 @@ def get_loc(self, key, method=None, tolerance=None): >>> non_monotonic_index.get_loc('b') array([False, True, False, True]) """ - if method is None: - if tolerance is not None: - raise ValueError( - "tolerance argument only valid if using pad, " - "backfill or nearest lookups" - ) - casted_key = self._maybe_cast_indexer(key) - try: - return self._engine.get_loc(casted_key) - except KeyError as err: - raise KeyError(key) from err - except TypeError: - # If we have a listlike key, _check_indexing_error will raise - # InvalidIndexError. Otherwise we fall through and re-raise - # the TypeError. - self._check_indexing_error(key) - raise - - # GH#42269 - warnings.warn( - f"Passing method to {type(self).__name__}.get_loc is deprecated " - "and will raise in a future version. Use " - "index.get_indexer([item], method=...) instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if is_scalar(key) and isna(key) and not self.hasnans: - raise KeyError(key) - - if tolerance is not None: - tolerance = self._convert_tolerance(tolerance, np.asarray(key)) - - indexer = self.get_indexer([key], method=method, tolerance=tolerance) - if indexer.ndim > 1 or indexer.size > 1: - raise TypeError("get_loc requires scalar valued input") - loc = indexer.item() - if loc == -1: - raise KeyError(key) - return loc + casted_key = self._maybe_cast_indexer(key) + try: + return self._engine.get_loc(casted_key) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # If we have a listlike key, _check_indexing_error will raise + # InvalidIndexError. Otherwise we fall through and re-raise + # the TypeError. + self._check_indexing_error(key) + raise _index_shared_docs[ "get_indexer" diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3a00301fbc042..e996b9becd599 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -568,7 +568,7 @@ def _deprecate_mismatched_indexing(self, key, one_way: bool = False) -> None: self._data._assert_tzawareness_compat(key) - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label @@ -596,11 +596,7 @@ def get_loc(self, key, method=None, tolerance=None): self._disallow_mismatched_indexing(parsed, one_way=True) if self._can_partial_date_slice(reso): - try: - return self._partial_date_slice(reso, parsed) - except KeyError as err: - if method is None: - raise KeyError(key) from err + return self._partial_date_slice(reso, parsed) key = self._maybe_cast_for_get_loc(key) @@ -611,10 +607,6 @@ def get_loc(self, key, method=None, tolerance=None): ) elif isinstance(key, time): - if method is not None: - raise NotImplementedError( - "cannot yet lookup inexact labels when key is a time object" - ) return self.indexer_at_time(key) else: @@ -622,7 +614,7 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) try: - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) except KeyError as err: raise KeyError(orig_key) from err diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1e255241cf222..1a1d439b430d4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2754,7 +2754,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: else: return level_index.get_loc(key) - def get_loc(self, key, method=None): + def get_loc(self, key): """ Get location for a label or a tuple of labels. @@ -2764,7 +2764,6 @@ def get_loc(self, key, method=None): Parameters ---------- key : label or tuple of labels (one for each level) - method : None Returns ------- @@ -2796,12 +2795,6 @@ def get_loc(self, key, method=None): >>> mi.get_loc(('b', 'e')) 1 """ - if method is not None: - raise NotImplementedError( - "only the default get_loc method is " - "currently supported for MultiIndex" - ) - self._check_indexing_error(key) def _maybe_to_slice(loc): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a09b987496a40..0083484e24cb2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -375,7 +375,7 @@ def _convert_tolerance(self, tolerance, target): return tolerance - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label. @@ -421,14 +421,12 @@ def get_loc(self, key, method=None, tolerance=None): # the reso < self._resolution_obj case goes # through _get_string_slice key = self._cast_partial_indexing_scalar(key) - loc = self.get_loc(key, method=method, tolerance=tolerance) + loc = self.get_loc(key) # Recursing instead of falling through matters for the exception # message in test_get_loc3 (though not clear if that really matters) return loc - elif method is None: - raise KeyError(key) else: - key = self._cast_partial_indexing_scalar(parsed) + raise KeyError(key) elif isinstance(key, Period): key = self._maybe_cast_for_get_loc(key) @@ -441,7 +439,7 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) try: - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) except KeyError as err: raise KeyError(orig_key) from err diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ae88b85aa06e1..b1bbee2976c0a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -328,17 +328,15 @@ def inferred_type(self) -> str: # Indexing Methods @doc(Int64Index.get_loc) - def get_loc(self, key, method=None, tolerance=None): - if method is None and tolerance is None: - if is_integer(key) or (is_float(key) and key.is_integer()): - new_key = int(key) - try: - return self._range.index(new_key) - except ValueError as err: - raise KeyError(key) from err - self._check_indexing_error(key) - raise KeyError(key) - return super().get_loc(key, method=method, tolerance=tolerance) + def get_loc(self, key): + if is_integer(key) or (is_float(key) and key.is_integer()): + new_key = int(key) + try: + return self._range.index(new_key) + except ValueError as err: + raise KeyError(key) from err + self._check_indexing_error(key) + raise KeyError(key) def _get_indexer( self, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 113f76a35e13f..a1ef93957bc0a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -165,7 +165,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: # ------------------------------------------------------------------- # Indexing Methods - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label @@ -180,7 +180,7 @@ def get_loc(self, key, method=None, tolerance=None): except TypeError as err: raise KeyError(key) from err - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) def _parse_with_reso(self, label: str): # the "with_reso" is a no-op for TimedeltaIndex diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 887766dd3fc29..04d1d8204a346 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError - import pandas as pd from pandas import ( DatetimeIndex, @@ -405,75 +403,6 @@ def test_get_loc_key_unit_mismatch_not_castable(self): assert key not in dti - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_method_exact_match(self, method): - idx = date_range("2000-01-01", periods=3) - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - if method is not None: - assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc(self): - idx = date_range("2000-01-01", periods=3) - - assert idx.get_loc("2000-01-01", method="nearest") == 0 - assert idx.get_loc("2000-01-01T12", method="nearest") == 1 - - assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") - with pytest.raises(KeyError, match="'2000-01-01T03'"): - idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-01", - method="nearest", - tolerance=[ - pd.Timedelta("1day").to_timedelta64(), - pd.Timedelta("1day").to_timedelta64(), - ], - ) - - assert idx.get_loc("2000", method="nearest") == slice(0, 3) - assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) - - assert idx.get_loc("1999", method="nearest") == 0 - assert idx.get_loc("2001", method="nearest") == 2 - - with pytest.raises(KeyError, match="'1999'"): - idx.get_loc("1999", method="pad") - with pytest.raises(KeyError, match="'2001'"): - idx.get_loc("2001", method="backfill") - - with pytest.raises(KeyError, match="'foobar'"): - idx.get_loc("foobar") - with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): - idx.get_loc(slice(2)) - - idx = DatetimeIndex(["2000-01-01", "2000-01-04"]) - assert idx.get_loc("2000-01-02", method="nearest") == 0 - assert idx.get_loc("2000-01-03", method="nearest") == 1 - assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) - def test_get_loc_time_obj(self): # time indexing idx = date_range("2000-01-01", periods=24, freq="H") @@ -486,11 +415,6 @@ def test_get_loc_time_obj(self): expected = np.array([]) tm.assert_numpy_array_equal(result, expected, check_dtype=False) - msg = "cannot yet lookup inexact labels when key is a time object" - with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - idx.get_loc(time(12, 30), method="pad") - def test_get_loc_time_obj2(self): # GH#8667 @@ -525,18 +449,6 @@ def test_get_loc_time_nat(self): expected = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(loc, expected) - def test_get_loc_tz_aware(self): - # https://github.com/pandas-dev/pandas/issues/32140 - dti = date_range( - Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), - Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), - freq="5s", - ) - key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - result = dti.get_loc(key, method="nearest") - assert result == 7433 - def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(["1/3/2000", "NaT"]) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 0c2c5e0b903bc..4e66afb01fbd4 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -25,99 +25,27 @@ def index_large(): class TestGetLoc: - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - def test_get_loc(self, method): + def test_get_loc(self): index = Index([0, 1, 2]) - warn = None if method is None else FutureWarning + assert index.get_loc(1) == 1 - with tm.assert_produces_warning(warn, match="deprecated"): - assert index.get_loc(1, method=method) == 1 - - if method: - with tm.assert_produces_warning(warn, match="deprecated"): - assert index.get_loc(1, method=method, tolerance=0) == 1 - - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_raises_bad_label(self, method): - index = Index([0, 1, 2]) - if method: - msg = "not supported between" - err = TypeError - else: - msg = r"\[1, 2\]" - err = InvalidIndexError - - with pytest.raises(err, match=msg): - index.get_loc([1, 2], method=method) - - @pytest.mark.parametrize( - "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] - ) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_tolerance(self, method, loc): - index = Index([0, 1, 2]) - assert index.get_loc(1.1, method) == loc - assert index.get_loc(1.1, method, tolerance=1) == loc - - @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) - def test_get_loc_outside_tolerance_raises(self, method): - index = Index([0, 1, 2]) - with pytest.raises(KeyError, match="1.1"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, method, tolerance=0.05) - - def test_get_loc_bad_tolerance_raises(self): - index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="must be numeric"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, "nearest", tolerance="invalid") - - def test_get_loc_tolerance_no_method_raises(self): + def test_get_loc_raises_bad_label(self): index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="tolerance .* valid if"): - index.get_loc(1.1, tolerance=1) - - def test_get_loc_raises_missized_tolerance(self): - index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="tolerance size must match"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, "nearest", tolerance=[1, 1]) + with pytest.raises(InvalidIndexError, match=r"\[1, 2\]"): + index.get_loc([1, 2]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc_float64(self): idx = Float64Index([0.0, 1.0, 2.0]) - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(1, method) == 1 - if method is not None: - assert idx.get_loc(1, method, tolerance=0) == 1 - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc(1.1, method) == loc - assert idx.get_loc(1.1, method, tolerance=0.9) == loc with pytest.raises(KeyError, match="^'foo'$"): idx.get_loc("foo") with pytest.raises(KeyError, match=r"^1\.5$"): idx.get_loc(1.5) - with pytest.raises(KeyError, match=r"^1\.5$"): - idx.get_loc(1.5, method="pad", tolerance=0.1) with pytest.raises(KeyError, match="^True$"): idx.get_loc(True) with pytest.raises(KeyError, match="^False$"): idx.get_loc(False) - with pytest.raises(ValueError, match="must be numeric"): - idx.get_loc(1.4, method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="must contain numeric elements"): - idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) - - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) - def test_get_loc_na(self): idx = Float64Index([np.nan, 1, 2]) assert idx.get_loc(1) == 1 @@ -150,13 +78,11 @@ def test_get_loc_missing_nan(self): idx.get_loc([np.nan]) @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) - @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) - def test_get_loc_float_index_nan_with_method(self, vals, method): + def test_get_loc_float_index_nan_with_method(self, vals): # GH#39382 idx = Index(vals) with pytest.raises(KeyError, match="nan"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - idx.get_loc(np.nan, method=method) + idx.get_loc(np.nan) @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) def test_get_loc_numericindex_none_raises(self, dtype): diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 38bd96921b991..a33173dc83569 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -10,20 +10,6 @@ import pandas._testing as tm -class TestGetLoc: - def test_get_loc_raises_object_nearest(self): - index = Index(["a", "c"]) - with pytest.raises(TypeError, match="unsupported operand type"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc("a", method="nearest") - - def test_get_loc_raises_object_tolerance(self): - index = Index(["a", "c"]) - with pytest.raises(TypeError, match="unsupported operand type"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc("a", method="pad", tolerance="invalid") - - class TestGetIndexer: @pytest.mark.parametrize( "method,expected", diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index fcc7fa083691e..2717f50a890ad 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -1,7 +1,4 @@ -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime import re import numpy as np @@ -331,62 +328,6 @@ def test_get_loc_integer(self): with pytest.raises(KeyError, match="46"): pi2.get_loc(46) - # TODO: This method came from test_period; de-dup with version above - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_method(self, method): - idx = period_range("2000-01-01", periods=3) - - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_timestamp(), method) == 1 - assert idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - key = idx[1].asfreq("H", how="start") - with pytest.raises(KeyError, match=str(key)): - idx.get_loc(key, method=method) - - # TODO: This method came from test_period; de-dup with version above - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc3(self): - - idx = period_range("2000-01-01", periods=5)[::2] - assert idx.get_loc("2000-01-02T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-02T12", method="nearest", tolerance=Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-02T12", method="nearest", tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-02T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - - msg = "unit abbreviation w/o a number" - with pytest.raises(ValueError, match=msg): - idx.get_loc("2000-01-10", method="nearest", tolerance="foo") - - msg = "Input has different freq=None from PeriodArray\\(freq=D\\)" - with pytest.raises(ValueError, match=msg): - idx.get_loc("2000-01-10", method="nearest", tolerance="1 hour") - with pytest.raises(KeyError, match=r"^Period\('2000-01-10', 'D'\)$"): - idx.get_loc("2000-01-10", method="nearest", tolerance="1 day") - with pytest.raises( - ValueError, match="list-like tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-10", - method="nearest", - tolerance=[ - Timedelta("1 day").to_timedelta64(), - Timedelta("1 day").to_timedelta64(), - ], - ) - def test_get_loc_invalid_string_raises_keyerror(self): # GH#34240 pi = period_range("2000", periods=3, name="A") diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 12aece23738ec..cc166f9f32a34 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,7 +1,4 @@ -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime import re import numpy as np @@ -91,35 +88,9 @@ def test_get_loc_key_unit_mismatch_not_castable(self): assert key not in tdi - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc(self): idx = to_timedelta(["0 days", "1 days", "2 days"]) - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 - - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc(idx[1], method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="tolerance size must match"): - idx.get_loc( - idx[1], - method="nearest", - tolerance=[ - Timedelta(0).to_timedelta64(), - Timedelta(0).to_timedelta64(), - ], - ) - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc("1 day 1 hour", method) == loc - # GH 16909 assert idx.get_loc(idx[1].to_timedelta64()) == 1 diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 1396ab262a79a..7bed743cbf730 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -118,7 +118,7 @@ def test_xarray(df): @td.skip_if_no("cftime") -@td.skip_if_no("xarray", "0.10.4") +@td.skip_if_no("xarray", "0.21.0") def test_xarray_cftimeindex_nearest(): # https://github.com/pydata/xarray/issues/3751 import cftime @@ -126,10 +126,7 @@ def test_xarray_cftimeindex_nearest(): times = xarray.cftime_range("0001", periods=2) key = cftime.DatetimeGregorian(2000, 1, 1) - with tm.assert_produces_warning( - FutureWarning, match="deprecated", check_stacklevel=False - ): - result = times.get_loc(key, method="nearest") + result = times.get_loc(key) expected = 1 assert result == expected diff --git a/setup.cfg b/setup.cfg index 785143c7b647c..49a483198b111 100644 --- a/setup.cfg +++ b/setup.cfg @@ -154,7 +154,7 @@ all = tables>=3.6.1 tabulate>=0.8.9 tzdata>=2022.1 - xarray>=0.19.0 + xarray>=0.21.0 xlrd>=2.0.1 xlsxwriter>=1.4.3 zstandard>=0.15.2 From 61935a9b0c78e9f3a294b7d211491c9f24079de4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 10 Nov 2022 14:54:55 -0800 Subject: [PATCH 2/7] note xarray bump --- doc/source/whatsnew/v2.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 75e40cae1f8c4..4468506d5d00e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -268,6 +268,8 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | fastparquet | 0.6.3 | X | +-----------------+-----------------+---------+ +| xarray | 0.21.0 | X | ++-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. From cf1f5da4f41ff9d12479050f4f08c683bb49edb5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Dec 2022 15:59:21 -0800 Subject: [PATCH 3/7] Fix tests --- pandas/core/indexes/datetimes.py | 5 ++++- pandas/tests/indexes/multi/test_indexing.py | 4 ---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9c5ae3d8cecef..784b5c8b24e32 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -584,7 +584,10 @@ def get_loc(self, key): self._disallow_mismatched_indexing(parsed) if self._can_partial_date_slice(reso): - return self._partial_date_slice(reso, parsed) + try: + return self._partial_date_slice(reso, parsed) + except KeyError as err: + raise KeyError(key) from err key = parsed diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 4c879c8ff5736..31c5ab333ecfa 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -570,10 +570,6 @@ def test_get_loc(self, idx): with pytest.raises(KeyError, match=r"^'quux'$"): idx.get_loc("quux") - msg = "only the default get_loc method is currently supported for MultiIndex" - with pytest.raises(NotImplementedError, match=msg): - idx.get_loc("foo", method="nearest") - # 3 levels index = MultiIndex( levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], From 2ca2fea82e8fe2911f221bed2e25309142486d0b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Dec 2022 15:13:03 -0800 Subject: [PATCH 4/7] Fix refactor in period --- pandas/core/indexes/period.py | 9 ++- pandas/tests/indexes/period/test_indexing.py | 64 +++++++++++--------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index d734c0f456286..877bb2844e8c9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -417,9 +417,12 @@ def get_loc(self, key): # TODO: pass if method is not None, like DTI does? raise KeyError(key) from err - # the reso < self._resolution_obj case goes - # through _get_string_slice - key = self._cast_partial_indexing_scalar(parsed) + if reso == self._resolution_obj: + # the reso < self._resolution_obj case goes + # through _get_string_slice + key = self._cast_partial_indexing_scalar(parsed) + else: + raise KeyError(key) elif isinstance(key, Period): self._disallow_mismatched_indexing(key) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 2717f50a890ad..1f376533806ed 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -203,38 +203,42 @@ def test_getitem_seconds(self): for d in ["2013/01/01", "2013/01", "2013"]: tm.assert_series_equal(ser[d], ser) - def test_getitem_day(self): + @pytest.mark.parametrize( + "idx", + [ + date_range(start="2013/01/01", freq="D", periods=400), + period_range(start="2013/01/01", freq="D", periods=400), + ], + ids=lambda x: type(x).__name__, + ) + def test_getitem_day(self, idx): # GH#6716 # Confirm DatetimeIndex and PeriodIndex works identically - didx = date_range(start="2013/01/01", freq="D", periods=400) - pidx = period_range(start="2013/01/01", freq="D", periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = [ - "2014", - "2013/02", - "2013/01/02", - "2013/02/01 9H", - "2013/02/01 09:00", - ] - for val in values: - - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - with pytest.raises(IndexError, match="only integers, slices"): - idx[val] - - ser = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(ser["2013/01"], ser[0:31]) - tm.assert_series_equal(ser["2013/02"], ser[31:59]) - tm.assert_series_equal(ser["2014"], ser[365:]) - - invalid = ["2013/02/01 9H", "2013/02/01 09:00"] - for val in invalid: - with pytest.raises(KeyError, match=val): - ser[val] + # getitem against index should raise ValueError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for val in values: + + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + with pytest.raises(IndexError, match="only integers, slices"): + idx[val] + + ser = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(ser["2013/01"], ser[0:31]) + tm.assert_series_equal(ser["2013/02"], ser[31:59]) + tm.assert_series_equal(ser["2014"], ser[365:]) + + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for val in invalid: + with pytest.raises(KeyError, match=val): + ser[val] class TestGetLoc: From 12708b8d6bf0ddf1af29b611bb81c986249ff59c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Dec 2022 15:30:40 -0800 Subject: [PATCH 5/7] Lighter parameterization --- pandas/tests/indexes/period/test_indexing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 1f376533806ed..6cf942ad3d5d5 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -204,17 +204,17 @@ def test_getitem_seconds(self): tm.assert_series_equal(ser[d], ser) @pytest.mark.parametrize( - "idx", + "idx_range", [ - date_range(start="2013/01/01", freq="D", periods=400), - period_range(start="2013/01/01", freq="D", periods=400), + date_range, + period_range, ], - ids=lambda x: type(x).__name__, ) - def test_getitem_day(self, idx): + def test_getitem_day(self, idx_range): # GH#6716 # Confirm DatetimeIndex and PeriodIndex works identically # getitem against index should raise ValueError + idx = idx_range(start="2013/01/01", freq="D", periods=400) values = [ "2014", "2013/02", From 6d4451669675a2d150fcaeb03d8ecfb1a6017946 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Dec 2022 17:46:09 -0800 Subject: [PATCH 6/7] xfail xarray test --- pandas/tests/test_downstream.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index e68e3fe30e1cc..b0512e1b32951 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -119,8 +119,10 @@ def test_xarray(df): @td.skip_if_no("cftime") @td.skip_if_no("xarray", "0.21.0") +@pytest.mark.xfail(reason="xarray.CFTimeIndex.get_loc did not address 2.0 deprecation") def test_xarray_cftimeindex_nearest(): # https://github.com/pydata/xarray/issues/3751 + # TODO: xfail addressable once https://github.com/pydata/xarray/pull/7361 is merged import cftime import xarray From 39192020e7934b2fc2a72e413ee873715af45bb6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 6 Dec 2022 10:57:59 -0800 Subject: [PATCH 7/7] Just use get_indexer --- pandas/tests/test_downstream.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index b0512e1b32951..fa32c558eeb70 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -119,16 +119,14 @@ def test_xarray(df): @td.skip_if_no("cftime") @td.skip_if_no("xarray", "0.21.0") -@pytest.mark.xfail(reason="xarray.CFTimeIndex.get_loc did not address 2.0 deprecation") def test_xarray_cftimeindex_nearest(): # https://github.com/pydata/xarray/issues/3751 - # TODO: xfail addressable once https://github.com/pydata/xarray/pull/7361 is merged import cftime import xarray times = xarray.cftime_range("0001", periods=2) key = cftime.DatetimeGregorian(2000, 1, 1) - result = times.get_loc(key) + result = times.get_indexer([key], method="nearest") expected = 1 assert result == expected