Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Remove method and tolerance in Index.get_loc, bump xarray #49630

Merged
merged 10 commits into from
Dec 6, 2022
2 changes: 1 addition & 1 deletion ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ dependencies:
- sqlalchemy=1.4.16
- tabulate=0.8.9
- tzdata=2022a
- xarray=0.19.0
- xarray=0.21.0
- xlrd=2.0.1
- xlsxwriter=1.4.3
- zstandard=0.15.2
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ Installable with ``pip install "pandas[computation]"``.
Dependency Minimum Version pip extra Notes
========================= ================== =============== =============================================================
SciPy 1.7.1 computation Miscellaneous statistical functions
xarray 0.19.0 computation pandas-like API for N-dimensional data
xarray 0.21.0 computation pandas-like API for N-dimensional data
========================= ================== =============== =============================================================

Excel files
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| fastparquet | 0.6.3 | X |
+-----------------+-----------------+---------+
| xarray | 0.21.0 | X |
+-----------------+-----------------+---------+

See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.

Expand Down Expand Up @@ -520,6 +522,7 @@ Removal of prior version deprecations/changes
- Removed the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument (:issue:`40245`)
- Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
- Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`)
- Removed the ``method`` and ``tolerance`` arguments in :meth:`Index.get_loc`. Use ``index.get_indexer([label], method=..., tolerance=...)`` instead (:issue:`42269`)
- Removed the ``pandas.datetime`` submodule (:issue:`30489`)
- Removed the ``pandas.np`` submodule (:issue:`30296`)
- Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"sqlalchemy": "1.4.16",
"tables": "3.6.1",
"tabulate": "0.8.9",
"xarray": "0.19.0",
"xarray": "0.21.0",
"xlrd": "2.0.1",
"xlsxwriter": "1.4.3",
"zstandard": "0.15.2",
Expand Down
67 changes: 12 additions & 55 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3429,27 +3429,13 @@ def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
# --------------------------------------------------------------------
# Indexing Methods

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location, slice or boolean mask for requested label.

Parameters
----------
key : label
method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
* default: exact matches only.
* pad / ffill: find the PREVIOUS index value if no exact match.
* backfill / bfill: use NEXT index value if no exact match
* nearest: use the NEAREST index value if no exact match. Tied
distances are broken by preferring the larger index value.

.. deprecated:: 1.4
Use index.get_indexer([item], method=...) instead.

tolerance : int or float, optional
Maximum distance from index value for inexact matches. The value of
the index at the matching location must satisfy the equation
``abs(index[loc] - key) <= tolerance``.

Returns
-------
Expand All @@ -3469,46 +3455,17 @@ def get_loc(self, key, method=None, tolerance=None):
>>> non_monotonic_index.get_loc('b')
array([False, True, False, True])
"""
if method is None:
if tolerance is not None:
raise ValueError(
"tolerance argument only valid if using pad, "
"backfill or nearest lookups"
)
casted_key = self._maybe_cast_indexer(key)
try:
return self._engine.get_loc(casted_key)
except KeyError as err:
raise KeyError(key) from err
except TypeError:
# If we have a listlike key, _check_indexing_error will raise
# InvalidIndexError. Otherwise we fall through and re-raise
# the TypeError.
self._check_indexing_error(key)
raise

# GH#42269
warnings.warn(
f"Passing method to {type(self).__name__}.get_loc is deprecated "
"and will raise in a future version. Use "
"index.get_indexer([item], method=...) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if is_scalar(key) and isna(key) and not self.hasnans:
raise KeyError(key)

if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, np.asarray(key))

indexer = self.get_indexer([key], method=method, tolerance=tolerance)
if indexer.ndim > 1 or indexer.size > 1:
raise TypeError("get_loc requires scalar valued input")
loc = indexer.item()
if loc == -1:
raise KeyError(key)
return loc
casted_key = self._maybe_cast_indexer(key)
try:
return self._engine.get_loc(casted_key)
except KeyError as err:
raise KeyError(key) from err
except TypeError:
# If we have a listlike key, _check_indexing_error will raise
# InvalidIndexError. Otherwise we fall through and re-raise
# the TypeError.
self._check_indexing_error(key)
raise

_index_shared_docs[
"get_indexer"
Expand Down
11 changes: 3 additions & 8 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def _disallow_mismatched_indexing(self, key) -> None:
except TypeError as err:
raise KeyError(key) from err

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label

Expand Down Expand Up @@ -587,8 +587,7 @@ def get_loc(self, key, method=None, tolerance=None):
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
if method is None:
raise KeyError(key) from err
raise KeyError(key) from err

key = parsed

Expand All @@ -599,18 +598,14 @@ def get_loc(self, key, method=None, tolerance=None):
)

elif isinstance(key, dt.time):
if method is not None:
raise NotImplementedError(
"cannot yet lookup inexact labels when key is a time object"
)
return self.indexer_at_time(key)

else:
# unrecognized type
raise KeyError(key)

try:
return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)
except KeyError as err:
raise KeyError(orig_key) from err

Expand Down
9 changes: 1 addition & 8 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2730,7 +2730,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
else:
return level_index.get_loc(key)

def get_loc(self, key, method=None):
def get_loc(self, key):
"""
Get location for a label or a tuple of labels.

Expand All @@ -2740,7 +2740,6 @@ def get_loc(self, key, method=None):
Parameters
----------
key : label or tuple of labels (one for each level)
method : None

Returns
-------
Expand Down Expand Up @@ -2772,12 +2771,6 @@ def get_loc(self, key, method=None):
>>> mi.get_loc(('b', 'e'))
1
"""
if method is not None:
raise NotImplementedError(
"only the default get_loc method is "
"currently supported for MultiIndex"
)

self._check_indexing_error(key)

def _maybe_to_slice(loc):
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def _convert_tolerance(self, tolerance, target):

return tolerance

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label.

Expand Down Expand Up @@ -421,10 +421,8 @@ def get_loc(self, key, method=None, tolerance=None):
# the reso < self._resolution_obj case goes
# through _get_string_slice
key = self._cast_partial_indexing_scalar(parsed)
elif method is None:
raise KeyError(key)
else:
key = self._cast_partial_indexing_scalar(parsed)
raise KeyError(key)

elif isinstance(key, Period):
self._disallow_mismatched_indexing(key)
Expand All @@ -437,7 +435,7 @@ def get_loc(self, key, method=None, tolerance=None):
raise KeyError(key)

try:
return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)
except KeyError as err:
raise KeyError(orig_key) from err

Expand Down
20 changes: 9 additions & 11 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,17 +328,15 @@ def inferred_type(self) -> str:
# Indexing Methods

@doc(Int64Index.get_loc)
def get_loc(self, key, method=None, tolerance=None):
if method is None and tolerance is None:
if is_integer(key) or (is_float(key) and key.is_integer()):
new_key = int(key)
try:
return self._range.index(new_key)
except ValueError as err:
raise KeyError(key) from err
self._check_indexing_error(key)
raise KeyError(key)
return super().get_loc(key, method=method, tolerance=tolerance)
def get_loc(self, key):
# Return the position of ``key`` within ``self._range``; raise KeyError if absent.
# Only integers, and floats whose value is integral (e.g. 3.0), are looked up.
if is_integer(key) or (is_float(key) and key.is_integer()):
new_key = int(key)
try:
return self._range.index(new_key)
except ValueError as err:
# A failed lookup is surfaced as KeyError with the caller's original key,
# chaining the ValueError as the cause.
raise KeyError(key) from err
# Any other key: give _check_indexing_error a chance to raise its own error
# first; if it returns, report the key as missing.
self._check_indexing_error(key)
raise KeyError(key)

def _get_indexer(
self,
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
# -------------------------------------------------------------------
# Indexing Methods

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label

Expand All @@ -189,7 +189,7 @@ def get_loc(self, key, method=None, tolerance=None):
except TypeError as err:
raise KeyError(key) from err

return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)

def _parse_with_reso(self, label: str):
# the "with_reso" is a no-op for TimedeltaIndex
Expand Down
88 changes: 0 additions & 88 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas.errors import InvalidIndexError

import pandas as pd
from pandas import (
DatetimeIndex,
Expand Down Expand Up @@ -405,75 +403,6 @@ def test_get_loc_key_unit_mismatch_not_castable(self):

assert key not in dti

@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc_method_exact_match(self, method):
idx = date_range("2000-01-01", periods=3)
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].to_pydatetime(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1

if method is not None:
assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1

@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc(self):
idx = date_range("2000-01-01", periods=3)

assert idx.get_loc("2000-01-01", method="nearest") == 0
assert idx.get_loc("2000-01-01T12", method="nearest") == 1

assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D"))
== 1
)
assert (
idx.get_loc(
"2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D")
)
== 1
)
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1
)
with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
with pytest.raises(KeyError, match="'2000-01-01T03'"):
idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
with pytest.raises(
ValueError, match="tolerance size must match target index size"
):
idx.get_loc(
"2000-01-01",
method="nearest",
tolerance=[
pd.Timedelta("1day").to_timedelta64(),
pd.Timedelta("1day").to_timedelta64(),
],
)

assert idx.get_loc("2000", method="nearest") == slice(0, 3)
assert idx.get_loc("2000-01", method="nearest") == slice(0, 3)

assert idx.get_loc("1999", method="nearest") == 0
assert idx.get_loc("2001", method="nearest") == 2

with pytest.raises(KeyError, match="'1999'"):
idx.get_loc("1999", method="pad")
with pytest.raises(KeyError, match="'2001'"):
idx.get_loc("2001", method="backfill")

with pytest.raises(KeyError, match="'foobar'"):
idx.get_loc("foobar")
with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"):
idx.get_loc(slice(2))

idx = DatetimeIndex(["2000-01-01", "2000-01-04"])
assert idx.get_loc("2000-01-02", method="nearest") == 0
assert idx.get_loc("2000-01-03", method="nearest") == 1
assert idx.get_loc("2000-01", method="nearest") == slice(0, 2)

def test_get_loc_time_obj(self):
# time indexing
idx = date_range("2000-01-01", periods=24, freq="H")
Expand All @@ -486,11 +415,6 @@ def test_get_loc_time_obj(self):
expected = np.array([])
tm.assert_numpy_array_equal(result, expected, check_dtype=False)

msg = "cannot yet lookup inexact labels when key is a time object"
with pytest.raises(NotImplementedError, match=msg):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
idx.get_loc(time(12, 30), method="pad")

def test_get_loc_time_obj2(self):
# GH#8667

Expand Down Expand Up @@ -525,18 +449,6 @@ def test_get_loc_time_nat(self):
expected = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(loc, expected)

def test_get_loc_tz_aware(self):
# https://github.com/pandas-dev/pandas/issues/32140
dti = date_range(
Timestamp("2019-12-12 00:00:00", tz="US/Eastern"),
Timestamp("2019-12-13 00:00:00", tz="US/Eastern"),
freq="5s",
)
key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern")
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
result = dti.get_loc(key, method="nearest")
assert result == 7433

def test_get_loc_nat(self):
# GH#20464
index = DatetimeIndex(["1/3/2000", "NaT"])
Expand Down
Loading