Skip to content

Commit

Permalink
DEPR: casting in datetimelike isin (#56427)
Browse files Browse the repository at this point in the history
* DEPR: casting in datetimelike isin

* GH ref

* update doctest
  • Loading branch information
jbrockmendel authored Dec 9, 2023
1 parent 2dcb963 commit d95a7a7
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 14 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ Other Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`)
- Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`)
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)
Expand Down Expand Up @@ -526,6 +527,7 @@ Datetimelike
^^^^^^^^^^^^
- Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`)
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
- Bug in :meth:`Series.isin` with :class:`DatetimeTZDtype` dtype and comparison values that are all ``NaT`` incorrectly returning all-``False`` even if the series contains ``NaT`` entries (:issue:`56427`)
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
Expand Down
7 changes: 5 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2756,8 +2756,11 @@ def maybe_convert_objects(ndarray[object] objects,
res[:] = NPY_NAT
return res
elif dtype is not None:
# EA, we don't expect to get here, but _could_ implement
raise NotImplementedError(dtype)
# i.e. PeriodDtype, DatetimeTZDtype
cls = dtype.construct_array_type()
obj = cls._from_sequence([], dtype=dtype)
taker = -np.ones((<object>objects).shape, dtype=np.intp)
return obj.take(taker, allow_fill=True)
else:
# we don't guess
seen.object_ = True
Expand Down
21 changes: 21 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
# TODO: de-duplicate with equals, validate_comparison_value
return np.zeros(self.shape, dtype=bool)

values = ensure_wrapped_if_datetimelike(values)

if not isinstance(values, type(self)):
inferable = [
"timedelta",
Expand All @@ -761,6 +763,14 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
"period",
]
if values.dtype == object:
values = lib.maybe_convert_objects(
values,
convert_non_numeric=True,
dtype_if_all_nat=self.dtype,
)
if values.dtype != object:
return self.isin(values)

inferred = lib.infer_dtype(values, skipna=False)
if inferred not in inferable:
if inferred == "string":
Expand All @@ -775,6 +785,17 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
values = type(self)._from_sequence(values)
except ValueError:
return isin(self.astype(object), values)
else:
warnings.warn(
# GH#53111
f"The behavior of 'isin' with dtype={self.dtype} and "
"castable values (e.g. strings) is deprecated. In a "
"future version, these will not be considered matching "
"by isin. Explicitly cast to the appropriate dtype before "
"calling isin instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if self.dtype.kind in "mM":
self = cast("DatetimeArray | TimedeltaArray", self)
Expand Down
12 changes: 0 additions & 12 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6534,18 +6534,6 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
>>> midx.isin([(1, 'red'), (3, 'red')])
array([ True, False, False])
For a DatetimeIndex, string values in `values` are converted to
Timestamps.
>>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
>>> dti = pd.to_datetime(dates)
>>> dti
DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
dtype='datetime64[ns]', freq=None)
>>> dti.isin(['2000-03-11'])
array([ True, False, False])
"""
if level is not None:
self._validate_index_level(level)
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,45 @@ def test_large(self):
expected[1] = True
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"])
def test_isin_datetimelike_all_nat(self, dtype):
    # GH#56427: comparison values consisting only of NaT must match the
    # NaT entry of a datetimelike array, for each parametrized dtype.
    int_values = date_range("2013-01-01", periods=3)._values.view("i8")
    values = Series(int_values).array.view(dtype)
    # Make the first element missing so exactly one position can match.
    values[0] = NaT

    mask = algos.isin(values, [NaT])

    tm.assert_numpy_array_equal(mask, np.array([True, False, False], dtype=bool))

@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"])
def test_isin_datetimelike_strings_deprecated(self, dtype):
    # GH#53111: passing string representations of the array's own elements
    # to isin still matches every element, but emits a FutureWarning.
    int_values = date_range("2013-01-01", periods=3)._values.view("i8")
    values = Series(int_values).array.view(dtype)

    as_strings = list(map(str, values))
    pattern = "The behavior of 'isin' with dtype=.* is deprecated"

    # Exercise both a plain list of strings and a str-dtype ndarray.
    for candidates in (as_strings, np.array(as_strings, dtype=str)):
        with tm.assert_produces_warning(FutureWarning, match=pattern):
            matched = algos.isin(values, candidates)
        assert matched.all()

def test_isin_dt64tz_with_nat(self):
    # GH#56427: all-NaT comparison values used to be inferred as tz-naive,
    # which was then evaluated as non-matching against tz-aware data.
    tz_series = Series(date_range("2016-01-01", periods=3, tz="UTC"))
    tz_series[0] = NaT

    matches = algos.isin(tz_series._values, [NaT])

    tm.assert_numpy_array_equal(matches, np.array([True, False, False], dtype=bool))

def test_categorical_from_codes(self):
# GH 16639
vals = np.array([0, 1, 2, 0])
Expand Down

0 comments on commit d95a7a7

Please sign in to comment.