diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index fb91219582b14..e51a347dec46c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -455,6 +455,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_xml` except ``path_or_buffer``. (:issue:`54229`) - Deprecated allowing passing :class:`BlockManager` objects to :class:`DataFrame` or :class:`SingleBlockManager` objects to :class:`Series` (:issue:`52419`) - Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`) +- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`) - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`) - Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`) - Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`) @@ -526,6 +527,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DatetimeIndex` construction when passing both a ``tz`` and either ``dayfirst`` or ``yearfirst`` ignoring dayfirst/yearfirst (:issue:`55813`) - Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`) +- Bug in :func:`Series.isin` with :class:`DatetimeTZDtype` dtype and comparison values that are all ``NaT`` incorrectly returning all-``False`` even if the series contains ``NaT`` entries (:issue:`56427`) - Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`) - Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`) - Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 8493f8bd066e0..c483f35513a40 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2756,8 +2756,11 @@ def maybe_convert_objects(ndarray[object] objects, res[:] = NPY_NAT return res elif dtype is not None: - # EA, we don't expect to get here, but _could_ implement - raise NotImplementedError(dtype) + # i.e. PeriodDtype, DatetimeTZDtype + cls = dtype.construct_array_type() + obj = cls._from_sequence([], dtype=dtype) + taker = -np.ones((objects).shape, dtype=np.intp) + return obj.take(taker, allow_fill=True) else: # we don't guess seen.object_ = True diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2a6a45ad18421..3249f432e22a9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -751,6 +751,8 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: # TODO: de-duplicate with equals, validate_comparison_value return np.zeros(self.shape, dtype=bool) + values = ensure_wrapped_if_datetimelike(values) + if not isinstance(values, type(self)): inferable = [ "timedelta", @@ -761,6 +763,14 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: "period", ] if values.dtype == object: + values = lib.maybe_convert_objects( + values, + convert_non_numeric=True, + dtype_if_all_nat=self.dtype, + ) + if values.dtype != object: + return self.isin(values) + inferred = lib.infer_dtype(values, skipna=False) if inferred not in inferable: if inferred == "string": @@ -775,6 +785,17 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: values = type(self)._from_sequence(values) except ValueError: return isin(self.astype(object), values) + else: + warnings.warn( + # GH#53111 + f"The behavior of 'isin' with dtype={self.dtype} and " + "castable values (e.g. strings) is deprecated. In a " + "future version, these will not be considered matching " + "by isin. Explicitly cast to the appropriate dtype before " + "calling isin instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) if self.dtype.kind in "mM": self = cast("DatetimeArray | TimedeltaArray", self) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3d3056f47f15e..febceeb7623b5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6534,18 +6534,6 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: >>> midx.isin([(1, 'red'), (3, 'red')]) array([ True, False, False]) - - For a DatetimeIndex, string values in `values` are converted to - Timestamps. - - >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] - >>> dti = pd.to_datetime(dates) - >>> dti - DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], - dtype='datetime64[ns]', freq=None) - - >>> dti.isin(['2000-03-11']) - array([ True, False, False]) """ if level is not None: self._validate_index_level(level) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 5356704cc64a2..d9162c428b492 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -992,6 +992,45 @@ def test_large(self): expected[1] = True tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]", "period[D]"]) + def test_isin_datetimelike_all_nat(self, dtype): + # GH#56427 + dta = date_range("2013-01-01", periods=3)._values + arr = Series(dta.view("i8")).array.view(dtype) + + arr[0] = NaT + result = algos.isin(arr, [NaT]) + expected = np.array([True, False, False], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]", "M8[ns, UTC]"]) + def test_isin_datetimelike_strings_deprecated(self, dtype): + # GH#53111 + dta = date_range("2013-01-01", periods=3)._values + arr = Series(dta.view("i8")).array.view(dtype) + + vals = [str(x) for x in arr] + msg = "The behavior of 'isin' with dtype=.* is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = algos.isin(arr, vals) + assert res.all() + + vals2 = np.array(vals, dtype=str) + with tm.assert_produces_warning(FutureWarning, match=msg): + res2 = algos.isin(arr, vals2) + assert res2.all() + + def test_isin_dt64tz_with_nat(self): + # the all-NaT values used to get inferred to tznaive, which was evaluated + # as non-matching GH#56427 + dti = date_range("2016-01-01", periods=3, tz="UTC") + ser = Series(dti) + ser[0] = NaT + + res = algos.isin(ser._values, [NaT]) + exp = np.array([True, False, False], dtype=bool) + tm.assert_numpy_array_equal(res, exp) + def test_categorical_from_codes(self): # GH 16639 vals = np.array([0, 1, 2, 0])