Skip to content

Commit

Permalink
Deprecate datelike isin casting strings to dates to match pandas 2.2 (#15046)
Browse files: Browse the repository at this point in the history

Matching pandas-dev/pandas#56427

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15046
  • Loading branch information
mroeschke authored Feb 20, 2024
1 parent c0e370b commit f6c00ff
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 19 deletions.
10 changes: 10 additions & 0 deletions python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,10 +767,20 @@ def _isin_datetimelike(
rhs = None
try:
rhs = cudf.core.column.as_column(values)
was_string = len(rhs) and rhs.dtype.kind == "O"

if rhs.dtype.kind in {"f", "i", "u"}:
return cudf.core.column.full(len(lhs), False, dtype="bool")
rhs = rhs.astype(lhs.dtype)
if was_string:
warnings.warn(
f"The behavior of 'isin' with dtype={lhs.dtype} and "
"castable values (e.g. strings) is deprecated. In a "
"future version, these will not be considered matching "
"by isin. Explicitly cast to the appropriate dtype before "
"calling isin instead.",
FutureWarning,
)
res = lhs._isin_earlystop(rhs)
if res is not None:
return res
Expand Down
34 changes: 17 additions & 17 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2497,19 +2497,12 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null):


@pytest.mark.parametrize(
"data",
"index",
[
[],
pd.Series(
["this", "is", None, "a", "test"], index=["a", "b", "c", "d", "e"]
),
pd.Series([0, 15, 10], index=[0, None, 9]),
pd.Series(
range(25),
index=pd.date_range(
start="2019-01-01", end="2019-01-02", freq="h"
),
),
pd.Index([]),
pd.Index(["a", "b", "c", "d", "e"]),
pd.Index([0, None, 9]),
pd.date_range("2019-01-01", periods=3),
],
)
@pytest.mark.parametrize(
Expand All @@ -2521,12 +2514,19 @@ def test_index_nan_as_null(data, nan_idx, NA_idx, nan_as_null):
["2019-01-01 04:00:00", "2019-01-01 06:00:00", "2018-03-02 10:00:00"],
],
)
def test_isin_index(data, values):
psr = pd.Series(data)
gsr = cudf.Series.from_pandas(psr)
def test_isin_index(index, values):
pidx = index
gidx = cudf.Index.from_pandas(pidx)

got = gsr.index.isin(values)
expected = psr.index.isin(values)
is_dt_str = (
next(iter(values), None) == "2019-01-01 04:00:00"
and len(pidx)
and pidx.dtype.kind == "M"
)
with expect_warning_if(is_dt_str):
got = gidx.isin(values)
with expect_warning_if(PANDAS_GE_220 and is_dt_str):
expected = pidx.isin(values)

assert_eq(got, expected)

Expand Down
8 changes: 6 additions & 2 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import cudf
from cudf.api.extensions import no_default
from cudf.core._compat import PANDAS_GE_220
from cudf.errors import MixedTypeError
from cudf.testing._utils import (
NUMERIC_TYPES,
Expand Down Expand Up @@ -1795,8 +1796,11 @@ def test_isin_datetime(data, values):
psr = pd.Series(data)
gsr = cudf.Series.from_pandas(psr)

got = gsr.isin(values)
expected = psr.isin(values)
is_len_str = isinstance(next(iter(values), None), str) and len(data)
with expect_warning_if(is_len_str):
got = gsr.isin(values)
with expect_warning_if(PANDAS_GE_220 and is_len_str):
expected = psr.isin(values)
assert_eq(got, expected)


Expand Down

0 comments on commit f6c00ff

Please sign in to comment.