Skip to content

Commit

Permalink
Fix handling of typecasting in searchsorted (#13925)
Browse files Browse the repository at this point in the history
Fixes: #13902 

This PR fixes a type-casting issue in `searchsorted`: the searched columns were cast to the dtype of `values`, instead of inspecting both the searched columns and `values` and casting each pair to a common dtype.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #13925
  • Loading branch information
galipremsagar authored Aug 22, 2023
1 parent 0d90b8e commit 0e5f9db
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
19 changes: 16 additions & 3 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,12 +1409,25 @@ def searchsorted(
if len(values) != len(self._data):
raise ValueError("Mismatch number of columns to search for.")

# TODO: Change behavior based on the decision in
# https://github.com/pandas-dev/pandas/issues/54668
common_dtype_list = [
find_common_type([col.dtype, val.dtype])
for col, val in zip(self._columns, values)
]
sources = [
col
if is_dtype_equal(col.dtype, val.dtype)
else col.astype(val.dtype)
for col, val in zip(self._columns, values)
if is_dtype_equal(col.dtype, common_dtype)
else col.astype(common_dtype)
for col, common_dtype in zip(self._columns, common_dtype_list)
]
values = [
val
if is_dtype_equal(val.dtype, common_dtype)
else val.astype(common_dtype)
for val, common_dtype in zip(values, common_dtype_list)
]

outcol = libcudf.search.search_sorted(
sources,
values,
Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2067,3 +2067,16 @@ def test_loc_index_notinindex_slice(
expect = pdf.loc[lo:hi:take_order]
actual = df.loc[lo:hi:take_order]
assert_eq(expect, actual)


@pytest.mark.parametrize(
    "arg", [slice(2, 4), slice(2, 5), slice(2.3, 5), slice(4.6, 6)]
)
def test_series_iloc_float_int(arg):
    """Check that ``.loc`` slicing on a float index matches pandas.

    A cudf Series is built with the float labels ``[2.0, 3.0, 4.5, 5.5]``
    and sliced with integer and float bounds, some of which are not labels
    in the index. The result is compared with the same ``.loc`` slice taken
    on the pandas copy of that Series.

    NOTE(review): the test name says "iloc" but the body exercises ``.loc``.
    """
    gpu_series = cudf.Series(range(4), index=[2.0, 3.0, 4.5, 5.5])
    pandas_series = gpu_series.to_pandas()

    # The cudf result is evaluated first, then the pandas reference.
    assert_eq(gpu_series.loc[arg], pandas_series.loc[arg])
14 changes: 13 additions & 1 deletion python/cudf/cudf/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
# Copyright (c) 2018-2023, NVIDIA CORPORATION.
import cupy
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -156,3 +156,15 @@ def test_searchsorted_misc():
psr.searchsorted([-100, 3.00001, 2.2, 2.0, 2.000000001]),
sr.searchsorted([-100, 3.00001, 2.2, 2.0, 2.000000001]),
)


@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/54668")
def test_searchsorted_mixed_str_int():
    """Searching an integer Series for a string must raise ``ValueError``.

    The same ``searchsorted("a")`` call is made on a cudf Series and on the
    pandas Series it was created from; both are required to raise. The test
    is marked ``xfail``; see the pandas issue linked in the marker.
    """
    psr = pd.Series([1, 2, 3], dtype="int")
    sr = cudf.from_pandas(psr)

    # The return values are deliberately neither bound nor compared: when
    # both calls raise as required, nothing is returned, so a trailing
    # comparison of the two results could only fail with ``NameError``.
    with pytest.raises(ValueError):
        sr.searchsorted("a")
    with pytest.raises(ValueError):
        psr.searchsorted("a")

0 comments on commit 0e5f9db

Please sign in to comment.