From a9ad80990d8c9ea6b0eddb118177e822e9fa38f9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sat, 19 Aug 2023 10:03:11 -0700 Subject: [PATCH 1/3] Fix issue with searchsorted --- python/cudf/cudf/core/frame.py | 17 ++++++++++++++--- python/cudf/cudf/tests/test_indexing.py | 13 +++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 69757fe900d..5434a6d1aa7 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1409,12 +1409,23 @@ def searchsorted( if len(values) != len(self._data): raise ValueError("Mismatch number of columns to search for.") + common_dtype_list = [ + find_common_type([col.dtype, val.dtype]) + for col, val in zip(self._columns, values) + ] sources = [ col - if is_dtype_equal(col.dtype, val.dtype) - else col.astype(val.dtype) - for col, val in zip(self._columns, values) + if is_dtype_equal(col.dtype, common_dtype) + else col.astype(common_dtype) + for col, common_dtype in zip(self._columns, common_dtype_list) ] + values = [ + val + if is_dtype_equal(val.dtype, common_dtype) + else val.astype(common_dtype) + for val, common_dtype in zip(values, common_dtype_list) + ] + outcol = libcudf.search.search_sorted( sources, values, diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index a2398c3b705..d747ed13e27 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -2067,3 +2067,16 @@ def test_loc_index_notinindex_slice( expect = pdf.loc[lo:hi:take_order] actual = df.loc[lo:hi:take_order] assert_eq(expect, actual) + + +@pytest.mark.parametrize( + "arg", [slice(2, 4), slice(2, 5), slice(2.3, 5), slice(4.6, 6)] +) +def test_series_iloc_float_int(arg): + gs = cudf.Series(range(4), index=[2.0, 3.0, 4.5, 5.5]) + ps = gs.to_pandas() + + actual = gs.loc[arg] + expected = ps.loc[arg] + + assert_eq(actual, expected) From 18545aa767b04ad2d0c3e1f848c0a9781310d5ee Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 21 Aug 2023 13:43:52 -0500 Subject: [PATCH 2/3] add todo --- python/cudf/cudf/core/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5434a6d1aa7..b9f052e7626 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1409,6 +1409,8 @@ def searchsorted( if len(values) != len(self._data): raise ValueError("Mismatch number of columns to search for.") + # TODO: Change behavior based on the decision in + # https://github.com/pandas-dev/pandas/issues/54668 common_dtype_list = [ find_common_type([col.dtype, val.dtype]) for col, val in zip(self._columns, values) From fedcc7f69327fba51354338f0dbd9e82b14d821f Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 22 Aug 2023 07:51:02 -0700 Subject: [PATCH 3/3] Add test --- python/cudf/cudf/tests/test_search.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py index d3433a589a7..b0eacb1a709 100644 --- a/python/cudf/cudf/tests/test_search.py +++ b/python/cudf/cudf/tests/test_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. import cupy import numpy as np import pandas as pd @@ -156,3 +156,15 @@ def test_searchsorted_misc(): psr.searchsorted([-100, 3.00001, 2.2, 2.0, 2.000000001]), sr.searchsorted([-100, 3.00001, 2.2, 2.0, 2.000000001]), ) + + +@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/54668") +def test_searchsorted_mixed_str_int(): + psr = pd.Series([1, 2, 3], dtype="int") + sr = cudf.from_pandas(psr) + + with pytest.raises(ValueError): + actual = sr.searchsorted("a") + with pytest.raises(ValueError): + expect = psr.searchsorted("a") + assert_eq(expect, actual)