From 425a5dac64b7c74c061b588dc8725c5390517cf9 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Sun, 12 May 2024 15:44:24 -0500 Subject: [PATCH] Return same type as the original index for `.loc` operations (#15717) Fixes: #15716 This PR makes changes to `.loc` by preserving the original type at the end of the operation. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/15717 --- python/cudf/cudf/core/dataframe.py | 5 +++++ python/cudf/cudf/tests/test_dataframe.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b937d2da25c..b29089cb81a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -357,6 +357,11 @@ def _getitem_tuple_arg(self, arg): # as join is not assigning any names to index, # update it over here df.index.name = columns_df.index.name + if not isinstance( + df.index, MultiIndex + ) and is_numeric_dtype(df.index.dtype): + # Preserve the original index type. + df.index = df.index.astype(self._frame.index.dtype) df = df.sort_values(by=[tmp_col_name, cantor_name]) df.drop(columns=[tmp_col_name, cantor_name], inplace=True) # There were no indices found diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 8550bc91253..96301670e9c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -11010,3 +11010,21 @@ def test_dataframe_init_with_nans(): assert gdf["a"].dtype == np.dtype("float64") pdf = pd.DataFrame({"a": [1, 2, 3, np.nan]}) assert_eq(pdf, gdf) + + +@pytest.mark.parametrize("dtype1", ["int16", "float32"]) +@pytest.mark.parametrize("dtype2", ["int16", "float32"]) +def test_dataframe_loc_int_float(dtype1, dtype2): + df = cudf.DataFrame( + {"a": [10, 11, 12, 13, 14]}, + index=cudf.Index([1, 2, 3, 4, 5], dtype=dtype1), + ) + pdf = df.to_pandas() + + gidx = cudf.Index([2, 3, 4], dtype=dtype2) + pidx = gidx.to_pandas() + + actual = df.loc[gidx] + expected = pdf.loc[pidx] + + assert_eq(actual, expected, check_index_type=True, check_dtype=True)