From d57d707f720d4349a2962afb489c44dc47ed4b8e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Nov 2023 06:32:12 -1000 Subject: [PATCH] Return correct index when loc.__getitem__[scalar] with CategoricalIndex (#156) Before, this would return an Index of the same type as the Categorical's subtype. I think long term it would be great to translate loc indexing in terms of iloc indexing (IIRC that's what pandas tries to do for a lot of cases). Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf-private/pull/156 --- python/cudf/cudf/core/dataframe.py | 10 +++++++++- python/cudf/cudf/tests/test_indexing.py | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 46c7557148..b188fd019b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -318,7 +318,15 @@ def _getitem_tuple_arg(self, arg): tmp_arg = ([tmp_arg[0]], tmp_arg[1]) if len(tmp_arg[0]) == 0: return columns_df._empty_like(keep_index=True) - tmp_arg = (as_column(tmp_arg[0]), tmp_arg[1]) + tmp_arg = ( + as_column( + tmp_arg[0], + dtype=self._frame.index.dtype + if is_categorical_dtype(self._frame.index.dtype) + else None, + ), + tmp_arg[1], + ) if is_bool_dtype(tmp_arg[0]): df = columns_df._apply_boolean_mask( diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 87f5753548..27e84f179b 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -2186,3 +2186,12 @@ def test_dataframe_iloc_scalar_interval_return_pd_scalar( result = getattr(obj, idx_method)[row_key, col_key] expected = getattr(obj.to_pandas(), idx_method)[row_key, col_key] assert result == expected + + +def 
test_scalar_loc_row_categoricalindex(): + df = cudf.DataFrame( + range(4), index=cudf.CategoricalIndex(["a", "a", "b", "c"]) + ) + result = df.loc["a"] + expected = df.to_pandas().loc["a"] + assert_eq(result, expected)