From d57d707f720d4349a2962afb489c44dc47ed4b8e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 3 Nov 2023 06:32:12 -1000
Subject: [PATCH] Return correct index when loc.__getitem__[scalar] with
 CategoricalIndex (#156)

Before, this would return an Index of the same type as the Categorical's subtype.

I think that, long term, it would be great to express loc indexing in terms of iloc indexing (IIRC, that's what pandas tries to do in a lot of cases).

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf-private/pull/156
---
 python/cudf/cudf/core/dataframe.py      | 10 +++++++++-
 python/cudf/cudf/tests/test_indexing.py |  9 +++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 46c7557148..b188fd019b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -318,7 +318,15 @@ def _getitem_tuple_arg(self, arg):
                     tmp_arg = ([tmp_arg[0]], tmp_arg[1])
                 if len(tmp_arg[0]) == 0:
                     return columns_df._empty_like(keep_index=True)
-                tmp_arg = (as_column(tmp_arg[0]), tmp_arg[1])
+                tmp_arg = (
+                    as_column(
+                        tmp_arg[0],
+                        dtype=self._frame.index.dtype
+                        if is_categorical_dtype(self._frame.index.dtype)
+                        else None,
+                    ),
+                    tmp_arg[1],
+                )
 
                 if is_bool_dtype(tmp_arg[0]):
                     df = columns_df._apply_boolean_mask(
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 87f5753548..27e84f179b 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -2186,3 +2186,12 @@ def test_dataframe_iloc_scalar_interval_return_pd_scalar(
         result = getattr(obj, idx_method)[row_key, col_key]
     expected = getattr(obj.to_pandas(), idx_method)[row_key, col_key]
     assert result == expected
+
+
+def test_scalar_loc_row_categoricalindex():
+    df = cudf.DataFrame(
+        range(4), index=cudf.CategoricalIndex(["a", "a", "b", "c"])
+    )
+    result = df.loc["a"]  # scalar label that occurs twice in the index
+    expected = df.to_pandas().loc["a"]  # pandas result is the reference
+    assert_eq(result, expected)