From b9134db0ad48ab33f5a38d727a31269652f5e0d6 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 10:41:33 +0100
Subject: [PATCH 01/16] ColumnAccessor does not support duplicate key names

Raise NotImplementedError if indexing would try and produce a new
object with duplicate key names. This avoids silently losing
information.
---
 python/cudf/cudf/core/column_accessor.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index bec9c367ba9..0607b2bf6e4 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -404,6 +404,10 @@ def select_by_index(self, index: Any) -> ColumnAccessor:
         ColumnAccessor
         """
         keys = self.get_labels_by_index(index)
+        if len(set(keys)) != len(keys):
+            raise NotImplementedError(
+                "cudf DataFrames do not support repeated column names"
+            )
         data = {k: self._data[k] for k in keys}
         return self.__class__(
             data,
@@ -499,6 +503,10 @@ def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:
                 if keep
             )
         else:
+            if len(set(key)) != len(key):
+                raise NotImplementedError(
+                    "cudf DataFrames do not support repeated column names"
+                )
             data = {k: self._grouped_data[k] for k in key}
         if self.multiindex:
             data = _to_flat_dict(data)

From f5bfb7b3cc1048e0d154a820c9b588644bdb4549 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 10:48:49 +0100
Subject: [PATCH 02/16] Column index destructuring returns ColumnAccessor

Rather than just returning column names, since we will immediately
construct the ColumnAccessor, make that and return it.
---
 python/cudf/cudf/core/dataframe.py      | 18 ++++++------------
 python/cudf/cudf/core/indexing_utils.py | 19 ++++++-------------
 2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e67604069f1..6fcba4e9603 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -421,22 +421,16 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
     def __getitem__(self, arg):
         row_key, (
             col_is_scalar,
-            column_names,
+            ca,
         ) = indexing_utils.destructure_dataframe_iloc_indexer(arg, self._frame)
         row_spec = indexing_utils.parse_row_iloc_indexer(
             row_key, len(self._frame)
         )
-        ca = self._frame._data
-        index = self._frame.index
         if col_is_scalar:
-            s = Series._from_data(
-                ca._select_by_names(column_names), index=index
-            )
-            return s._getitem_preprocessed(row_spec)
-        if column_names != list(self._frame._column_names):
-            frame = self._frame._from_data(
-                ca._select_by_names(column_names), index=index
-            )
+            series = Series._from_data(ca, index=self._frame.index)
+            return series._getitem_preprocessed(row_spec)
+        if ca.names != self._frame._data.names:
+            frame = self._frame._from_data(ca, index=self._frame.index)
         else:
             frame = self._frame
         if isinstance(row_spec, indexing_utils.MapIndexer):
@@ -454,7 +448,7 @@ def __getitem__(self, arg):
                 # you only ask for one row.
                 new_name = result.index[0]
                 result = Series._concat(
-                    [result[name] for name in column_names],
+                    [result[name] for name in frame._data.names],
                     index=result.keys(),
                 )
                 result.name = new_name
diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 7242de9964f..385f0074563 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, List, Tuple, Union
+from typing import Any, Tuple, Union
 
 from typing_extensions import TypeAlias
 
@@ -14,6 +14,7 @@
     is_integer,
     is_integer_dtype,
 )
+from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.copy_types import BooleanMask, GatherMap
 
 
@@ -55,8 +56,6 @@ class ScalarIndexer:
     EmptyIndexer, MapIndexer, MaskIndexer, ScalarIndexer, SliceIndexer
 ]
 
-ColumnLabels: TypeAlias = List[str]
-
 
 def destructure_iloc_key(
     key: Any, frame: Union[cudf.Series, cudf.DataFrame]
@@ -124,7 +123,7 @@ def destructure_iloc_key(
 
 def destructure_dataframe_iloc_indexer(
     key: Any, frame: cudf.DataFrame
-) -> Tuple[Any, Tuple[bool, ColumnLabels]]:
+) -> Tuple[Any, Tuple[bool, ColumnAccessor]]:
     """Destructure an index key for DataFrame iloc getitem.
 
     Parameters
@@ -154,13 +153,7 @@ def destructure_dataframe_iloc_indexer(
         cols = slice(None)
     scalar = is_integer(cols)
     try:
-        column_names: ColumnLabels = list(
-            frame._data.get_labels_by_index(cols)
-        )
-        if len(set(column_names)) != len(column_names):
-            raise NotImplementedError(
-                "cudf DataFrames do not support repeated column names"
-            )
+        ca = frame._data.select_by_index(cols)
     except TypeError:
         raise TypeError(
             "Column indices must be integers, slices, "
@@ -168,10 +161,10 @@ def destructure_dataframe_iloc_indexer(
         )
     if scalar:
         assert (
-            len(column_names) == 1
+            len(ca) == 1
         ), "Scalar column indexer should not produce more than one column"
 
-    return rows, (scalar, column_names)
+    return rows, (scalar, ca)
 
 
 def destructure_series_iloc_indexer(key: Any, frame: cudf.Series) -> Any:

From 58a7cb7a03334f84936d3682a91c9c5929d85400 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 10:50:22 +0100
Subject: [PATCH 03/16] Remove unused code

---
 python/cudf/cudf/core/column_accessor.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 0607b2bf6e4..72428da0e8a 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -20,7 +20,6 @@
 import pandas as pd
 from packaging.version import Version
 from pandas.api.types import is_bool
-from typing_extensions import Self
 
 import cudf
 from cudf.core import column
@@ -481,13 +480,6 @@ def set_by_label(self, key: Any, value: Any, validate: bool = True):
         self._data[key] = value
         self._clear_cache()
 
-    def _select_by_names(self, names: abc.Sequence) -> Self:
-        return self.__class__(
-            {key: self[key] for key in names},
-            multiindex=self.multiindex,
-            level_names=self.level_names,
-        )
-
     def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:
         # Might be a generator
         key = tuple(key)

From f5a9d61eb183516b86d85ff8afd0a95401486aae Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 12:24:58 +0100
Subject: [PATCH 04/16] Fall back to positional in Series.__getitem__ for
 integer key

If the index is non-numeric, Series.__getitem__ should fall back to
positional lookup for integer keys.
---
 python/cudf/cudf/core/series.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 2fef741ac09..5c3d320df8e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1362,6 +1362,17 @@ def _getitem_preprocessed(
     def __getitem__(self, arg):
         if isinstance(arg, slice):
             return self.iloc[arg]
+        elif is_integer(arg) and self.index.dtype.kind not in {"i", "u", "f"}:
+            # Series getitem looks up integers by position if the
+            # index is non-numeric, but is deprecated in pandas 2
+            # https://github.com/pandas-dev/pandas/issues/50617
+            # warnings.warn(
+            #     "Treating integer keys positionally is deprecated and "
+            #     "will be removed in a future version. To find a value "
+            #     "by position use ser.iloc[pos] instead",
+            #     FutureWarning,
+            # )
+            return self.iloc[arg]
         else:
             return self.loc[arg]
 

From fed2bcbb1aa25cf5e844daf370d4678afcda06a3 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 12:36:14 +0100
Subject: [PATCH 05/16] Parsing to structured data for label-based lookup

Similar to positional indexing, implement parsing to structured
IndexingSpec data for label indexing. This does not yet treat
MultiIndex lookups for which the rules for combining the multi-level
keys are more complicated.
---
 python/cudf/cudf/core/indexing_utils.py | 341 +++++++++++++++++++++++-
 1 file changed, 340 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 385f0074563..e089040bbaf 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -3,20 +3,27 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, Tuple, Union
+from typing import TYPE_CHECKING, Any, Tuple, Union
 
+import numpy as np
 from typing_extensions import TypeAlias
 
 import cudf
+import cudf._lib as libcudf
+from cudf._lib.types import size_type_dtype
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
     is_bool_dtype,
     is_integer,
     is_integer_dtype,
+    is_scalar,
 )
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.copy_types import BooleanMask, GatherMap
 
+if TYPE_CHECKING:
+    from cudf.core.column import ColumnBase
+
 
 class EmptyIndexer:
     """An indexer that will produce an empty result."""
@@ -234,3 +241,335 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
                 "Cannot index by location "
                 f"with non-integer key of type {type(key)}"
             )
+
+
+def destructure_loc_key(
+    key: Any, frame: cudf.Series | cudf.DataFrame
+) -> tuple[Any, ...]:
+    """
+    Destructure a potentially tuple-typed key into row and column indexers
+
+    Tuple arguments to loc indexing are treated specially. They are
+    picked apart into indexers for the row and column. If the number
+    of entries is less than the number of modes of the frame, missing
+    entries are slice-expanded.
+
+    If the user-provided key is not a tuple, it is treated as if it
+    were a singleton tuple, and then slice-expanded.
+
+    Once this destructuring has occurred, any entries that are
+    callables are then called with the indexed frame. This should
+    return a valid indexing object for the rows (respectively
+    columns), namely one of:
+
+    - A boolean mask of the same length as the frame in the given
+      dimension
+    - A scalar label looked up in the index
+    - A scalar integer that indexes the frame
+    - An array-like of labels looked up in the index
+    - A slice of the index
+    - For multiindices, a tuple of per level indexers
+
+    Slice-based indexing is on the closed interval [start, end], rather
+    than the semi-open interval [start, end)
+
+    Parameters
+    ----------
+    key
+        The key to destructure
+    frame
+        DataFrame or Series to provide context
+
+    Returns
+    -------
+    tuple of indexers with length equal to the dimension of the frame
+
+    Raises
+    ------
+    IndexError
+        If there are too many indexers.
+    """
+    n = len(frame.shape)
+    if (
+        isinstance(frame.index, cudf.MultiIndex)
+        and n == 2
+        and isinstance(key, tuple)
+        and all(map(is_scalar, key))
+    ):
+        # This is "best-effort" and ambiguous
+        if len(key) == 2:
+            if key[1] in frame.index._columns[1]:
+                # key just indexes the rows
+                key = (key,)
+            elif key[1] in frame._data:
+                # key indexes rows and columns
+                key = key
+            else:
+                # key indexes rows and we will raise a keyerror
+                key = (key,)
+        else:
+            # key just indexes rows
+            key = (key,)
+    if isinstance(key, tuple):
+        # Key potentially indexes rows and columns, slice-expand to
+        # shape of frame
+        indexers = key + (slice(None),) * (n - len(key))
+        if len(indexers) > n:
+            raise IndexError(
+                f"Too many indexers: got {len(indexers)} expected {n}"
+            )
+    else:
+        # Key indexes rows, slice-expand to shape of frame
+        indexers = (key, *(slice(None),) * (n - 1))
+    return tuple(k(frame) if callable(k) else k for k in indexers)
+
+
+def destructure_dataframe_loc_indexer(
+    key: Any, frame: cudf.DataFrame
+) -> Tuple[Any, Tuple[bool, ColumnAccessor]]:
+    """Destructure an index key for DataFrame loc getitem.
+
+    Parameters
+    ----------
+    key
+        Key to destructure
+    frame
+        DataFrame to provide context context
+
+    Returns
+    -------
+    tuple
+        2-tuple of a key for the rows and tuple of
+        (column_index_is_scalar, column_names) for the columns
+
+    Raises
+    ------
+    TypeError
+        If the column indexer is invalid
+    IndexError
+        If the provided key does not destructure correctly
+    NotImplementedError
+        If the requested column indexer repeats columns
+    """
+    rows, cols = destructure_loc_key(key, frame)
+    if cols is Ellipsis:
+        cols = slice(None)
+    try:
+        scalar = cols in frame._data
+    except TypeError:
+        scalar = False
+    try:
+        ca = frame._data.select_by_label(cols)
+    except TypeError:
+        raise TypeError(
+            "Column indices must be names, slices, "
+            "list-like of names, or boolean mask"
+        )
+    if scalar:
+        assert (
+            len(ca) == 1
+        ), "Scalar column indexer should not produce more than one column"
+
+    return rows, (scalar, ca)
+
+
+def destructure_series_loc_indexer(key: Any, frame: cudf.Series) -> Any:
+    """Destructure an index key for Series loc getitem.
+
+    Parameters
+    ----------
+    key
+        Key to destructure
+    frame
+        Series for unpacking context
+
+    Returns
+    -------
+    Single key that will index the rows
+    """
+    (rows,) = destructure_loc_key(key, frame)
+    return rows
+
+
+def ordered_find(needles: "ColumnBase", haystack: "ColumnBase") -> GatherMap:
+    """Find locations of needles in a haystack preserving order
+
+    Parameters
+    ----------
+    needles
+        Labels to look for
+    haystack
+        Haystack to search in
+
+    Returns
+    -------
+    NumericalColumn
+        Integer gather map of locations needles were found in haystack
+
+    Raises
+    ------
+    KeyError
+        If not all needles were found in the haystack.
+        If needles cannot be converted to the dtype of haystack.
+
+    Notes
+    -----
+    This sorts the gather map so that the result comes back in the
+    order the needles were specified (and are found in the haystack).
+    """
+    # Pre-process to match dtypes
+    needle_kind = needles.dtype.kind
+    haystack_kind = haystack.dtype.kind
+    if haystack_kind == "O":
+        try:
+            needles = needles.astype(haystack.dtype)
+        except ValueError:
+            # Pandas raise KeyError here
+            raise KeyError("Dtype mismatch in label lookup")
+    elif needle_kind == haystack_kind or {
+        haystack_kind,
+        needle_kind,
+    }.issubset({"i", "u", "f"}):
+        needles = needles.astype(haystack.dtype)
+    elif needles.dtype != haystack.dtype:
+        # Pandas raise KeyError here
+        raise KeyError("Dtype mismatch in label lookup")
+    # Can't always do an inner join because then we can't check if we
+    # had missing keys (can't check the length because the entries in
+    # the needle might appear multiple times in the haystack).
+    lgather, rgather = libcudf.join.join([needles], [haystack], how="left")
+    (left_order,) = libcudf.copying.gather(
+        [cudf.core.column.arange(len(needles), dtype=size_type_dtype)],
+        lgather,
+        nullify=False,
+    )
+    (right_order,) = libcudf.copying.gather(
+        [cudf.core.column.arange(len(haystack), dtype=size_type_dtype)],
+        rgather,
+        nullify=True,
+    )
+    if right_order.null_count > 0:
+        raise KeyError("Not all keys in index")
+    (rgather,) = libcudf.sort.sort_by_key(
+        [rgather],
+        [left_order, right_order],
+        [True, True],
+        ["last", "last"],
+        stable=True,
+    )
+    return GatherMap.from_column_unchecked(
+        rgather, len(haystack), nullify=False
+    )
+
+
+def parse_single_row_loc_key(
+    key: Any,
+    index: cudf.BaseIndex,
+) -> IndexingSpec:
+    """
+    Turn a single label-based row indexer into structured information.
+
+    This converts label-based lookups into structured positional
+    lookups.
+
+    Valid values for the key are
+    - a slice (endpoints are looked up)
+    - a scalar label
+    - a boolean mask of the same length as the index
+    - a column of labels to look up (may be empty)
+
+    Parameters
+    ----------
+    key
+        Key for label-based row indexing
+    index
+        Index to act as haystack for labels
+
+    Returns
+    -------
+    IndexingSpec
+        Structured information for indexing
+
+    Raises
+    ------
+    KeyError
+        If any label is not found
+    ValueError
+        If labels cannot be coerced to index dtype
+    """
+    n = len(index)
+    if isinstance(key, slice):
+        # Convert label slice to index slice
+        # TODO: datetime index must be handled specially (unless we go for
+        # pandas 2 compatibility)
+        parsed_key = index.find_label_range(key)
+        if len(range(n)[parsed_key]) == 0:
+            return EmptyIndexer()
+        else:
+            return SliceIndexer(parsed_key)
+    else:
+        is_scalar = _is_scalar_or_zero_d_array(key)
+        if is_scalar and isinstance(key, np.ndarray):
+            key = cudf.core.column.as_column(key.item(), dtype=key.dtype)
+        else:
+            key = cudf.core.column.as_column(key)
+        if (
+            isinstance(key, cudf.core.column.CategoricalColumn)
+            and index.dtype != key.dtype
+        ):
+            # TODO: is this right?
+            key = key._get_decategorized_column()
+        if is_bool_dtype(key.dtype):
+            # The only easy one.
+            return MaskIndexer(BooleanMask(key, n))
+        elif len(key) == 0:
+            return EmptyIndexer()
+        else:
+            # TODO: promote to Index objects, so this can handle
+            # categoricals correctly?
+            (haystack,) = index._columns
+            if isinstance(index, cudf.DatetimeIndex):
+                # Try to turn strings into datetimes
+                key = cudf.core.column.as_column(key, dtype=index.dtype)
+            gather_map = ordered_find(key, haystack)
+            if is_scalar and len(gather_map.column) == 1:
+                return ScalarIndexer(gather_map)
+            else:
+                return MapIndexer(gather_map)
+
+
+def parse_row_loc_indexer(key: Any, index: cudf.BaseIndex) -> IndexingSpec:
+    """
+    Normalize to return structured information for a label-based row indexer.
+
+    Given a label-based row indexer that has already been destructured by
+    :func:`destructure_loc_key`, inspect further and produce structured
+    information for indexing operations to act upon.
+
+    Parameters
+    ----------
+    key
+        Suitably destructured key for row indexing
+    index
+        Index to provide context
+
+    Returns
+    -------
+    IndexingSpec
+        Structured data for indexing. A tag + parsed data.
+
+    Raises
+    ------
+    KeyError
+        If a valid type of indexer is provided, but not all keys are
+        found
+    TypeError
+        If the indexing key is otherwise invalid.
+    """
+    # TODO: multiindices need to be treated separately
+    if key is Ellipsis:
+        # Ellipsis is handled here because multiindex level-based
+        # indices don't handle ellipsis in pandas.
+        return SliceIndexer(slice(None))
+    else:
+        return parse_single_row_loc_key(key, index)

From 4be3035100f83971a143334204a9a6813909cfca Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 12:43:10 +0100
Subject: [PATCH 06/16] Refactor to expose DataFrame._getitem_preprocessed

---
 python/cudf/cudf/core/dataframe.py     | 99 +++++++++++++++++---------
 python/cudf/cudf/core/indexed_frame.py |  2 +-
 2 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 6fcba4e9603..38e5c2742ab 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -36,7 +36,7 @@
 from pandas.core.dtypes.common import is_float, is_integer
 from pandas.io.formats import console
 from pandas.io.formats.printing import pprint_thing
-from typing_extensions import assert_never
+from typing_extensions import Self, assert_never
 
 import cudf
 import cudf.core.common
@@ -426,39 +426,7 @@ def __getitem__(self, arg):
         row_spec = indexing_utils.parse_row_iloc_indexer(
             row_key, len(self._frame)
         )
-        if col_is_scalar:
-            series = Series._from_data(ca, index=self._frame.index)
-            return series._getitem_preprocessed(row_spec)
-        if ca.names != self._frame._data.names:
-            frame = self._frame._from_data(ca, index=self._frame.index)
-        else:
-            frame = self._frame
-        if isinstance(row_spec, indexing_utils.MapIndexer):
-            return frame._gather(row_spec.key, keep_index=True)
-        elif isinstance(row_spec, indexing_utils.MaskIndexer):
-            return frame._apply_boolean_mask(row_spec.key, keep_index=True)
-        elif isinstance(row_spec, indexing_utils.SliceIndexer):
-            return frame._slice(row_spec.key)
-        elif isinstance(row_spec, indexing_utils.ScalarIndexer):
-            result = frame._gather(row_spec.key, keep_index=True)
-            # Attempt to turn into series.
-            try:
-                # Behaviour difference from pandas, which will merrily
-                # turn any heterogeneous set of columns into a series if
-                # you only ask for one row.
-                new_name = result.index[0]
-                result = Series._concat(
-                    [result[name] for name in frame._data.names],
-                    index=result.keys(),
-                )
-                result.name = new_name
-                return result
-            except TypeError:
-                # Couldn't find a common type, just return a 1xN dataframe.
-                return result
-        elif isinstance(row_spec, indexing_utils.EmptyIndexer):
-            return frame._empty_like(keep_index=True)
-        assert_never(row_spec)
+        return self._frame._getitem_preprocessed(row_spec, col_is_scalar, ca)
 
     @_cudf_nvtx_annotate
     def _setitem_tuple_arg(self, key, value):
@@ -957,7 +925,7 @@ def _from_data(
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
         columns: Any = None,
-    ) -> DataFrame:
+    ) -> Self:
         out = super()._from_data(data=data, index=index)
         if columns is not None:
             out.columns = columns
@@ -1116,6 +1084,67 @@ def __setattr__(self, key, col):
         else:
             super().__setattr__(key, col)
 
+    def _getitem_preprocessed(
+        self,
+        spec: indexing_utils.IndexingSpec,
+        col_is_scalar: bool,
+        ca: ColumnAccessor,
+    ) -> Union[Self, Series]:
+        """Get a subset of rows and columns given structured data
+
+        Parameters
+        ----------
+        spec
+            Indexing specification for the rows
+        col_is_scalar
+            Was the indexer of the columns a scalar (return a Series)
+        ca
+            ColumnAccessor representing the subsetted column data
+
+        Returns
+        -------
+        Subsetted DataFrame or Series (if a scalar row is requested
+        and the concatenation of the column types is possible)
+
+        Notes
+        -----
+        This function performs no bounds-checking or massaging of the
+        inputs.
+        """
+        if col_is_scalar:
+            series = Series._from_data(ca, index=self.index)
+            return series._getitem_preprocessed(spec)
+        if ca.names != self._data.names:
+            frame = self._from_data(ca, index=self.index)
+        else:
+            frame = self
+        if isinstance(spec, indexing_utils.MapIndexer):
+            return frame._gather(spec.key, keep_index=True)
+        elif isinstance(spec, indexing_utils.MaskIndexer):
+            return frame._apply_boolean_mask(spec.key, keep_index=True)
+        elif isinstance(spec, indexing_utils.SliceIndexer):
+            return frame._slice(spec.key)
+        elif isinstance(spec, indexing_utils.ScalarIndexer):
+            result = frame._gather(spec.key, keep_index=True)
+            # Attempt to turn into series.
+            try:
+                # Behaviour difference from pandas, which will merrily
+                # turn any heterogeneous set of columns into a series if
+                # you only ask for one row.
+                new_name = result.index[0]
+                result = Series._concat(
+                    [result[name] for name in frame._data.names],
+                    index=result.keys(),
+                )
+                result.name = new_name
+                return result
+            except TypeError:
+                # Couldn't find a common type, just return a 1xN dataframe.
+                return result
+        elif isinstance(spec, indexing_utils.EmptyIndexer):
+            return frame._empty_like(keep_index=True)
+        assert_never(spec)
+
     @_cudf_nvtx_annotate
     def __getitem__(self, arg):
         """
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 4c6eb3a50e9..657ed2defec 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -1812,7 +1812,7 @@ def _gather(
         self,
         gather_map: GatherMap,
         keep_index=True,
-    ):
+    ) -> Self:
         """Gather rows of frame specified by indices in `gather_map`.
 
         Maintain the index if keep_index is True.

From 9880b3c2bae0cc5f2541efef00f7077d9e6d094b Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 13:17:19 +0100
Subject: [PATCH 07/16] Use structured data for label indexing for
 non-MultiIndex case

---
 python/cudf/cudf/core/dataframe.py | 101 +++++++----------------------
 python/cudf/cudf/core/series.py    |  10 +++
 2 files changed, 32 insertions(+), 79 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 38e5c2742ab..a5a6fba02a4 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -122,22 +122,6 @@ def _shape_mismatch_error(x, y):
 
 
 class _DataFrameIndexer(_FrameIndexer):
-    def __getitem__(self, arg):
-        if (
-            isinstance(self._frame.index, MultiIndex)
-            or self._frame._data.multiindex
-        ):
-            # This try/except block allows the use of pandas-like
-            # tuple arguments into MultiIndex dataframes.
-            try:
-                return self._getitem_tuple_arg(arg)
-            except (TypeError, KeyError, IndexError, ValueError):
-                return self._getitem_tuple_arg((arg, slice(None)))
-        else:
-            if not isinstance(arg, tuple):
-                arg = (arg, slice(None))
-            return self._getitem_tuple_arg(arg)
-
     def __setitem__(self, key, value):
         if not isinstance(key, tuple):
             key = (key, slice(None))
@@ -229,14 +213,30 @@ class _DataFrameLocIndexer(_DataFrameIndexer):
     For selection by label.
     """
 
-    @_cudf_nvtx_annotate
-    def _getitem_scalar(self, arg):
-        return self._frame[arg[1]].loc[arg[0]]
+    def __getitem__(self, arg):
+        if isinstance(self._frame.index, cudf.MultiIndex):
+            # This try/except block allows the use of pandas-like
+            # tuple arguments into MultiIndex dataframes.
+            try:
+                return self._getitem_tuple_arg(arg)
+            except (TypeError, KeyError, IndexError, ValueError):
+                return self._getitem_tuple_arg((arg, slice(None)))
+        else:
+            row_key, (
+                col_is_scalar,
+                ca,
+            ) = indexing_utils.destructure_dataframe_loc_indexer(
+                arg, self._frame
+            )
+            row_spec = indexing_utils.parse_row_loc_indexer(
+                row_key, self._frame.index
+            )
+            return self._frame._getitem_preprocessed(
+                row_spec, col_is_scalar, ca
+            )
 
     @_cudf_nvtx_annotate
     def _getitem_tuple_arg(self, arg):
-        from uuid import uuid4
-
         # Step 1: Gather columns
         if isinstance(arg, tuple):
             columns_df = self._frame._get_columns_by_label(arg[1])
@@ -262,64 +262,7 @@ def _getitem_tuple_arg(self, arg):
                     row_arg = arg
                 return columns_df.index._get_row_major(columns_df, row_arg)
         else:
-            if isinstance(arg[0], slice):
-                out = _get_label_range_or_mask(
-                    columns_df.index, arg[0].start, arg[0].stop, arg[0].step
-                )
-                if isinstance(out, slice):
-                    df = columns_df._slice(out)
-                else:
-                    df = columns_df._apply_boolean_mask(
-                        BooleanMask.from_column_unchecked(
-                            cudf.core.column.as_column(out)
-                        )
-                    )
-            else:
-                tmp_arg = arg
-                if is_scalar(arg[0]):
-                    # If a scalar, there is possibility of having duplicates.
-                    # Join would get all the duplicates. So, converting it to
-                    # an array kind.
-                    tmp_arg = ([tmp_arg[0]], tmp_arg[1])
-                if len(tmp_arg[0]) == 0:
-                    return columns_df._empty_like(keep_index=True)
-                tmp_arg = (as_column(tmp_arg[0]), tmp_arg[1])
-
-                if is_bool_dtype(tmp_arg[0]):
-                    df = columns_df._apply_boolean_mask(
-                        BooleanMask(tmp_arg[0], len(columns_df))
-                    )
-                else:
-                    tmp_col_name = str(uuid4())
-                    cantor_name = "_" + "_".join(
-                        map(str, columns_df._data.names)
-                    )
-                    if columns_df._data.multiindex:
-                        # column names must be appropriate length tuples
-                        extra = tuple(
-                            "" for _ in range(columns_df._data.nlevels - 1)
-                        )
-                        tmp_col_name = (tmp_col_name, *extra)
-                        cantor_name = (cantor_name, *extra)
-                    other_df = DataFrame(
-                        {tmp_col_name: column.arange(len(tmp_arg[0]))},
-                        index=as_index(tmp_arg[0]),
-                    )
-                    columns_df[cantor_name] = column.arange(len(columns_df))
-                    df = other_df.join(columns_df, how="inner")
-                    # as join is not assigning any names to index,
-                    # update it over here
-                    df.index.name = columns_df.index.name
-                    df = df.sort_values(by=[tmp_col_name, cantor_name])
-                    df.drop(columns=[tmp_col_name, cantor_name], inplace=True)
-                    # There were no indices found
-                    if len(df) == 0:
-                        raise KeyError(arg)
-
-        # Step 3: Downcast
-        if self._can_downcast_to_series(df, arg):
-            return self._downcast_to_series(df, arg)
-        return df
+            raise RuntimeError("Should have been handled by now")
 
     @_cudf_nvtx_annotate
     def _setitem_tuple_arg(self, key, value):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 5c3d320df8e..8c584b2f17e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -237,8 +237,18 @@ class _SeriesLocIndexer(_FrameIndexer):
     Label-based selection
     """
 
+    _frame: Series
+
     @_cudf_nvtx_annotate
     def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]:
+        if not isinstance(self._frame.index, cudf.MultiIndex):
+            indexing_spec = indexing_utils.parse_row_loc_indexer(
+                indexing_utils.destructure_series_loc_indexer(
+                    arg, self._frame
+                ),
+                self._frame.index,
+            )
+            return self._frame._getitem_preprocessed(indexing_spec)
         if isinstance(arg, pd.MultiIndex):
             arg = cudf.from_pandas(arg)
 

From 199ffdfe634f0460ea4ebeb0d64d58d1f73b84f6 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 16:50:34 +0100
Subject: [PATCH 08/16] Move get_label_range_or_mask to indexing_utils

Trying to move all the special-case handling into one place.
Additionally, handle step != 1 correctly for datetime indexing.
---
 python/cudf/cudf/core/dataframe.py      | 13 ++++---
 python/cudf/cudf/core/indexed_frame.py  | 23 ------------
 python/cudf/cudf/core/indexing_utils.py | 48 ++++++++++++++++++++-----
 python/cudf/cudf/core/series.py         | 11 ++++--
 4 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a5a6fba02a4..09386f88f3d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -75,7 +75,6 @@
 from cudf.core.indexed_frame import (
     IndexedFrame,
     _FrameIndexer,
-    _get_label_range_or_mask,
     _indices_from_labels,
     doc_reset_index_template,
 )
@@ -279,10 +278,16 @@ def _setitem_tuple_arg(self, key, value):
             columns_df = self._frame._get_columns_by_label(key[1])
         except KeyError:
             if not self._frame.empty and isinstance(key[0], slice):
-                pos_range = _get_label_range_or_mask(
-                    self._frame.index, key[0].start, key[0].stop, key[0].step
+                indexer = indexing_utils.find_label_range_or_mask(
+                    key[0], self._frame.index
                 )
-                idx = self._frame.index[pos_range]
+                index = self._frame.index
+                if isinstance(indexer, indexing_utils.EmptyIndexer):
+                    idx = index[0:0:1]
+                elif isinstance(indexer, indexing_utils.SliceIndexer):
+                    idx = index[indexer.key]
+                else:
+                    idx = index[indexer.key.column]
             elif self._frame.empty and isinstance(key[0], slice):
                 idx = None
             else:
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 657ed2defec..e120282948a 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -193,29 +193,6 @@ def _indices_from_labels(obj, labels):
     return lhs.join(rhs).sort_values(by=["__", "_"])["_"]
 
 
-def _get_label_range_or_mask(index, start, stop, step):
-    if (
-        not (start is None and stop is None)
-        and type(index) is cudf.core.index.DatetimeIndex
-        and index.is_monotonic_increasing is False
-    ):
-        start = pd.to_datetime(start)
-        stop = pd.to_datetime(stop)
-        if start is not None and stop is not None:
-            if start > stop:
-                return slice(0, 0, None)
-            # TODO: Once Index binary ops are updated to support logical_and,
-            # can use that instead of using cupy.
-            boolean_mask = cp.logical_and((index >= start), (index <= stop))
-        elif start is not None:
-            boolean_mask = index >= start
-        else:
-            boolean_mask = index <= stop
-        return boolean_mask
-    else:
-        return index.find_label_range(slice(start, stop, step))
-
-
 class _FrameIndexer:
     """Parent class for indexers."""
 
diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index e089040bbaf..87cb8270593 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -3,9 +3,12 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from functools import partial, reduce
 from typing import TYPE_CHECKING, Any, Tuple, Union
 
+import cupy as cp
 import numpy as np
+import pandas as pd
 from typing_extensions import TypeAlias
 
 import cudf
@@ -462,6 +465,42 @@ def ordered_find(needles: "ColumnBase", haystack: "ColumnBase") -> GatherMap:
     )
 
 
+def find_label_range_or_mask(
+    key: slice, index: cudf.BaseIndex
+) -> EmptyIndexer | MapIndexer | MaskIndexer | SliceIndexer:
+    # TODO: datetime index must only be handled specially until pandas 2
+    if (
+        not (key.start is None and key.stop is None)
+        and isinstance(index, cudf.core.index.DatetimeIndex)
+        and not index.is_monotonic_increasing
+    ):
+        start = pd.to_datetime(key.start)
+        stop = pd.to_datetime(key.stop)
+        mask = []
+        if start is not None:
+            mask.append(index >= start)
+        if stop is not None:
+            mask.append(index <= stop)
+        bool_mask = reduce(partial(cp.logical_and, out=mask[0]), mask)
+        if key.step is None or key.step == 1:
+            return MaskIndexer(BooleanMask(bool_mask, len(index)))
+        else:
+            (map_,) = bool_mask.nonzero()
+            return MapIndexer(
+                GatherMap.from_column_unchecked(
+                    cudf.core.column.as_column(map_[:: key.step]),
+                    len(index),
+                    nullify=False,
+                )
+            )
+    else:
+        parsed_key = index.find_label_range(key)
+        if len(range(len(index))[parsed_key]) == 0:
+            return EmptyIndexer()
+        else:
+            return SliceIndexer(parsed_key)
+
+
 def parse_single_row_loc_key(
     key: Any,
     index: cudf.BaseIndex,
@@ -499,14 +538,7 @@ def parse_single_row_loc_key(
     """
     n = len(index)
     if isinstance(key, slice):
-        # Convert label slice to index slice
-        # TODO: datetime index must be handled specially (unless we go for
-        # pandas 2 compatibility)
-        parsed_key = index.find_label_range(key)
-        if len(range(n)[parsed_key]) == 0:
-            return EmptyIndexer()
-        else:
-            return SliceIndexer(parsed_key)
+        return find_label_range_or_mask(key, index)
     else:
         is_scalar = _is_scalar_or_zero_d_array(key)
         if is_scalar and isinstance(key, np.ndarray):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8c584b2f17e..1c5b9173a97 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -66,7 +66,6 @@
 from cudf.core.indexed_frame import (
     IndexedFrame,
     _FrameIndexer,
-    _get_label_range_or_mask,
     _indices_from_labels,
     doc_reset_index_template,
 )
@@ -322,9 +321,15 @@ def _loc_to_iloc(self, arg):
                 raise KeyError("Label scalar is out of bounds")
 
         elif isinstance(arg, slice):
-            return _get_label_range_or_mask(
-                self._frame.index, arg.start, arg.stop, arg.step
+            indexer = indexing_utils.find_label_range_or_mask(
+                arg, self._frame.index
             )
+            if isinstance(indexer, indexing_utils.EmptyIndexer):
+                return slice(0, 0, 1)
+            elif isinstance(indexer, indexing_utils.SliceIndexer):
+                return indexer.key
+            else:
+                return indexer.key.column
         elif isinstance(arg, (cudf.MultiIndex, pd.MultiIndex)):
             if isinstance(arg, pd.MultiIndex):
                 arg = cudf.MultiIndex.from_pandas(arg)

From 00de6c4536c9c52fa23b03fab6c0dca0aa8f6000 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 16:51:26 +0100
Subject: [PATCH 09/16] Cast labels to datetime in
 DatetimeIndex.get_slice_bound

---
 python/cudf/cudf/core/index.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index c7e25cdc430..ad1b9e7d937 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2129,6 +2129,11 @@ def searchsorted(
             value, side=side, ascending=ascending, na_position=na_position
         )
 
+    def get_slice_bound(self, label, side: str, kind=None) -> int:
+        if isinstance(label, str):
+            label = pd.to_datetime(label)
+        return super().get_slice_bound(label, side, kind=kind)
+
     @property  # type: ignore
     @_cudf_nvtx_annotate
     def year(self):

From da093f4b33900402e007393ffa8f0c9a9b67e17d Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 17:09:49 +0100
Subject: [PATCH 10/16] Remove some xfails

---
 python/cudf/cudf/tests/test_indexing.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 2e169a2b0b1..05e14107f12 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1824,7 +1824,6 @@ def test_loc_repeated_index_label_issue_8693():
     assert_eq(expect, actual)
 
 
-@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/13268")
 @pytest.mark.parametrize(
     "indexer", [(..., 0), (0, ...)], ids=["row_ellipsis", "column_ellipsis"]
 )
@@ -1873,6 +1872,17 @@ def test_iloc_integer_categorical_issue_13013(indexer):
     assert_eq(expect, actual)
 
 
+@pytest.mark.parametrize("indexer", [[1], [0, 2]])
+def test_loc_integer_categorical_issue_13014(indexer):
+    # https://github.com/rapidsai/cudf/issues/13014
+    s = pd.Series([0, 1, 2])
+    index = pd.Categorical(indexer)
+    expect = s.loc[index]
+    c = cudf.from_pandas(s)
+    actual = c.loc[index]
+    assert_eq(expect, actual)
+
+
 def test_iloc_incorrect_boolean_mask_length_issue_13015():
     # https://github.com/rapidsai/cudf/issues/13015
     s = pd.Series([0, 1, 2])
@@ -1959,7 +1969,6 @@ def test_loc_unsorted_index_slice_lookup_keyerror_issue_12833():
         cdf.loc[1:5]
 
 
-@pytest.mark.xfail(reason="https://github.com/rapidsai/cudf/issues/13379")
 @pytest.mark.parametrize("index", [range(5), list(range(5))])
 def test_loc_missing_label_keyerror_issue_13379(index):
     # https://github.com/rapidsai/cudf/issues/13379
@@ -1973,6 +1982,16 @@ def test_loc_missing_label_keyerror_issue_13379(index):
         cdf.loc[[0, 5]]
 
 
+def test_loc_categorical_no_integer_fallback_issue_13653():
+    # https://github.com/rapidsai/cudf/issues/13653
+    s = cudf.Series(
+        [1, 2], index=cudf.CategoricalIndex([3, 4], categories=[3, 4])
+    )
+    actual = s.loc[3]
+    expect = s.to_pandas().loc[3]
+    assert_eq(actual, expect)
+
+
 @pytest.mark.parametrize("series", [True, False], ids=["Series", "DataFrame"])
 def test_loc_repeated_label_ordering_issue_13658(series):
     # https://github.com/rapidsai/cudf/issues/13658

From 9cbd2be87f3e826907045bd9ceeb7969e48740e8 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jul 2023 17:36:11 +0100
Subject: [PATCH 11/16] Docstring

---
 python/cudf/cudf/core/indexing_utils.py | 32 ++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 87cb8270593..52a764d1e10 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -468,12 +468,42 @@ def ordered_find(needles: "ColumnBase", haystack: "ColumnBase") -> GatherMap:
 def find_label_range_or_mask(
     key: slice, index: cudf.BaseIndex
 ) -> EmptyIndexer | MapIndexer | MaskIndexer | SliceIndexer:
-    # TODO: datetime index must only be handled specially until pandas 2
+    """
+    Convert a slice of labels into a slice of positions
+
+    Parameters
+    ----------
+    key
+        Slice to convert
+    index
+        Index to look up in
+
+    Returns
+    -------
+    IndexingSpec
+        Structured data for indexing (but never a :class:`ScalarIndexer`)
+
+    Raises
+    ------
+    KeyError
+        If the index is unsorted and not a DatetimeIndex
+
+    Notes
+    -----
+    Until Pandas 2, looking up slices in an unsorted DatetimeIndex
+    constructs a mask by checking which dates fall in the range.
+
+    From Pandas 2, slice lookup in DatetimeIndexes will behave
+    identically to other index types and fail with a KeyError for
+    an unsorted index if either of the slice endpoints are not unique
+    in the index or are not in the index at all.
+    """
     if (
         not (key.start is None and key.stop is None)
         and isinstance(index, cudf.core.index.DatetimeIndex)
         and not index.is_monotonic_increasing
     ):
+        # TODO: datetime index must only be handled specially until pandas 2
         start = pd.to_datetime(key.start)
         stop = pd.to_datetime(key.stop)
         mask = []

From da385ccece193218ac66fd2980de6c066081450d Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Tue, 18 Jul 2023 11:04:52 +0100
Subject: [PATCH 12/16] Share some logic between loc and iloc paths

Perhaps not worth it...
---
 python/cudf/cudf/core/indexing_utils.py | 134 ++++++++++++------------
 1 file changed, 68 insertions(+), 66 deletions(-)

diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 52a764d1e10..bd5b7bba962 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -4,7 +4,7 @@
 
 from dataclasses import dataclass
 from functools import partial, reduce
-from typing import TYPE_CHECKING, Any, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Tuple, Union
 
 import cupy as cp
 import numpy as np
@@ -67,6 +67,52 @@ class ScalarIndexer:
 ]
 
 
+# Helpers for code-sharing between loc and iloc paths
+def expand_key(key, frame):
+    """Slice-expand key into a tuple of length frame.dim
+
+    Also apply callables on each piece.
+    """
+    dim = len(frame.shape)
+    if isinstance(key, tuple):
+        # Key potentially indexes rows and columns, slice-expand to
+        # shape of frame
+        indexers = key + (slice(None),) * (dim - len(key))
+        if len(indexers) > dim:
+            raise IndexError(
+                f"Too many indexers: got {len(indexers)} expected {dim}"
+            )
+    else:
+        # Key indexes rows, slice-expand to shape of frame
+        indexers = (key, *(slice(None),) * (dim - 1))
+    return tuple(k(frame) if callable(k) else k for k in indexers)
+
+
+def destructure_dataframe_indexer(
+    key: Any,
+    frame: cudf.DataFrame,
+    destructure: Callable[[Any, cudf.DataFrame], tuple[Any, ...]],
+    is_scalar: Callable[[Any, ColumnAccessor], bool],
+    get_ca: str,
+):
+    rows, cols = destructure(key, frame)
+    if cols is Ellipsis:
+        cols = slice(None)
+    try:
+        ca = getattr(frame._data, get_ca)(cols)
+    except TypeError as e:
+        raise TypeError(
+            "Column indices must be names, slices, "
+            "list-like of names, or boolean mask"
+        ) from e
+    scalar = is_scalar(cols, ca)
+    if scalar:
+        assert (
+            len(ca) == 1
+        ), "Scalar column indexer should not produce more than one column"
+    return rows, (scalar, ca)
+
+
 def destructure_iloc_key(
     key: Any, frame: Union[cudf.Series, cudf.DataFrame]
 ) -> tuple[Any, ...]:
@@ -111,19 +157,7 @@ def destructure_iloc_key(
     IndexError
         If there are too many indexers, or any individual indexer is a tuple.
     """
-    n = len(frame.shape)
-    if isinstance(key, tuple):
-        # Key potentially indexes rows and columns, slice-expand to
-        # shape of frame
-        indexers = key + (slice(None),) * (n - len(key))
-        if len(indexers) > n:
-            raise IndexError(
-                f"Too many indexers: got {len(indexers)} expected {n}"
-            )
-    else:
-        # Key indexes rows, slice-expand to shape of frame
-        indexers = (key, *(slice(None),) * (n - 1))
-    indexers = tuple(k(frame) if callable(k) else k for k in indexers)
+    indexers = expand_key(key, frame)
     if any(isinstance(k, tuple) for k in indexers):
         raise IndexError(
             "Too many indexers: can't have nested tuples in iloc indexing"
@@ -147,7 +181,7 @@ def destructure_dataframe_iloc_indexer(
     -------
     tuple
         2-tuple of a key for the rows and tuple of
-        (column_index_is_scalar, column_names) for the columns
+        (column_index_is_scalar, ColumnAccessor) for the columns
 
     Raises
     ------
@@ -158,23 +192,13 @@ def destructure_dataframe_iloc_indexer(
     NotImplementedError
         If the requested column indexer repeats columns
     """
-    rows, cols = destructure_iloc_key(key, frame)
-    if cols is Ellipsis:
-        cols = slice(None)
-    scalar = is_integer(cols)
-    try:
-        ca = frame._data.select_by_index(cols)
-    except TypeError:
-        raise TypeError(
-            "Column indices must be integers, slices, "
-            "or list-like of integers"
-        )
-    if scalar:
-        assert (
-            len(ca) == 1
-        ), "Scalar column indexer should not produce more than one column"
-
-    return rows, (scalar, ca)
+    return destructure_dataframe_indexer(
+        key,
+        frame,
+        destructure_iloc_key,
+        lambda col, _ca: is_integer(col),
+        "select_by_index",
+    )
 
 
 def destructure_series_iloc_indexer(key: Any, frame: cudf.Series) -> Any:
@@ -292,10 +316,9 @@ def destructure_loc_key(
     IndexError
         If there are too many indexers.
     """
-    n = len(frame.shape)
     if (
         isinstance(frame.index, cudf.MultiIndex)
-        and n == 2
+        and len(frame.shape) == 2
         and isinstance(key, tuple)
         and all(map(is_scalar, key))
     ):
@@ -313,18 +336,7 @@ def destructure_loc_key(
         else:
             # key just indexes rows
             key = (key,)
-    if isinstance(key, tuple):
-        # Key potentially indexes rows and columns, slice-expand to
-        # shape of frame
-        indexers = key + (slice(None),) * (n - len(key))
-        if len(indexers) > n:
-            raise IndexError(
-                f"Too many indexers: got {len(indexers)} expected {n}"
-            )
-    else:
-        # Key indexes rows, slice-expand to shape of frame
-        indexers = (key, *(slice(None),) * (n - 1))
-    return tuple(k(frame) if callable(k) else k for k in indexers)
+    return expand_key(key, frame)
 
 
 def destructure_dataframe_loc_indexer(
@@ -343,7 +355,7 @@ def destructure_dataframe_loc_indexer(
     -------
     tuple
         2-tuple of a key for the rows and tuple of
-        (column_index_is_scalar, column_names) for the columns
+        (column_index_is_scalar, ColumnAccessor) for the columns
 
     Raises
     ------
@@ -354,26 +366,16 @@ def destructure_dataframe_loc_indexer(
     NotImplementedError
         If the requested column indexer repeats columns
     """
-    rows, cols = destructure_loc_key(key, frame)
-    if cols is Ellipsis:
-        cols = slice(None)
-    try:
-        scalar = cols in frame._data
-    except TypeError:
-        scalar = False
-    try:
-        ca = frame._data.select_by_label(cols)
-    except TypeError:
-        raise TypeError(
-            "Column indices must be names, slices, "
-            "list-like of names, or boolean mask"
-        )
-    if scalar:
-        assert (
-            len(ca) == 1
-        ), "Scalar column indexer should not produce more than one column"
 
-    return rows, (scalar, ca)
+    def is_scalar(name, ca):
+        try:
+            return name in ca
+        except TypeError:
+            return False
+
+    return destructure_dataframe_indexer(
+        key, frame, destructure_loc_key, is_scalar, "select_by_label"
+    )
 
 
 def destructure_series_loc_indexer(key: Any, frame: cudf.Series) -> Any:

From ee00399beec2c4fcac4902269b4f733db36168c3 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Tue, 18 Jul 2023 15:44:53 +0100
Subject: [PATCH 13/16] Explicitly raise for MultiIndex

---
 python/cudf/cudf/core/indexing_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index bd5b7bba962..4d31afb1731 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -630,6 +630,10 @@ def parse_row_loc_indexer(key: Any, index: cudf.BaseIndex) -> IndexingSpec:
     TypeError
         If the indexing key is otherwise invalid.
     """
+    if isinstance(index, cudf.MultiIndex):
+        raise NotImplementedError(
+            "This code path is not designed for multiindices"
+        )
     # TODO: multiindices need to be treated separately
     if key is Ellipsis:
         # Ellipsis is handled here because multiindex level-based

From 87d6998b3cfc8aa41b701e554ca3a3de52b99936 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Tue, 18 Jul 2023 15:45:25 +0100
Subject: [PATCH 14/16] Move type annotation to DataframeIndexer

---
 python/cudf/cudf/core/dataframe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 09386f88f3d..1486f870789 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -121,6 +121,8 @@ def _shape_mismatch_error(x, y):
 
 
 class _DataFrameIndexer(_FrameIndexer):
+    _frame: DataFrame
+
     def __setitem__(self, key, value):
         if not isinstance(key, tuple):
             key = (key, slice(None))
@@ -364,8 +366,6 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
     For selection by index.
     """
 
-    _frame: DataFrame
-
     def __getitem__(self, arg):
         row_key, (
             col_is_scalar,

From b8fd56e1ac5b4c911b2d07e76efec8e934172238 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Tue, 18 Jul 2023 16:17:50 +0100
Subject: [PATCH 15/16] Remove code handling multiindex keys for now

---
 python/cudf/cudf/core/indexing_utils.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 4d31afb1731..e19607a1ba2 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -19,7 +19,6 @@
     is_bool_dtype,
     is_integer,
     is_integer_dtype,
-    is_scalar,
 )
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.copy_types import BooleanMask, GatherMap
@@ -316,26 +315,6 @@ def destructure_loc_key(
     IndexError
         If there are too many indexers.
     """
-    if (
-        isinstance(frame.index, cudf.MultiIndex)
-        and len(frame.shape) == 2
-        and isinstance(key, tuple)
-        and all(map(is_scalar, key))
-    ):
-        # This is "best-effort" and ambiguous
-        if len(key) == 2:
-            if key[1] in frame.index._columns[1]:
-                # key just indexes the rows
-                key = (key,)
-            elif key[1] in frame._data:
-                # key indexes rows and columns
-                key = key
-            else:
-                # key indexes rows and we will raise a keyerror
-                key = (key,)
-        else:
-            # key just indexes rows
-            key = (key,)
     return expand_key(key, frame)
 
 

From 546fef54d4bc4873767a15defa889a1ee86e1780 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 20 Jul 2023 12:12:36 +0100
Subject: [PATCH 16/16] Add test of #13652

---
 python/cudf/cudf/tests/test_indexing.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 05e14107f12..8a81e617d77 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1982,6 +1982,28 @@ def test_loc_missing_label_keyerror_issue_13379(index):
         cdf.loc[[0, 5]]
 
 
+@pytest.mark.parametrize("index_is_ordered", [False, True])
+@pytest.mark.parametrize("label_is_ordered", [False, True])
+def test_loc_categorical_ordering_mismatch_issue_13652(
+    index_is_ordered, label_is_ordered
+):
+    # https://github.com/rapidsai/cudf/issues/13652
+    s = cudf.Series(
+        [0, 2, 8, 4, 2],
+        index=cudf.CategoricalIndex(
+            [1, 2, 3, 4, 5],
+            categories=[1, 2, 3, 4, 5],
+            ordered=index_is_ordered,
+        ),
+    )
+    labels = cudf.CategoricalIndex(
+        [1, 4], categories=[1, 4], ordered=label_is_ordered
+    )
+    actual = s.loc[labels]
+    expect = s.to_pandas().loc[labels.to_pandas()]
+    assert_eq(actual, expect)
+
+
 def test_loc_categorical_no_integer_fallback_issue_13653():
     # https://github.com/rapidsai/cudf/issues/13653
     s = cudf.Series(