rapidsai · rapids-bot · Jun 30, 2023 · Jun 27, 2023 · Jun 27, 2023 · Jun 27, 2023
@@ -1432,7 +1432,94 @@ def rename(self, name, inplace=False):
             out.name = name
             return out
 
-    def get_slice_bound(self, label, side, kind=None):
+    def _indices_of(self, value) -> cudf.core.column.NumericalColumn:
+        """
+        Locate the positions in the index that hold ``value``
+
+        Parameters
+        ----------
+        value
+            Label to search the index for
+
+        Returns
+        -------
+        Column of matching positions (this version only raises)
+        """
+        raise NotImplementedError
+
+    def find_label_range(self, loc: slice) -> slice:
+        """
+        Convert a slice of labels into a slice of integer positions
+
+        Parameters
+        ----------
+        loc
+            Label-based slice to translate.
+
+        Returns
+        -------
+        Equivalent slice of integer positions in the index (right-open).
+
+        Notes
+        -----
+        Like every label-based lookup, the incoming slice is treated as
+        right-closed.
+        """
+        step = loc.step if loc.step is not None else 1
+        reverse = step < 0
+        if reverse:
+            sides = ("right", "left")
+        else:
+            sides = ("left", "right")
+        first = loc.start
+        if first is not None:
+            first = self.get_slice_bound(first, side=sides[0])
+        last = loc.stop
+        if last is not None:
+            last = self.get_slice_bound(last, side=sides[1])
+        if reverse:
+            # Fencepost: when walking backwards both bounds move down by
+            # one position, with the start clamped at zero.
+            if first is not None:
+                first = max(first - 1, 0)
+            # A stop of 0 would turn into -1, which a slice reads as
+            # "counted from the end"; None is used for that case instead.
+            last = None if last is None or last == 0 else last - 1
+        return slice(first, last, step)
+
+    def searchsorted(
+        self,
+        value,
+        side: str = "left",
+        ascending: bool = True,
+        na_position: str = "last",
+    ):
+        """Locate the insertion point that keeps the index sorted
+
+        Parameters
+        ----------
+        value :
+            Value whose insertion position in Self is wanted
+        side : str {'left', 'right'} optional, default 'left'
+            With 'left', the first suitable position is reported
+            With 'right', the last suitable position is reported
+        ascending : bool optional, default True
+            Whether the index is sorted ascending (descending if False)
+        na_position : str {'last', 'first'} optional, default 'last'
+            Where null values sit in the sorted order
+
+        Returns
+        -------
+        Insertion point.
+
+        Notes
+        -----
+        The index must already be sorted in the direction given by the
+        `ascending` flag; this is a precondition of the search.
+        """
+        raise NotImplementedError()
+
+    def get_slice_bound(self, label, side: str, kind=None) -> int:
         """
         Calculate slice bound that corresponds to given label.
         Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
@@ -1449,7 +1536,34 @@ def get_slice_bound(self, label, side, kind=None):
         int
             Index of label.
         """
-        raise NotImplementedError
+        if kind is not None:
+            warnings.warn(
+                "'kind' argument in get_slice_bound is deprecated and will be "
+                "removed in a future version.",
+                FutureWarning,
+            )
+        if side not in {"left", "right"}:
+            raise ValueError(f"Invalid side argument {side}")
+        if self.is_monotonic_increasing or self.is_monotonic_decreasing:
+            # Sorted index: the bound is the insertion point of the label.
+            return self.searchsorted(
+                label, side=side, ascending=self.is_monotonic_increasing
+            )
+        else:
+            # Unsorted index: the label must occur exactly once.
+            try:
+                left, right = self._values._find_first_and_last(label)
+            except ValueError as err:
+                # Chain the lookup failure so the traceback shows its cause.
+                raise KeyError(f"{label=} not in index") from err
+            if left != right:
+                raise KeyError(
+                    f"Cannot get slice bound for non-unique label {label=}"
+                )
+            if side == "left":
+                return left
+            else:
+                return right + 1
 
     def __array_function__(self, func, types, args, kwargs):
         # check if the function is implemented for the current type

@@ -1286,19 +1286,10 @@ def fillna(
 
         return result
 
-    def find_first_value(
-        self, value: ScalarLike, closest: bool = False
-    ) -> int:
-        """
-        Returns offset of first value that matches
-        """
-        return self.as_numerical.find_first_value(self._encode(value))
-
-    def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
-        """
-        Returns offset of last value that matches
-        """
-        return self.as_numerical.find_last_value(self._encode(value))
+    def indices_of(
+        self, value: ScalarLike
+    ) -> cudf.core.column.NumericalColumn:
+        """
+        Find locations of value in the column
+
+        The value is passed through ``_encode`` and the search is
+        delegated to the ``as_numerical`` column.
+        """
+        numerical = self.as_numerical
+        return numerical.indices_of(self._encode(value))
 
     @property
     def is_monotonic_increasing(self) -> bool:

@@ -45,6 +45,7 @@
     drop_nulls,
 )
 from cudf._lib.transform import bools_to_mask
+from cudf._lib.types import size_type_dtype
 from cudf._typing import ColumnLike, Dtype, ScalarLike
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
@@ -734,29 +735,79 @@ def notnull(self) -> ColumnBase:
 
         return result
 
-    def find_first_value(
-        self, value: ScalarLike, closest: bool = False
-    ) -> int:
+    def indices_of(
+        self, value: ScalarLike | Self
+    ) -> cudf.core.column.NumericalColumn:
         """
-        Returns offset of first value that matches
+        Find locations of value in the column
+
+        Parameters
+        ----------
+        value
+            Scalar to look for (cast to dtype of column), or a length-1 column
+
+        Returns
+        -------
+        Column of indices that match value
         """
-        # FIXME: Inefficient, may be need a libcudf api
-        index = cudf.core.index.RangeIndex(0, stop=len(self))
-        indices = index.take(self == value)
-        if not len(indices):
-            raise ValueError("value not found")
-        return indices[0]
-
-    def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
+        # Wrap a scalar in a one-element column of this column's dtype.
+        if not isinstance(value, ColumnBase):
+            value = as_column([value], dtype=self.dtype)
+        else:
+            assert len(value) == 1
+        # NOTE(review): presumably a boolean mask over ``self`` marking the
+        # rows equal to ``value`` -- confirm against libcudf.search.contains.
+        mask = libcudf.search.contains(value, self)
+        # Filter the positions 0..len(self)-1 through the mask.
+        return apply_boolean_mask(
+            [arange(0, len(self), dtype=size_type_dtype)], mask
+        )[0]
+
+    def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]:
+        """
+        Return the first and last entries of ``indices_of(value)``
+
+        Parameters
+        ----------
+        value
+            Value to search for
+
+        Returns
+        -------
+        Tuple of (first, last) matching indices
+
+        Raises
+        ------
+        ValueError if value is not found
+        """
+        matches = self.indices_of(value)
+        count = len(matches)
+        if not count:
+            raise ValueError(f"Value {value} not found in column")
+        first = matches.element_indexing(0)
+        last = matches.element_indexing(count - 1)
+        return (first, last)
+
+    def find_first_value(self, value: ScalarLike) -> int:
         """
-        Returns offset of last value that matches
+        Return the index of the first element that matches value
+
+        Parameters
+        ----------
+        value
+            Value to search for (cast to dtype of column)
+
+        Returns
+        -------
+        Index of the first match, as given by ``_find_first_and_last``
+
+        Raises
+        ------
+        ValueError if value is not found
+        """
+        first, _ = self._find_first_and_last(value)
+        return first
+
+    def find_last_value(self, value: ScalarLike) -> int:
         """
-        # FIXME: Inefficient, may be need a libcudf api
-        index = cudf.core.index.RangeIndex(0, stop=len(self))
-        indices = index.take(self == value)
-        if not len(indices):
-            raise ValueError("value not found")
-        return indices[-1]
+        Return the index of the last element that matches value
+
+        Parameters
+        ----------
+        value
+            Value to search for (cast to dtype of column)
+
+        Returns
+        -------
+        Index of the last match, as given by ``_find_first_and_last``
+
+        Raises
+        ------
+        ValueError if value is not found
+        """
+        _, last = self._find_first_and_last(value)
+        return last
 
     def append(self, other: ColumnBase) -> ColumnBase:
         return concat_columns([self, as_column(other)])
@@ -893,39 +944,6 @@ def is_monotonic_decreasing(self) -> bool:
             ascending=[False], null_position=None
         )
 
-    def get_slice_bound(self, label: ScalarLike, side: str, kind: str) -> int:
-        """
-        Calculate slice bound that corresponds to given label.
-        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
-        of given label.
-
-        Parameters
-        ----------
-        label : Scalar
-        side : {'left', 'right'}
-        kind : {'ix', 'loc', 'getitem'}
-        """
-        if kind not in {"ix", "loc", "getitem", None}:
-            raise ValueError(
-                f"Invalid value for ``kind`` parameter,"
-                f" must be either one of the following: "
-                f"{'ix', 'loc', 'getitem', None}, but found: {kind}"
-            )
-        if side not in {"left", "right"}:
-            raise ValueError(
-                "Invalid value for side kwarg,"
-                " must be either 'left' or 'right': %s" % (side,)
-            )
-
-        # TODO: Handle errors/missing keys correctly
-        #       Not currently using `kind` argument.
-        if side == "left":
-            return self.find_first_value(label, closest=True)
-        elif side == "right":
-            return self.find_last_value(label, closest=True) + 1
-        else:
-            raise ValueError(f"Invalid value for side: {side}")
-
     def sort_values(
         self: ColumnBase,
         ascending: bool = True,

@@ -470,27 +470,13 @@ def fillna(
 
         return super().fillna(fill_value, method)
 
-    def find_first_value(
-        self, value: ScalarLike, closest: bool = False
-    ) -> int:
-        """
-        Returns offset of first value that matches
-        """
-        value = pd.to_datetime(value)
-        value = column.as_column(
-            value, dtype=self.dtype
-        ).as_numerical.element_indexing(0)
-        return self.as_numerical.find_first_value(value, closest=closest)
-
-    def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
-        """
-        Returns offset of last value that matches
-        """
-        value = pd.to_datetime(value)
+    def indices_of(
+        self, value: ScalarLike
+    ) -> cudf.core.column.NumericalColumn:
+        """
+        Find locations of value in the column
+
+        ``value`` is parsed with ``pd.to_datetime``, turned into a column
+        of this column's dtype, and the search is delegated to the
+        ``as_numerical`` columns of both operands.
+        """
         value = column.as_column(
-            value, dtype=self.dtype
-        ).as_numerical.element_indexing(0)
-        return self.as_numerical.find_last_value(value, closest=closest)
+            pd.to_datetime(value), dtype=self.dtype
+        ).as_numerical
+        return self.as_numerical.indices_of(value)
 
     @property
     def is_unique(self) -> bool: