rapidsai · rapids-bot · Mar 10, 2021 · Feb 3, 2021 · Feb 4, 2021 · Feb 4, 2021
@@ -26,11 +26,12 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib.null_mask import MaskState, create_null_mask
+from cudf._typing import ColumnLike
 from cudf.core import column, reshape
 from cudf.core.abc import Serializable
 from cudf.core.column import as_column, column_empty
 from cudf.core.column_accessor import ColumnAccessor
-from cudf.core.frame import Frame
+from cudf.core.frame import Frame, _drop_rows_by_labels
 from cudf.core.groupby.groupby import DataFrameGroupBy
 from cudf.core.index import Index, RangeIndex, as_index
 from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer
@@ -3269,46 +3270,26 @@ def drop(
             )
 
         if inplace:
-            outdf = self
+            out = self
         else:
-            outdf = self.copy()
+            out = self.copy()
 
         if axis in (1, "columns"):
             target = _get_host_unique(target)
 
-            _drop_columns(outdf, target, errors)
+            _drop_columns(out, target, errors)
         elif axis in (0, "index"):
-            if not isinstance(target, (cudf.Series, cudf.Index)):
-                target = column.as_column(target)
-
-            if isinstance(self._index, cudf.MultiIndex):
-                if level is None:
-                    level = 0
-
-                levels_index = outdf.index.get_level_values(level)
-                if errors == "raise" and not target.isin(levels_index).all():
-                    raise KeyError("One or more values not found in axis")
-
-                # TODO : Could use anti-join as a future optimization
-                sliced_df = outdf.take(~levels_index.isin(target))
-                sliced_df._index.names = self._index.names
-            else:
-                if errors == "raise" and not target.isin(outdf.index).all():
-                    raise KeyError("One or more values not found in axis")
-
-                sliced_df = outdf.join(
-                    cudf.DataFrame(index=target), how="leftanti"
-                )
+            dropped = _drop_rows_by_labels(out, target, level, errors)
 
             if columns is not None:
                 columns = _get_host_unique(columns)
-                _drop_columns(sliced_df, columns, errors)
+                _drop_columns(dropped, columns, errors)
 
-            outdf._data = sliced_df._data
-            outdf._index = sliced_df._index
+            out._data = dropped._data
+            out._index = dropped._index
 
         if not inplace:
-            return outdf
+            return out
 
     def _drop_column(self, name):
         """Drop a column by *name*
@@ -7362,6 +7343,16 @@ def equals(self, other):
                 return False
         return super().equals(other)
 
+    def _drop_rows_by_labels(
+        self: "cudf.DataFrame", labels: ColumnLike
+    ) -> "cudf.DataFrame":
+        """Return the rows of this frame whose index is not in `labels`.
+
+        Done as a left-anti join against an index-only `DataFrame`.
+        """
+        labels_frame = cudf.DataFrame(index=labels)
+        return self.join(labels_frame, how="leftanti")
+
     _accessors = set()  # type: Set[Any]
 
 
@@ -7608,17 +7599,6 @@ def _get_union_of_series_names(series_list):
     return names_list
 
 
-def _drop_columns(df, columns, errors):
-    for c in columns:
-        try:
-            df._drop_column(c)
-        except KeyError as e:
-            if errors == "ignore":
-                pass
-            else:
-                raise e
-
-
 def _get_host_unique(array):
     if isinstance(
         array, (cudf.Series, cudf.Index, cudf.core.column.ColumnBase)
@@ -7628,3 +7608,14 @@ def _get_host_unique(array):
         return [array]
     else:
         return set(array)
+
+
+def _drop_columns(df: DataFrame, columns: Iterable, errors: str):
+    """Drop each of `columns` from `df` via `df._drop_column`; a KeyError
+    is re-raised unless `errors == "ignore"`."""
+    for name in columns:
+        try:
+            df._drop_column(name)
+        except KeyError:
+            if errors != "ignore":
+                raise
@@ -18,6 +18,7 @@
 
 import cudf
 from cudf import _lib as libcudf
+from cudf._typing import ColumnLike
 from cudf.core.column import as_column, build_categorical_column, column_empty
 from cudf.utils.dtypes import (
     is_categorical_dtype,
@@ -3812,3 +3813,37 @@ def _is_series(obj):
     instead of checking for isinstance(obj, cudf.Series)
     """
     return isinstance(obj, Frame) and obj.ndim == 1 and obj._index is not None
+
+
+def _drop_rows_by_labels(
+    obj: Union[cudf.DataFrame, cudf.Series],
+    labels: Union[ColumnLike, abc.Iterable, str],
+    level: Union[int, str, None],
+    errors: str,
+) -> Union[cudf.DataFrame, cudf.Series]:
+    """Remove the rows of `obj` whose index matches `labels`.
+
+    If `errors="raise"`, a KeyError is raised when some items in `labels`
+    do not exist in the index (or in the selected `level` of a MultiIndex).
+    `level` defaults to 0 and is ignored if `obj._index` is not `MultiIndex`.
+    """
+    if not isinstance(labels, (cudf.Series, cudf.Index)):
+        labels = as_column(labels)
+
+    is_multi = isinstance(obj._index, cudf.MultiIndex)
+    if is_multi:
+        # Match labels against a single level of the MultiIndex.
+        lookup = obj.index.get_level_values(0 if level is None else level)
+    else:
+        lookup = obj.index
+
+    if errors == "raise" and not labels.isin(lookup).all():
+        raise KeyError("One or more values not found in axis")
+
+    if not is_multi:
+        return obj._drop_rows_by_labels(labels)
+
+    # TODO : Could use anti-join as a future optimization
+    result = obj.take(~lookup.isin(labels))
+    result._index.names = obj._index.names
+    return result
@@ -35,7 +35,7 @@
 from cudf.core.column.lists import ListMethods
 from cudf.core.column.string import StringMethods
 from cudf.core.column_accessor import ColumnAccessor
-from cudf.core.frame import Frame
+from cudf.core.frame import Frame, _drop_rows_by_labels
 from cudf.core.groupby.groupby import SeriesGroupBy
 from cudf.core.index import Index, RangeIndex, as_index
 from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer
@@ -554,6 +554,128 @@ def copy(self, deep=True):
             result.index = self.index.copy(deep=deep)
         return result
 
+    def drop(
+        self,
+        labels=None,
+        axis=0,
+        index=None,
+        columns=None,
+        level=None,
+        inplace=False,
+        errors="raise",
+    ):
+        """
+        Drop the given index labels from the Series.
+
+        Elements are removed by matching their index labels. For a Series
+        with a multi-index, ``level`` selects the level in which the labels
+        are looked up.
+
+        Parameters
+        ----------
+        labels : single label or list-like
+            Index labels to drop.
+        axis : 0, default 0
+            Redundant for application on Series.
+        index : single label or list-like
+            Alternative to ``labels``; the two cannot be
+            passed together.
+        columns : single label or list-like
+            This parameter is ignored. Use ``index`` or ``labels`` to specify.
+        level : int or level name, optional
+            For MultiIndex, level from which the labels will be removed.
+        inplace : bool, default False
+            If False, return a copy. Otherwise, do operation
+            inplace and return None.
+        errors : {'ignore', 'raise'}, default 'raise'
+            If 'ignore', suppress error and only existing labels are
+            dropped.
+
+        Returns
+        -------
+        Series or None
+            Series with specified index labels removed or None if
+            ``inplace=True``
+
+        Raises
+        ------
+        KeyError
+            If any of the labels is not found in the selected axis and
+            ``errors='raise'``
+        ValueError
+            If none of ``labels``, ``index`` and ``columns`` is given, if
+            ``labels`` is combined with ``index`` or ``columns``, or if
+            ``labels`` is passed with ``axis=1``
+
+        See Also
+        --------
+        Series.reindex
+            Return only specified index labels of Series
+        Series.dropna
+            Return series without null values
+        Series.drop_duplicates
+            Return series with duplicate values removed
+        cudf.core.dataframe.DataFrame.drop
+            Drop specified labels from rows or columns in dataframe
+
+        Examples
+        --------
+        >>> s = cudf.Series([1,2,3], index=['x', 'y', 'z'])
+        >>> s
+        x    1
+        y    2
+        z    3
+        dtype: int64
+
+        Drop labels x and z
+
+        >>> s.drop(labels=['x', 'z'])
+        y    2
+        dtype: int64
+
+        Drop a label from the second level in MultiIndex Series.
+
+        >>> midx = cudf.MultiIndex.from_product([[0, 1, 2], ['x', 'y']])
+        >>> s = cudf.Series(range(6), index=midx)
+        >>> s
+        0  x    0
+           y    1
+        1  x    2
+           y    3
+        2  x    4
+           y    5
+        >>> s.drop(labels='y', level=1)
+        0  x    0
+        1  x    2
+        2  x    4
+        """
+        if labels is None and index is None and columns is None:
+            raise ValueError(
+                "Need to specify at least one of 'labels', "
+                "'index' or 'columns'"
+            )
+        if labels is not None:
+            if not (index is None and columns is None):
+                raise ValueError(
+                    "Cannot specify both 'labels' and 'index'/'columns'"
+                )
+            if axis == 1:
+                raise ValueError("No axis named 1 for object type Series")
+            target = labels
+        elif index is not None:
+            target = index
+        else:
+            target = []  # Only `columns` was given, which is ignored
+
+        # Operate on `self` directly when inplace, otherwise on a copy.
+        out = self if inplace else self.copy()
+        dropped = _drop_rows_by_labels(out, target, level, errors)
+        # Rebind the surviving data and index onto the output object.
+        out._data = dropped._data
+        out._index = dropped._index
+
+        return None if inplace else out
+
     def __copy__(self, deep=True):
         return self.copy(deep)
 
@@ -4567,6 +4689,18 @@ def keys(self):
         """
         return self.index
 
+    def _drop_rows_by_labels(self, labels):
+        """Return this Series without the rows whose index is in `labels`.
+
+        Delegates to `_drop_rows_by_labels` of a temporary one-column
+        DataFrame, then restores the original name on the result.
+        """
+        placeholder = "tmp"
+        frame = self.to_frame(name=placeholder)
+        result = frame._drop_rows_by_labels(labels)[placeholder]
+        result.name = self.name
+        return result
+
     _accessors = set()  # type: Set[Any]