From 85151e0c76b482a1d952888486f5643b311b5ede Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 14 Oct 2021 16:32:04 -0700
Subject: [PATCH 01/20] Unify argsort implementations.

---
 python/cudf/cudf/core/_base_index.py | 51 --------------------
 python/cudf/cudf/core/dataframe.py   | 51 --------------------
 python/cudf/cudf/core/frame.py       | 72 +++++++++++++++++++++++-----
 python/cudf/cudf/core/index.py       | 12 +++++
 python/cudf/cudf/core/multiindex.py  | 13 ++++-
 python/cudf/cudf/core/series.py      | 64 +++++++------------------
 6 files changed, 101 insertions(+), 162 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 590bff3b19d..a014f3c0265 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -508,57 +508,6 @@ def take(self, indices):
         """
         return self[indices]
 
-    def argsort(self, ascending=True, **kwargs):
-        """
-        Return the integer indices that would sort the index.
-
-        Parameters
-        ----------
-        ascending : bool, default True
-            If True, returns the indices for ascending order.
-            If False, returns the indices for descending order.
-
-        Returns
-        -------
-        array : A cupy array containing Integer indices that
-            would sort the index if used as an indexer.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> index = cudf.Index([10, 100, 1, 1000])
-        >>> index
-        Int64Index([10, 100, 1, 1000], dtype='int64')
-        >>> index.argsort()
-        array([2, 0, 1, 3], dtype=int32)
-
-        The order of argsort can be reversed using
-        ``ascending`` parameter, by setting it to ``False``.
-        >>> index.argsort(ascending=False)
-        array([3, 1, 0, 2], dtype=int32)
-
-        ``argsort`` on a MultiIndex:
-
-        >>> index = cudf.MultiIndex(
-        ...      levels=[[1, 3, 4, -10], [1, 11, 5]],
-        ...      codes=[[0, 0, 1, 2, 3], [0, 2, 1, 1, 0]],
-        ...      names=["x", "y"],
-        ... )
-        >>> index
-        MultiIndex([(  1,  1),
-                    (  1,  5),
-                    (  3, 11),
-                    (  4, 11),
-                    (-10,  1)],
-                   names=['x', 'y'])
-        >>> index.argsort()
-        array([4, 0, 1, 2, 3], dtype=int32)
-        >>> index.argsort(ascending=False)
-        array([3, 2, 1, 0, 4], dtype=int32)
-        """
-        indices = self._values.argsort(ascending=ascending, **kwargs)
-        return cupy.asarray(indices)
-
     def to_frame(self, index=True, name=None):
         """Create a DataFrame with a column containing this Index
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 5239cf9d648..e14fc397d90 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3248,57 +3248,6 @@ def label_encoding(
 
         return outdf
 
-    @annotate("ARGSORT", color="yellow", domain="cudf_python")
-    def argsort(self, ascending=True, na_position="last"):
-        """
-        Sort by the values.
-
-        Parameters
-        ----------
-        ascending : bool or list of bool, default True
-            If True, sort values in ascending order, otherwise descending.
-        na_position : {‘first’ or ‘last’}, default ‘last’
-            Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs
-            at the end.
-
-        Returns
-        -------
-        out_column_inds : cuDF Column of indices sorted based on input
-
-        Notes
-        -----
-        Difference from pandas:
-
-        - Support axis='index' only.
-        - Not supporting: inplace, kind
-        - Ascending can be a list of bools to control per column
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame({'a':[10, 0, 2], 'b':[-10, 10, 1]})
-        >>> df
-            a   b
-        0  10 -10
-        1   0  10
-        2   2   1
-        >>> inds = df.argsort()
-        >>> inds
-        0    1
-        1    2
-        2    0
-        dtype: int32
-        >>> df.take(inds)
-            a   b
-        1   0  10
-        2   2   1
-        0  10 -10
-        """
-        inds_col = self._get_sorted_inds(
-            ascending=ascending, na_position=na_position
-        )
-        return cudf.Series(inds_col)
-
     def sort_values(
         self,
         by,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 0b895460410..972e96978ca 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2899,29 +2899,77 @@ def searchsorted(
         else:
             return result
 
-    def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
-        """
-        Sort by the values.
+    def argsort(
+        self,
+        axis=0,
+        kind="quicksort",
+        order=None,
+        ascending=True,
+        na_position="last",
+    ):
+        """Return the integer indices that would sort the Series values.
 
         Parameters
         ----------
-        by: list, optional
-            Labels specifying columns to sort by. By default,
-            sort by all columns of `self`
+        axis : {0 or "index"}
+            Has no effect but is accepted for compatibility with numpy.
+        kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
+            Choice of sorting algorithm. See :func:`numpy.sort` for more
+            information. 'mergesort' and 'stable' are the only stable
+            algorithms. Only quicksort is supported in cuDF.
+        order : None
+            Has no effect but is accepted for compatibility with numpy.
         ascending : bool or list of bool, default True
             If True, sort values in ascending order, otherwise descending.
         na_position : {‘first’ or ‘last’}, default ‘last’
             Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs
             at the end.
+
         Returns
         -------
-        out_column_inds : cuDF Column of indices sorted based on input
+        Series of indices sorted based on input.
 
-        Difference from pandas:
-        * Support axis='index' only.
-        * Not supporting: inplace, kind
-        * Ascending can be a list of bools to control per column
-        """
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series([3, 1, 2])
+        >>> s
+        0    3
+        1    1
+        2    2
+        dtype: int64
+        >>> s.argsort()
+        0    1
+        1    2
+        2    0
+        dtype: int32
+        >>> s[s.argsort()]
+        1    1
+        2    2
+        0    3
+        dtype: int64
+        """  # noqa: E501
+        if kind != "quicksort":
+            if kind not in {"mergesort", "heapsort", "stable"}:
+                raise AttributeError(
+                    f"{kind} is not a valid sorting algorithm for "
+                    f"'DataFrame' object"
+                )
+            warnings.warn(
+                f"GPU-accelerated {kind} is currently not supported, "
+                "defaulting to quicksort."
+            )
+        return cudf.Series._from_data(
+            {
+                None: self._get_sorted_inds(
+                    ascending=ascending, na_position=na_position
+                )
+            }
+        )
+
+    def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
+        # Get an int32 column consisting of the indices required to sort self
+        # according to the columns specified in by.
 
         to_sort = (
             self
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index c003454fb59..aa215c418b5 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -748,6 +748,18 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
+    def argsort(
+        self,
+        axis=0,
+        kind="quicksort",
+        order=None,
+        ascending=True,
+        na_position="last",
+    ):
+        return (
+            super().argsort(axis, kind, order, ascending, na_position).values
+        )
+
     @classmethod
     def deserialize(cls, header, frames):
         if "index_column" in header:
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 27edd41ed92..f891d26ba8c 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1364,8 +1364,17 @@ def is_monotonic_decreasing(self):
             ascending=[False] * len(self.levels), null_position=None
         )
 
-    def argsort(self, ascending=True, **kwargs):
-        return self._get_sorted_inds(ascending=ascending, **kwargs).values
+    def argsort(
+        self,
+        axis=0,
+        kind="quicksort",
+        order=None,
+        ascending=True,
+        na_position="last",
+    ):
+        return (
+            super().argsort(axis, kind, order, ascending, na_position).values
+        )
 
     def sort_values(self, return_indexer=False, ascending=True, key=None):
         if key is not None:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index aada0534f42..89d86f8a01b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2269,37 +2269,6 @@ def astype(self, dtype, copy=False, errors="raise"):
                 pass
             return self
 
-    def argsort(self, ascending=True, na_position="last"):
-        """Returns a Series of int64 index that will sort the series.
-
-        Uses Thrust sort.
-
-        Returns
-        -------
-        result: Series
-
-        Examples
-        --------
-        >>> import cudf
-        >>> s = cudf.Series([3, 1, 2])
-        >>> s
-        0    3
-        1    1
-        2    2
-        dtype: int64
-        >>> s.argsort()
-        0    1
-        1    2
-        2    0
-        dtype: int32
-        >>> s[s.argsort()]
-        1    1
-        2    2
-        0    3
-        dtype: int64
-        """
-        return self._sort(ascending=ascending, na_position=na_position)[1]
-
     def sort_index(self, axis=0, *args, **kwargs):
         if axis not in (0, "index"):
             raise ValueError("Only axis=0 is valid for Series.")
@@ -2359,7 +2328,13 @@ def sort_values(
 
         if len(self) == 0:
             return self
-        vals, inds = self._sort(ascending=ascending, na_position=na_position)
+
+        col_keys, col_inds = self._column.sort_by_values(
+            ascending=ascending, na_position=na_position
+        )
+        vals = self._from_data({self.name: col_keys}, self._index)
+        inds = self._from_data({self.name: col_inds}, self._index)
+
         if not ignore_index:
             index = self.index.take(inds)
         else:
@@ -2519,20 +2494,17 @@ def nsmallest(self, n=5, keep="first"):
         """
         return self._n_largest_or_smallest(n=n, keep=keep, largest=False)
 
-    def _sort(self, ascending=True, na_position="last"):
-        """
-        Sort by values
-
-        Returns
-        -------
-        2-tuple of key and index
-        """
-        col_keys, col_inds = self._column.sort_by_values(
-            ascending=ascending, na_position=na_position
-        )
-        sr_keys = self._from_data({self.name: col_keys}, self._index)
-        sr_inds = self._from_data({self.name: col_inds}, self._index)
-        return sr_keys, sr_inds
+    def argsort(
+        self,
+        axis=0,
+        kind="quicksort",
+        order=None,
+        ascending=True,
+        na_position="last",
+    ):
+        obj = super().argsort(axis, kind, order, ascending, na_position)
+        obj.name = self.name
+        return obj
 
     def replace(self, to_replace=None, value=None, *args, **kwargs):
         if is_dict_like(to_replace) and value is not None:

From 785c7ed72078f4f282a32763909312934f272b15 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 12:54:45 -0700
Subject: [PATCH 02/20] Standardize implementation of take.

---
 python/cudf/cudf/core/dataframe.py       | 39 ++-----------------
 python/cudf/cudf/core/frame.py           | 39 +++++++++++++++++++
 python/cudf/cudf/core/groupby/groupby.py | 24 ------------
 python/cudf/cudf/core/index.py           |  6 +--
 python/cudf/cudf/core/series.py          | 48 +-----------------------
 5 files changed, 47 insertions(+), 109 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e14fc397d90..768f6214526 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2575,40 +2575,7 @@ class max_speed
             return result
 
     def take(self, positions, keep_index=True):
-        """
-        Return a new DataFrame containing the rows specified by *positions*
-
-        Parameters
-        ----------
-        positions : array-like
-            Integer or boolean array-like specifying the rows of the output.
-            If integer, each element represents the integer index of a row.
-            If boolean, *positions* must be of the same length as *self*,
-            and represents a boolean mask.
-
-        Returns
-        -------
-        out : DataFrame
-            New DataFrame
-
-        Examples
-        --------
-        >>> a = cudf.DataFrame({'a': [1.0, 2.0, 3.0],
-        ...                    'b': cudf.Series(['a', 'b', 'c'])})
-        >>> a.take([0, 2, 2])
-             a  b
-        0  1.0  a
-        2  3.0  c
-        2  3.0  c
-        >>> a.take([True, False, True])
-             a  b
-        0  1.0  a
-        2  3.0  c
-        """
-        positions = as_column(positions)
-        if is_bool_dtype(positions):
-            return self._apply_boolean_mask(positions)
-        out = self._gather(positions, keep_index=keep_index)
+        out = super().take(positions, keep_index)
         out.columns = self.columns
         return out
 
@@ -3314,7 +3281,9 @@ def sort_values(
 
         # argsort the `by` column
         return self.take(
-            self[by].argsort(ascending=ascending, na_position=na_position),
+            self[by]._get_sorted_inds(
+                ascending=ascending, na_position=na_position
+            ),
             keep_index=not ignore_index,
         )
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 972e96978ca..d18ca38792e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -30,6 +30,7 @@
 from cudf._typing import ColumnLike, DataFrameOrSeries, Dtype
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
+    is_bool_dtype,
     is_decimal_dtype,
     is_dict_like,
     is_integer_dtype,
@@ -529,6 +530,7 @@ def _gather(self, gather_map, keep_index=True, nullify=False):
         )
 
         result._copy_type_metadata(self, include_index=keep_index)
+        result._data.names = self._data.names
         if keep_index and self._index is not None:
             result._index.names = self._index.names
         return result
@@ -2983,6 +2985,43 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 
+    def take(self, positions, keep_index=True):
+        """Return a new object containing the rows specified by *positions*
+
+        Parameters
+        ----------
+        positions : array-like
+            Integer or boolean array-like specifying the rows of the output.
+            If integer, each element represents the integer index of a row.
+            If boolean, *positions* must be of the same length as *self*,
+            and represents a boolean mask.
+        keep_index : bool, default True
+            Whether to retain the index in result or not.
+
+        Returns
+        -------
+        out : Frame
+            New object containing the selected rows
+
+        Examples
+        --------
+        >>> a = cudf.DataFrame({'a': [1.0, 2.0, 3.0],
+        ...                    'b': cudf.Series(['a', 'b', 'c'])})
+        >>> a.take([0, 2, 2])
+             a  b
+        0  1.0  a
+        2  3.0  c
+        2  3.0  c
+        >>> a.take([True, False, True])
+             a  b
+        0  1.0  a
+        2  3.0  c
+        """
+        positions = as_column(positions)
+        if is_bool_dtype(positions):
+            return self._apply_boolean_mask(positions)
+        return self._gather(positions, keep_index=keep_index)
+
     def sin(self):
         """
         Get Trigonometric sine, element-wise.
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 6ffba8da069..a62684b6dd7 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1181,18 +1181,6 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
 
     _PROTECTED_KEYS = frozenset(("obj",))
 
-    def __init__(
-        self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
-    ):
-        super().__init__(
-            obj=obj,
-            by=by,
-            level=level,
-            sort=sort,
-            as_index=as_index,
-            dropna=dropna,
-        )
-
     def __getitem__(self, key):
         return self.obj[key].groupby(
             self.grouping, dropna=self._dropna, sort=self._sort
@@ -1265,18 +1253,6 @@ class SeriesGroupBy(GroupBy):
     Name: Max Speed, dtype: float64
     """
 
-    def __init__(
-        self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
-    ):
-        super().__init__(
-            obj=obj,
-            by=by,
-            level=level,
-            sort=sort,
-            as_index=as_index,
-            dropna=dropna,
-        )
-
     def agg(self, func):
         result = super().agg(func)
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index aa215c418b5..ea45f9086c0 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -748,6 +748,9 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
+    def take(self, indices):
+        return self[indices]
+
     def argsort(
         self,
         axis=0,
@@ -2383,9 +2386,6 @@ def to_pandas(self):
             self.to_numpy(na_value=None), name=self.name, dtype="object"
         )
 
-    def take(self, indices):
-        return self._values[indices]
-
     def __repr__(self):
         return (
             f"{self.__class__.__name__}({self._values.to_array()},"
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 89d86f8a01b..f7f3c5239ad 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1169,52 +1169,6 @@ def __setitem__(self, key, value):
         else:
             self.loc[key] = value
 
-    def take(self, indices, keep_index=True):
-        """
-        Return Series by taking values from the corresponding *indices*.
-
-        Parameters
-        ----------
-        indices : array-like or scalar
-            An array/scalar like integers indicating which positions to take.
-        keep_index : bool, default True
-            Whethere to retain the index in result Series or not.
-
-        Returns
-        -------
-        Series
-
-        Examples
-        --------
-        >>> import cudf
-        >>> series = cudf.Series([10, 11, 12, 13, 14])
-        >>> series
-        0    10
-        1    11
-        2    12
-        3    13
-        4    14
-        dtype: int64
-        >>> series.take([0, 4])
-        0    10
-        4    14
-        dtype: int64
-
-        If you want to drop the index, pass `keep_index=False`
-
-        >>> series.take([0, 4], keep_index=False)
-        0    10
-        1    14
-        dtype: int64
-        """
-        if keep_index is True or is_scalar(indices):
-            return self.iloc[indices]
-        else:
-            col_inds = as_column(indices)
-            return self._from_data(
-                {self.name: self._column.take(col_inds, keep_index=False)}
-            )
-
     def __repr__(self):
         _, height = get_terminal_size()
         max_rows = (
@@ -2353,7 +2307,7 @@ def _n_largest_or_smallest(self, largest, n, keep):
                 data = data[-n:-n]
             else:
                 data = data.tail(n)
-            return data.reverse()
+            return data[::-1]
         else:
             raise ValueError('keep must be either "first", "last"')
 

From c98bc0281f81346f3160e052ed4e46773a7297c7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 14:25:40 -0700
Subject: [PATCH 03/20] Deprecate parameters and remove unnecessary impls.

---
 python/cudf/cudf/core/dataframe.py |  2 +-
 python/cudf/cudf/core/frame.py     | 23 ++++++++++++++++++-----
 python/cudf/cudf/core/index.py     |  3 ---
 python/cudf/cudf/core/series.py    |  3 +--
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 768f6214526..cb47cfbe8c3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2574,7 +2574,7 @@ class max_speed
         if not inplace:
             return result
 
-    def take(self, positions, keep_index=True):
+    def take(self, positions, keep_index=None):
         out = super().take(positions, keep_index)
         out.columns = self.columns
         return out
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d18ca38792e..fe704ab014b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2985,16 +2985,13 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 
-    def take(self, positions, keep_index=True):
+    def take(self, positions, keep_index=None):
         """Return a new object containing the rows specified by *positions*
 
         Parameters
         ----------
         positions : array-like
-            Integer or boolean array-like specifying the rows of the output.
-            If integer, each element represents the integer index of a row.
-            If boolean, *positions* must be of the same length as *self*,
-            and represents a boolean mask.
+            Array of ints indicating which positions to take.
         keep_index : bool, default True
             Whether to retain the index in result or not.
 
@@ -3017,8 +3014,24 @@ def take(self, positions, keep_index=True):
         0  1.0  a
         2  3.0  c
         """
+        # TODO: When we remove keep_index we should introduce the axis
+        # parameter. We could also introduce is_copy, but that's already
+        # deprecated in pandas so it's probably unnecessary.
+        if keep_index is not None:
+            warnings.warn(
+                "keep_index is deprecated and will be removed in the future.",
+                FutureWarning,
+            )
+        else:
+            keep_index = True
+
         positions = as_column(positions)
         if is_bool_dtype(positions):
+            warnings.warn(
+                "Calling take with a boolean array is deprecated and will be "
+                "removed in the future.",
+                FutureWarning,
+            )
             return self._apply_boolean_mask(positions)
         return self._gather(positions, keep_index=keep_index)
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index ea45f9086c0..2331f6efdd1 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -748,9 +748,6 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
-    def take(self, indices):
-        return self[indices]
-
     def argsort(
         self,
         axis=0,
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f7f3c5239ad..21f4515b9ac 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -111,8 +111,7 @@ def __getitem__(self, arg):
         ):
             return data
         return self._frame._from_data(
-            {self._frame.name: data},
-            index=cudf.Index(self._frame.index.take(arg)),
+            {self._frame.name: data}, index=cudf.Index(self._frame.index[arg]),
         )
 
     def __setitem__(self, key, value):

From f8ad3b83a19b1d0f11d8b908344999e1cd9d7165 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 14:29:11 -0700
Subject: [PATCH 04/20] Remove MultiIndex take implementation.

---
 python/cudf/cudf/core/multiindex.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index f891d26ba8c..ab174901974 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -7,7 +7,6 @@
 import pickle
 import warnings
 from collections.abc import Sequence
-from numbers import Integral
 from typing import Any, List, MutableMapping, Optional, Tuple, Union
 
 import cupy
@@ -835,22 +834,9 @@ def size(self):
         return self._num_rows
 
     def take(self, indices):
-        if isinstance(indices, (Integral, Sequence)):
-            indices = np.array(indices)
-        elif isinstance(indices, cudf.Series) and indices.has_nulls:
-            raise ValueError("Column must have no nulls.")
-        elif isinstance(indices, slice):
-            start, stop, step = indices.indices(len(self))
-            indices = column.arange(start, stop, step)
-        result = MultiIndex.from_frame(
-            self.to_frame(index=False).take(indices)
-        )
-        if self._codes is not None:
-            result._codes = self._codes.take(indices)
-        if self._levels is not None:
-            result._levels = self._levels
-        result.names = self.names
-        return result
+        obj = super().take(indices)
+        obj.names = self.names
+        return obj
 
     def serialize(self):
         header, frames = super().serialize()

From 13e9911dcb1b10074edcd8499f65f4039f8932c0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 14:59:36 -0700
Subject: [PATCH 05/20] Share sort_values between Series and DataFrame.

---
 python/cudf/cudf/core/dataframe.py     | 72 -------------------------
 python/cudf/cudf/core/frame.py         |  3 +-
 python/cudf/cudf/core/indexed_frame.py | 74 ++++++++++++++++++++++++++
 python/cudf/cudf/core/series.py        | 47 ++++++----------
 4 files changed, 93 insertions(+), 103 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index cb47cfbe8c3..e13c3922d60 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3215,78 +3215,6 @@ def label_encoding(
 
         return outdf
 
-    def sort_values(
-        self,
-        by,
-        axis=0,
-        ascending=True,
-        inplace=False,
-        kind="quicksort",
-        na_position="last",
-        ignore_index=False,
-    ):
-        """
-        Sort by the values row-wise.
-
-        Parameters
-        ----------
-        by : str or list of str
-            Name or list of names to sort by.
-        ascending : bool or list of bool, default True
-            Sort ascending vs. descending. Specify list for multiple sort
-            orders. If this is a list of bools, must match the length of the
-            by.
-        na_position : {‘first’, ‘last’}, default ‘last’
-            'first' puts nulls at the beginning, 'last' puts nulls at the end
-        ignore_index : bool, default False
-            If True, index will not be sorted.
-
-        Returns
-        -------
-        sorted_obj : cuDF DataFrame
-
-        Notes
-        -----
-        Difference from pandas:
-          * Support axis='index' only.
-          * Not supporting: inplace, kind
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame()
-        >>> df['a'] = [0, 1, 2]
-        >>> df['b'] = [-3, 2, 0]
-        >>> df.sort_values('b')
-           a  b
-        0  0 -3
-        2  2  0
-        1  1  2
-        """
-        if inplace:
-            raise NotImplementedError("`inplace` not currently implemented.")
-        if kind not in {"quicksort", "mergesort", "heapsort", "stable"}:
-            raise AttributeError(
-                f"{kind} is not a valid sorting algorithm for "
-                f"'DataFrame' object"
-            )
-        elif kind != "quicksort":
-            msg = (
-                f"GPU-accelerated {kind} is currently not supported, "
-                f"now defaulting to GPU-accelerated quicksort."
-            )
-            warnings.warn(msg)
-        if axis != 0:
-            raise NotImplementedError("`axis` not currently implemented.")
-
-        # argsort the `by` column
-        return self.take(
-            self[by]._get_sorted_inds(
-                ascending=ascending, na_position=na_position
-            ),
-            keep_index=not ignore_index,
-        )
-
     def agg(self, aggs, axis=None):
         """
         Aggregate using one or more operations over the specified axis.
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index fe704ab014b..5301cb1a5f1 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3016,7 +3016,8 @@ def take(self, positions, keep_index=None):
         """
         # TODO: When we remove keep_index we should introduce the axis
         # parameter. We could also introduce is_copy, but that's already
-        # deprecated in pandas so it's probably unnecessary.
+        # deprecated in pandas so it's probably unnecessary. We also need to
+        # introduce Index.take's allow_fill and fill_value parameters.
         if keep_index is not None:
             warnings.warn(
                 "keep_index is deprecated and will be removed in the future.",
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index aba24171f06..25d6357a8af 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3,6 +3,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import Type, TypeVar
 
 import cupy as cp
@@ -388,3 +389,76 @@ def sort_index(
         if ignore_index is True:
             out = out.reset_index(drop=True)
         return self._mimic_inplace(out, inplace=inplace)
+
+    def sort_values(
+        self,
+        by,
+        axis=0,
+        ascending=True,
+        inplace=False,
+        kind="quicksort",
+        na_position="last",
+        ignore_index=False,
+    ):
+        """Sort the rows by the values of the ``by`` column(s).
+
+        Parameters
+        ----------
+        by : str or list of str
+            Name or list of names to sort by.
+        ascending : bool or list of bool, default True
+            Sort ascending vs. descending. Specify list for multiple sort
+            orders. If this is a list of bools, must match the length of `by`.
+        na_position : {‘first’, ‘last’}, default ‘last’
+            'first' puts nulls at the beginning, 'last' puts nulls at the end
+        ignore_index : bool, default False
+            If True, the original index is not carried over to the result.
+
+        Returns
+        -------
+        Frame : Frame with sorted values.
+
+        Notes
+        -----
+        Difference from pandas:
+          * Support axis='index' only.
+          * Not supporting: inplace, kind
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame()
+        >>> df['a'] = [0, 1, 2]
+        >>> df['b'] = [-3, 2, 0]
+        >>> df.sort_values('b')
+           a  b
+        0  0 -3
+        2  2  0
+        1  1  2
+        """
+        if inplace:
+            raise NotImplementedError("`inplace` not currently implemented.")
+        if kind != "quicksort":
+            if kind not in {"mergesort", "heapsort", "stable"}:
+                raise AttributeError(
+                    f"{kind} is not a valid sorting algorithm for "
+                    f"'{type(self).__name__}' object"
+                )
+            warnings.warn(
+                f"GPU-accelerated {kind} is currently not supported, "
+                f"defaulting to quicksort."
+            )
+        if axis != 0:
+            raise NotImplementedError("`axis` not currently implemented.")
+
+        if len(self) == 0:
+            return self
+
+        # Gather directly: the public take() emits a FutureWarning whenever
+        # keep_index is passed, which would fire on every sort_values call.
+        return self._gather(
+            self._get_columns_by_label(by)._get_sorted_inds(
+                ascending=ascending, na_position=na_position
+            ),
+            keep_index=not ignore_index,
+        )
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 21f4515b9ac..d6a64e3911b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2236,28 +2236,28 @@ def sort_values(
         na_position="last",
         ignore_index=False,
     ):
-        """
-        Sort by the values.
-
-        Sort a Series in ascending or descending order by some criterion.
+        """Sort by the values.
 
         Parameters
         ----------
-        ascending : bool, default True
-            If True, sort values in ascending order, otherwise descending.
+        ascending : bool or list of bool, default True
+            Sort ascending vs. descending. Specify list for multiple sort
+            orders. If this is a list of bools, must match the length of the
+            by.
         na_position : {‘first’, ‘last’}, default ‘last’
-            'first' puts nulls at the beginning, 'last' puts nulls at the end.
+            'first' puts nulls at the beginning, 'last' puts nulls at the end
         ignore_index : bool, default False
             If True, index will not be sorted.
 
         Returns
         -------
-        sorted_obj : cuDF Series
+        Series : Series with sorted values.
 
         Notes
         -----
         Difference from pandas:
-          * Not supporting: `inplace`, `kind`
+          * Support axis='index' only.
+          * Not supporting: inplace, kind
 
         Examples
         --------
@@ -2271,28 +2271,15 @@ def sort_values(
         1    5
         dtype: int64
         """
-
-        if inplace:
-            raise NotImplementedError("`inplace` not currently implemented.")
-        if kind != "quicksort":
-            raise NotImplementedError("`kind` not currently implemented.")
-        if axis != 0:
-            raise NotImplementedError("`axis` not currently implemented.")
-
-        if len(self) == 0:
-            return self
-
-        col_keys, col_inds = self._column.sort_by_values(
-            ascending=ascending, na_position=na_position
+        return super().sort_values(
+            by=self.name,
+            axis=axis,
+            ascending=ascending,
+            inplace=inplace,
+            kind=kind,
+            na_position=na_position,
+            ignore_index=ignore_index,
         )
-        vals = self._from_data({self.name: col_keys}, self._index)
-        inds = self._from_data({self.name: col_inds}, self._index)
-
-        if not ignore_index:
-            index = self.index.take(inds)
-        else:
-            index = self.index
-        return vals.set_index(index)
 
     def _n_largest_or_smallest(self, largest, n, keep):
         direction = largest

From c1c7ef9ba5f36da758b0b9733ffcb578f5b4c2b9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 15:21:21 -0700
Subject: [PATCH 06/20] Standardize argsort to return cupy arrays except for
 Series.

---
 python/cudf/cudf/core/frame.py           | 12 ++++--------
 python/cudf/cudf/core/index.py           | 12 ------------
 python/cudf/cudf/core/multiindex.py      | 12 ------------
 python/cudf/cudf/core/series.py          |  4 +++-
 python/cudf/cudf/tests/test_dataframe.py |  4 ++--
 5 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5301cb1a5f1..b2366dd394c 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2929,7 +2929,7 @@ def argsort(
 
         Returns
         -------
-        Series of indices sorted based on input.
+        cupy.ndarray: The indices sorted based on input.
 
         Examples
         --------
@@ -2961,13 +2961,9 @@ def argsort(
                 f"GPU-accelerated {kind} is currently not supported, "
                 "defaulting to quicksort."
             )
-        return cudf.Series._from_data(
-            {
-                None: self._get_sorted_inds(
-                    ascending=ascending, na_position=na_position
-                )
-            }
-        )
+        return self._get_sorted_inds(
+            ascending=ascending, na_position=na_position
+        ).values
 
     def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
         # Get an int64 column consisting of the indices required to sort self
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 2331f6efdd1..8ca604a45de 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -748,18 +748,6 @@ def __init__(self, data, **kwargs):
         name = kwargs.get("name")
         super().__init__({name: data})
 
-    def argsort(
-        self,
-        axis=0,
-        kind="quicksort",
-        order=None,
-        ascending=True,
-        na_position="last",
-    ):
-        return (
-            super().argsort(axis, kind, order, ascending, na_position).values
-        )
-
     @classmethod
     def deserialize(cls, header, frames):
         if "index_column" in header:
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index ab174901974..bc0c4ef00af 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1350,18 +1350,6 @@ def is_monotonic_decreasing(self):
             ascending=[False] * len(self.levels), null_position=None
         )
 
-    def argsort(
-        self,
-        axis=0,
-        kind="quicksort",
-        order=None,
-        ascending=True,
-        na_position="last",
-    ):
-        return (
-            super().argsort(axis, kind, order, ascending, na_position).values
-        )
-
     def sort_values(self, return_indexer=False, ascending=True, key=None):
         if key is not None:
             raise NotImplementedError("key parameter is not yet implemented.")
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index d6a64e3911b..926fe97c59b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2442,7 +2442,9 @@ def argsort(
         ascending=True,
         na_position="last",
     ):
-        obj = super().argsort(axis, kind, order, ascending, na_position)
+        obj = type(self)._from_data(
+            {None: super().argsort(axis, kind, order, ascending, na_position)}
+        )
         obj.name = self.name
         return obj
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index c1eade0fcdc..ae331a1d5ce 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8732,12 +8732,12 @@ def test_explode(data, labels, ignore_index, p_index, label_to_explode):
         (
             cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}),
             True,
-            cudf.Series([1, 2, 0], dtype="int32"),
+            cupy.array([1, 2, 0], dtype="int32"),
         ),
         (
             cudf.DataFrame({"a": [10, 0, 2], "b": [-10, 10, 1]}),
             False,
-            cudf.Series([0, 2, 1], dtype="int32"),
+            cupy.array([0, 2, 1], dtype="int32"),
         ),
     ],
 )

From 3d14373b27f7b376cfb09c221dab1516eb6e9386 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 15:23:54 -0700
Subject: [PATCH 07/20] Fix MultiIndex.__getitem__ for slices.

---
 python/cudf/cudf/core/multiindex.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index bc0c4ef00af..d06d9705bdb 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -7,6 +7,7 @@
 import pickle
 import warnings
 from collections.abc import Sequence
+from numbers import Integral
 from typing import Any, List, MutableMapping, Optional, Tuple, Union
 
 import cupy
@@ -873,11 +874,26 @@ def deserialize(cls, header, frames):
         return obj._set_names(column_names)
 
     def __getitem__(self, index):
-        if isinstance(index, int):
-            # we are indexing into a single row of the MultiIndex,
-            # return that row as a tuple:
-            return self.take(index).to_pandas()[0]
-        return self.take(index)
+        flatten = isinstance(index, int)
+
+        if isinstance(index, (Integral, Sequence)):
+            index = np.array(index)
+        elif isinstance(index, slice):
+            start, stop, step = index.indices(len(self))
+            index = column.arange(start, stop, step)
+        result = MultiIndex.from_frame(self.to_frame(index=False).take(index))
+
+        # we are indexing into a single row of the MultiIndex,
+        # return that row as a tuple:
+        if flatten:
+            return result.to_pandas()[0]
+
+        if self._codes is not None:
+            result._codes = self._codes.take(index)
+        if self._levels is not None:
+            result._levels = self._levels
+        result.names = self.names
+        return result
 
     def to_frame(self, index=True, name=None):
         # TODO: Currently this function makes a shallow copy, which is

From 6bbcaa4a7ee125c07714ab1575883ced7263e3a6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 15:48:52 -0700
Subject: [PATCH 08/20] Unify index sort_values implementations.

---
 python/cudf/cudf/core/_base_index.py | 18 +++++++++++++-----
 python/cudf/cudf/core/multiindex.py  | 14 --------------
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index a014f3c0265..14eea97a487 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -5,7 +5,6 @@
 import pickle
 from typing import Any, Set
 
-import cupy
 import pandas as pd
 
 import cudf
@@ -974,7 +973,13 @@ def _intersection(self, other, sort=None):
             return intersection_result.sort_values()
         return intersection_result
 
-    def sort_values(self, return_indexer=False, ascending=True, key=None):
+    def sort_values(
+        self,
+        return_indexer=False,
+        ascending=True,
+        na_position="last",
+        key=None,
+    ):
         """
         Return a sorted copy of the index, and optionally return the indices
         that sorted the index itself.
@@ -985,6 +990,9 @@ def sort_values(self, return_indexer=False, ascending=True, key=None):
             Should the indices that would sort the index be returned.
         ascending : bool, default True
             Should the index values be sorted in an ascending order.
+        na_position : {'first', 'last'}, default 'last'
+            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
+            the end.
         key : None, optional
             This parameter is NON-FUNCTIONAL.
 
@@ -1051,11 +1059,11 @@ def sort_values(self, return_indexer=False, ascending=True, key=None):
         if key is not None:
             raise NotImplementedError("key parameter is not yet implemented.")
 
-        indices = self._values.argsort(ascending=ascending)
-        index_sorted = cudf.Index(self.take(indices), name=self.name)
+        indices = self.argsort(ascending=ascending, na_position=na_position)
+        index_sorted = self.take(indices)
 
         if return_indexer:
-            return index_sorted, cupy.asarray(indices)
+            return index_sorted, indices
         else:
             return index_sorted
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index d06d9705bdb..b77133a7431 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1366,20 +1366,6 @@ def is_monotonic_decreasing(self):
             ascending=[False] * len(self.levels), null_position=None
         )
 
-    def sort_values(self, return_indexer=False, ascending=True, key=None):
-        if key is not None:
-            raise NotImplementedError("key parameter is not yet implemented.")
-
-        indices = cudf.Series._from_data(
-            {None: self._get_sorted_inds(ascending=ascending)}
-        )
-        index_sorted = as_index(self.take(indices), name=self.names)
-
-        if return_indexer:
-            return index_sorted, cupy.asarray(indices)
-        else:
-            return index_sorted
-
     def fillna(self, value):
         """
         Fill null values with the specified value.

From 79cb909c0df0ea31c2aaf64f4fffb62d42ded9e2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 16:00:00 -0700
Subject: [PATCH 09/20] Remove BaseIndex.gpu_values.

---
 python/cudf/cudf/core/_base_index.py | 7 -------
 python/cudf/cudf/core/dataframe.py   | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 14eea97a487..d4c958594dc 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -564,13 +564,6 @@ def to_dlpack(self):
 
         return cudf.io.dlpack.to_dlpack(self)
 
-    @property
-    def gpu_values(self):
-        """
-        View the data as a numba device array object
-        """
-        return self._values.data_array_view
-
     def append(self, other):
         """
         Append a collection of Index options together.
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e13c3922d60..ce6dbdea675 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3488,7 +3488,7 @@ def _n_largest_or_smallest(self, method, n, columns, keep):
         # Operate
         sorted_series = getattr(col, method)(n=n, keep=keep)
         df = DataFrame()
-        new_positions = sorted_series.index.gpu_values
+        new_positions = sorted_series.index
         for k in self._data.names:
             if k == column:
                 df[k] = sorted_series

From 8fe0038296afd242f93e29146cb2de66063733a1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 15 Oct 2021 16:42:13 -0700
Subject: [PATCH 10/20] Unify implementations of nsmallest and nlargest.

---
 python/cudf/cudf/core/dataframe.py     | 23 ++----------------
 python/cudf/cudf/core/indexed_frame.py | 33 ++++++++++++++++++++++++++
 python/cudf/cudf/core/series.py        | 20 ++--------------
 3 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index ce6dbdea675..8517d7a2a62 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3407,7 +3407,7 @@ def nlargest(self, n, columns, keep="first"):
         Italy     59000000  1937894      IT
         Brunei      434000    12128      BN
         """
-        return self._n_largest_or_smallest("nlargest", n, columns, keep)
+        return self._n_largest_or_smallest(True, n, columns, keep)
 
     def nsmallest(self, n, columns, keep="first"):
         """Get the rows of the DataFrame sorted by the n smallest value of *columns*
@@ -3475,26 +3475,7 @@ def nsmallest(self, n, columns, keep="first"):
         Tuvalu         11300   38      TV
         Nauru         337000  182      NR
         """
-        return self._n_largest_or_smallest("nsmallest", n, columns, keep)
-
-    def _n_largest_or_smallest(self, method, n, columns, keep):
-        # Get column to operate on
-        if not isinstance(columns, str):
-            [column] = columns
-        else:
-            column = columns
-
-        col = self[column].reset_index(drop=True)
-        # Operate
-        sorted_series = getattr(col, method)(n=n, keep=keep)
-        df = DataFrame()
-        new_positions = sorted_series.index
-        for k in self._data.names:
-            if k == column:
-                df[k] = sorted_series
-            else:
-                df[k] = self[k].reset_index(drop=True).take(new_positions)
-        return df.set_index(self.index.take(new_positions))
+        return self._n_largest_or_smallest(False, n, columns, keep)
 
     def transpose(self):
         """Transpose index and columns.
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 25d6357a8af..7a509002ef8 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -462,3 +462,36 @@ def sort_values(
             ),
             keep_index=not ignore_index,
         )
+
+    def _n_largest_or_smallest(self, largest, n, columns, keep):
+        # Get column to operate on
+        if isinstance(columns, str):
+            columns = [columns]
+
+        if len(self) == 0:
+            return self
+
+        if keep == "first":
+            if n < 0:
+                n = 0
+
+            # argsort the `by` column
+            return self.take(
+                self._get_columns_by_label(columns)._get_sorted_inds(
+                    ascending=not largest
+                )[:n],
+                keep_index=True,
+            )
+        elif keep == "last":
+            indices = self._get_columns_by_label(columns)._get_sorted_inds(
+                ascending=largest
+            )
+
+            if n <= 0:
+                # Empty slice.
+                indices = indices[0:0]
+            else:
+                indices = indices[: -n - 1 : -1]
+            return self.take(indices, keep_index=True)
+        else:
+            raise ValueError('keep must be either "first", "last"')
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 926fe97c59b..b70a5776649 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2281,22 +2281,6 @@ def sort_values(
             ignore_index=ignore_index,
         )
 
-    def _n_largest_or_smallest(self, largest, n, keep):
-        direction = largest
-        if keep == "first":
-            if n < 0:
-                n = 0
-            return self.sort_values(ascending=not direction).head(n)
-        elif keep == "last":
-            data = self.sort_values(ascending=direction)
-            if n <= 0:
-                data = data[-n:-n]
-            else:
-                data = data.tail(n)
-            return data[::-1]
-        else:
-            raise ValueError('keep must be either "first", "last"')
-
     def nlargest(self, n=5, keep="first"):
         """Returns a new Series of the *n* largest element.
 
@@ -2357,7 +2341,7 @@ def nlargest(self, n=5, keep="first"):
         Brunei      434000
         dtype: int64
         """
-        return self._n_largest_or_smallest(n=n, keep=keep, largest=True)
+        return self._n_largest_or_smallest(True, n, [self.name], keep)
 
     def nsmallest(self, n=5, keep="first"):
         """
@@ -2432,7 +2416,7 @@ def nsmallest(self, n=5, keep="first"):
         Tuvalu      11300
         dtype: int64
         """
-        return self._n_largest_or_smallest(n=n, keep=keep, largest=False)
+        return self._n_largest_or_smallest(False, n, [self.name], keep)
 
     def argsort(
         self,

From 4b405a0b9ea48d8d339e5761ce9b103c298c739d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 17 Oct 2021 09:13:59 -0700
Subject: [PATCH 11/20] Make all argsort signatures consistent with pandas.

---
 python/cudf/cudf/core/frame.py  |  7 ++++++
 python/cudf/cudf/core/index.py  | 38 +++++++++++++++++++++++++++++++++
 python/cudf/cudf/core/series.py | 10 ++++++++-
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index b2366dd394c..b286a19bb13 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2903,6 +2903,7 @@ def searchsorted(
 
     def argsort(
         self,
+        by=None,
         axis=0,
         kind="quicksort",
         order=None,
@@ -2913,6 +2914,8 @@ def argsort(
 
         Parameters
         ----------
+        by : str or list of str, default None
+            Name or list of names to sort by. If None, sort by all columns.
         axis : {0 or "index"}
             Has no effect but is accepted for compatibility with numpy.
         kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
@@ -2951,6 +2954,10 @@ def argsort(
         0    3
         dtype: int64
         """  # noqa: E501
+        if by is None:
+            by = list(self._data.names)
+        elif isinstance(by, str):
+            by = [by]
         if kind != "quicksort":
             if kind not in {"mergesort", "heapsort", "stable"}:
                 raise AttributeError(
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 8ca604a45de..de463269743 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1162,6 +1162,44 @@ def is_categorical(self):
     def is_interval(self):
         return False
 
+    def argsort(
+        self,
+        axis=0,
+        kind="quicksort",
+        order=None,
+        ascending=True,
+        na_position="last",
+    ):
+        """Return the integer indices that would sort the index values.
+
+        Parameters
+        ----------
+        axis : {0 or "index"}
+            Has no effect but is accepted for compatibility with numpy.
+        kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
+            Choice of sorting algorithm. See :func:`numpy.sort` for more
+            information. 'mergesort' and 'stable' are the only stable
+            algorithms. Only quicksort is supported in cuDF.
+        order : None
+            Has no effect but is accepted for compatibility with numpy.
+        ascending : bool or list of bool, default True
+            If True, sort values in ascending order, otherwise descending.
+        na_position : {'first', 'last'}, default 'last'
+            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs
+            at the end.
+
+        Returns
+        -------
+        cupy.ndarray: The indices sorted based on input.
+        """  # noqa: E501
+        return super().argsort(
+            axis=axis,
+            kind=kind,
+            order=order,
+            ascending=ascending,
+            na_position=na_position,
+        )
+
 
 class NumericIndex(GenericIndex):
     """Immutable, ordered and sliceable sequence of labels.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index b70a5776649..0ec029c3397 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2427,7 +2427,15 @@ def argsort(
         na_position="last",
     ):
         obj = type(self)._from_data(
-            {None: super().argsort(axis, kind, order, ascending, na_position)}
+            {
+                None: super().argsort(
+                    axis=axis,
+                    kind=kind,
+                    order=order,
+                    ascending=ascending,
+                    na_position=na_position,
+                )
+            }
         )
         obj.name = self.name
         return obj

From 128a00c17e0b4e3c454682318b5bd6a37255b723 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 17 Oct 2021 09:36:22 -0700
Subject: [PATCH 12/20] Put back groupby overrides for this PR.

---
 python/cudf/cudf/core/groupby/groupby.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index a62684b6dd7..6ffba8da069 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1181,6 +1181,18 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
 
     _PROTECTED_KEYS = frozenset(("obj",))
 
+    def __init__(
+        self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
+    ):
+        super().__init__(
+            obj=obj,
+            by=by,
+            level=level,
+            sort=sort,
+            as_index=as_index,
+            dropna=dropna,
+        )
+
     def __getitem__(self, key):
         return self.obj[key].groupby(
             self.grouping, dropna=self._dropna, sort=self._sort
@@ -1253,6 +1265,18 @@ class SeriesGroupBy(GroupBy):
     Name: Max Speed, dtype: float64
     """
 
+    def __init__(
+        self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
+    ):
+        super().__init__(
+            obj=obj,
+            by=by,
+            level=level,
+            sort=sort,
+            as_index=as_index,
+            dropna=dropna,
+        )
+
     def agg(self, func):
         result = super().agg(func)
 

From d89fe29079f2563e077293307a4129da2728290e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Sun, 17 Oct 2021 09:59:09 -0700
Subject: [PATCH 13/20] Don't construct list of columns unnecessarily.

---
 python/cudf/cudf/core/frame.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index b286a19bb13..c27b4f4c255 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2954,9 +2954,7 @@ def argsort(
         0    3
         dtype: int64
         """  # noqa: E501
-        if by is None:
-            by = list(self._data.names)
-        elif isinstance(by, str):
+        if isinstance(by, str):
             by = [by]
         if kind != "quicksort":
             if kind not in {"mergesort", "heapsort", "stable"}:
@@ -2969,7 +2967,7 @@ def argsort(
                 "defaulting to quicksort."
             )
         return self._get_sorted_inds(
-            ascending=ascending, na_position=na_position
+            by=by, ascending=ascending, na_position=na_position
         ).values
 
     def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
@@ -2979,7 +2977,7 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
         to_sort = (
             self
             if by is None
-            else self._get_columns_by_label(by, downcast=False)
+            else self._get_columns_by_label(list(by), downcast=False)
         )
 
         # If given a scalar need to construct a sequence of length # of columns

From e2d57ba38782db1ee11cf269eb0f9eb535a89af7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Oct 2021 11:58:10 -0700
Subject: [PATCH 14/20] Add DataFrame tests of multiple column sorts.

---
 python/cudf/cudf/tests/test_sorting.py | 33 +++++++-------------------
 1 file changed, 8 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index 53676a47046..6533aa74fb7 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -154,33 +154,16 @@ def test_series_nsmallest(data, n):
 
 
 @pytest.mark.parametrize("nelem,n", [(1, 1), (100, 100), (10, 5), (100, 10)])
-def test_dataframe_nlargest(nelem, n):
+@pytest.mark.parametrize("op", ["nsmallest", "nlargest"])
+def test_dataframe_nlargest_nsmallest(nelem, n, op):
     np.random.seed(0)
-    df = DataFrame()
-    df["a"] = aa = np.random.random(nelem)
-    df["b"] = bb = np.random.random(nelem)
-    res = df.nlargest(n, "a")
-
-    # Check
-    inds = np.argsort(aa)
-    assert_eq(res["a"].to_numpy(), aa[inds][-n:][::-1])
-    assert_eq(res["b"].to_numpy(), bb[inds][-n:][::-1])
-    assert_eq(res.index.values, inds[-n:][::-1])
-
+    aa = np.random.random(nelem)
+    bb = np.random.random(nelem)
 
-@pytest.mark.parametrize("nelem,n", [(10, 5), (100, 10)])
-def test_dataframe_nsmallest(nelem, n):
-    np.random.seed(0)
-    df = DataFrame()
-    df["a"] = aa = np.random.random(nelem)
-    df["b"] = bb = np.random.random(nelem)
-    res = df.nsmallest(n, "a")
-
-    # Check
-    inds = np.argsort(-aa)
-    assert_eq(res["a"].to_numpy(), aa[inds][-n:][::-1])
-    assert_eq(res["b"].to_numpy(), bb[inds][-n:][::-1])
-    assert_eq(res.index.values, inds[-n:][::-1])
+    df = DataFrame({"a": aa, "b": bb})
+    pdf = df.to_pandas()
+    assert_eq(getattr(df, op)(n, "a"), getattr(pdf, op)(n, "a"))
+    assert_eq(getattr(df, op)(n, ["b", "a"]), getattr(pdf, op)(n, ["b", "a"]))
 
 
 @pytest.mark.parametrize(

From a71c408ec1084e9789002a25117a8e0d2e548033 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Oct 2021 16:05:03 -0700
Subject: [PATCH 15/20] Remove now superfluous take implementation.

---
 python/cudf/cudf/core/_base_index.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index d4c958594dc..35e5cbd0f1e 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -498,15 +498,6 @@ def fillna(self, value, downcast=None):
 
         return super().fillna(value=value)
 
-    def take(self, indices):
-        """Gather only the specific subset of indices
-
-        Parameters
-        ----------
-        indices: An array-like that maps to values contained in this Index.
-        """
-        return self[indices]
-
     def to_frame(self, index=True, name=None):
         """Create a DataFrame with a column containing this Index
 

From 1c2549e4d70e29080ec3885ac7cc8c31f418e396 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 18 Oct 2021 16:08:04 -0700
Subject: [PATCH 16/20] Rename positions to indices for pandas consistency.

---
 python/cudf/cudf/core/dataframe.py |  4 ++--
 python/cudf/cudf/core/frame.py     | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8517d7a2a62..ed38c9da3c6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2574,8 +2574,8 @@ class max_speed
         if not inplace:
             return result
 
-    def take(self, positions, keep_index=None):
-        out = super().take(positions, keep_index)
+    def take(self, indices, keep_index=None):
+        out = super().take(indices, keep_index)
         out.columns = self.columns
         return out
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c27b4f4c255..55bce9a8f34 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2986,12 +2986,12 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 
-    def take(self, positions, keep_index=None):
+    def take(self, indices, keep_index=None):
         """Return a new object containing the rows specified by *positions*
 
         Parameters
         ----------
-        positions : array-like
+        indices : array-like
             Array of ints indicating which positions to take.
         keep_index : bool, default True
             Whether to retain the index in result or not.
@@ -3027,15 +3027,15 @@ def take(self, positions, keep_index=None):
         else:
             keep_index = True
 
-        positions = as_column(positions)
-        if is_bool_dtype(positions):
+        indices = as_column(indices)
+        if is_bool_dtype(indices):
             warnings.warn(
                 "Calling take with a boolean array is deprecated and will be "
                 "removed in the future.",
                 FutureWarning,
             )
-            return self._apply_boolean_mask(positions)
-        return self._gather(positions, keep_index=keep_index)
+            return self._apply_boolean_mask(indices)
+        return self._gather(indices, keep_index=keep_index)
 
     def sin(self):
         """

From 3fac33b91194eb4b4c28facd519956ba61b83a24 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 25 Oct 2021 14:03:28 -0700
Subject: [PATCH 17/20] Allow deserialization to return RangeIndex.

---
 python/cudf/cudf/tests/test_pickling.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index 0f8b46cee35..28e63ec41f1 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -6,7 +6,7 @@
 import pandas as pd
 import pytest
 
-from cudf import DataFrame, GenericIndex, Series
+from cudf import DataFrame, GenericIndex, RangeIndex, Series
 from cudf.core.buffer import Buffer
 from cudf.testing._utils import assert_eq
 
@@ -28,7 +28,7 @@ def check_serialization(df):
     assert_frame_picklable(df[2:-2])
     # sorted
     sortvaldf = df.sort_values("vals")
-    assert isinstance(sortvaldf.index, GenericIndex)
+    assert isinstance(sortvaldf.index, (GenericIndex, RangeIndex))
     assert_frame_picklable(sortvaldf)
     # out-of-band
     if pickle.HIGHEST_PROTOCOL >= 5:

From ea0d075f9377cfd51a43ad5351d43f84c0cbd5c0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 25 Oct 2021 14:16:22 -0700
Subject: [PATCH 18/20] Parameterize the columns.

---
 python/cudf/cudf/tests/test_sorting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index 6533aa74fb7..00cd31e7539 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -155,15 +155,15 @@ def test_series_nsmallest(data, n):
 
 @pytest.mark.parametrize("nelem,n", [(1, 1), (100, 100), (10, 5), (100, 10)])
 @pytest.mark.parametrize("op", ["nsmallest", "nlargest"])
-def test_dataframe_nlargest_nsmallest(nelem, n, op):
+@pytest.mark.parametrize("columns", ["a", ["b", "a"]])
+def test_dataframe_nlargest_nsmallest(nelem, n, op, columns):
     np.random.seed(0)
     aa = np.random.random(nelem)
     bb = np.random.random(nelem)
 
     df = DataFrame({"a": aa, "b": bb})
     pdf = df.to_pandas()
-    assert_eq(getattr(df, op)(n, "a"), getattr(pdf, op)(n, "a"))
-    assert_eq(getattr(df, op)(n, ["b", "a"]), getattr(pdf, op)(n, ["b", "a"]))
+    assert_eq(getattr(df, op)(n, columns), getattr(pdf, op)(n, columns))
 
 
 @pytest.mark.parametrize(

From 3e412bcf5421633dfb449f9ac0b35beee296488d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Wed, 27 Oct 2021 15:09:04 -0700
Subject: [PATCH 19/20] Update python/cudf/cudf/core/series.py

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0ec029c3397..b9636580a91 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2426,7 +2426,7 @@ def argsort(
         ascending=True,
         na_position="last",
     ):
-        obj = type(self)._from_data(
+        obj = self.__class__._from_data(
             {
                 None: super().argsort(
                     axis=axis,

From b06a43e86d65974f043105ba659664160f023c00 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 1 Nov 2021 17:15:08 -0700
Subject: [PATCH 20/20] Address PR reviews.

---
 python/cudf/cudf/core/_base_index.py   | 14 ++++++++
 python/cudf/cudf/core/frame.py         | 46 +++++++++++++++++++++++---
 python/cudf/cudf/core/indexed_frame.py |  5 +++
 python/cudf/cudf/core/multiindex.py    |  2 ++
 4 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 35e5cbd0f1e..eea8e3c418f 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -3,6 +3,7 @@
 from __future__ import annotations, division, print_function
 
 import pickle
+import warnings
 from typing import Any, Set
 
 import pandas as pd
@@ -555,6 +556,17 @@ def to_dlpack(self):
 
         return cudf.io.dlpack.to_dlpack(self)
 
+    @property
+    def gpu_values(self):
+        """
+        View the data as a numba device array object
+        """
+        warnings.warn(
+            "The gpu_values property is deprecated and will be removed.",
+            FutureWarning,
+        )
+        return self._values.data_array_view
+
     def append(self, other):
         """
         Append a collection of Index options together.
@@ -1042,6 +1054,8 @@ def sort_values(
         """
         if key is not None:
             raise NotImplementedError("key parameter is not yet implemented.")
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
 
         indices = self.argsort(ascending=ascending, na_position=na_position)
         index_sorted = self.take(indices)
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 55bce9a8f34..666144d6819 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2880,6 +2880,9 @@ def searchsorted(
         """
         # Call libcudf++ search_sorted primitive
 
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
+
         scalar_flag = None
         if is_scalar(values):
             scalar_flag = True
@@ -2901,6 +2904,7 @@ def searchsorted(
         else:
             return result
 
+    @annotate("ARGSORT", color="yellow", domain="cudf_python")
     def argsort(
         self,
         by=None,
@@ -2936,6 +2940,8 @@ def argsort(
 
         Examples
         --------
+        **Series**
+
         >>> import cudf
         >>> s = cudf.Series([3, 1, 2])
         >>> s
@@ -2953,9 +2959,23 @@ def argsort(
         2    2
         0    3
         dtype: int64
+
+        **DataFrame**
+
+        >>> import cudf
+        >>> df = cudf.DataFrame({'foo': [3, 1, 2]})
+        >>> df.argsort()
+        array([1, 2, 0], dtype=int32)
+
+        **Index**
+
+        >>> import cudf
+        >>> idx = cudf.Index([3, 1, 2])
+        >>> idx.argsort()
+        array([1, 2, 0], dtype=int32)
         """  # noqa: E501
-        if isinstance(by, str):
-            by = [by]
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
         if kind != "quicksort":
             if kind not in {"mergesort", "heapsort", "stable"}:
                 raise AttributeError(
@@ -2966,6 +2984,9 @@ def argsort(
                 f"GPU-accelerated {kind} is currently not supported, "
                 "defaulting to quicksort."
             )
+
+        if isinstance(by, str):
+            by = [by]
         return self._get_sorted_inds(
             by=by, ascending=ascending, na_position=na_position
         ).values
@@ -2998,11 +3019,23 @@ def take(self, indices, keep_index=None):
 
         Returns
         -------
-        out : DataFrame
-            New DataFrame
+        out : Series or DataFrame or Index
+            New object with desired subset of rows.
 
         Examples
         --------
+        **Series**
+
+        >>> s = cudf.Series(['a', 'b', 'c', 'd', 'e'])
+        >>> s.take([2, 0, 4, 3])
+        2    c
+        0    a
+        4    e
+        3    d
+        dtype: object
+
+        **DataFrame**
+
         >>> a = cudf.DataFrame({'a': [1.0, 2.0, 3.0],
         ...                    'b': cudf.Series(['a', 'b', 'c'])})
         >>> a.take([0, 2, 2])
@@ -3014,6 +3046,12 @@ def take(self, indices, keep_index=None):
              a  b
         0  1.0  a
         2  3.0  c
+
+        **Index**
+
+        >>> idx = cudf.Index(['a', 'b', 'c', 'd', 'e'])
+        >>> idx.take([2, 0, 4, 3])
+        StringIndex(['c' 'a' 'e' 'd'], dtype='object')
         """
         # TODO: When we remove keep_index we should introduce the axis
         # parameter. We could also introduce is_copy, but that's already
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 7a509002ef8..552b590e0d8 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -352,6 +352,9 @@ def sort_index(
         if key is not None:
             raise NotImplementedError("key is not yet supported.")
 
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
+
         if axis in (0, "index"):
             idx = self.index
             if isinstance(idx, MultiIndex):
@@ -437,6 +440,8 @@ def sort_values(
         2  2  0
         1  1  2
         """
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
         if inplace:
             raise NotImplementedError("`inplace` not currently implemented.")
         if kind != "quicksort":
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index b77133a7431..7c132e3fb71 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -835,6 +835,8 @@ def size(self):
         return self._num_rows
 
     def take(self, indices):
+        if isinstance(indices, cudf.Series) and indices.has_nulls:
+            raise ValueError("Column must have no nulls.")
         obj = super().take(indices)
         obj.names = self.names
         return obj