rapidsai · rapids-bot · Sep 16, 2021 · Sep 15, 2021 · Sep 15, 2021 · Sep 15, 2021
@@ -3980,44 +3980,16 @@ def sort_index(
         3  1  2
         2  3  1
         """
-        if kind is not None:
-            raise NotImplementedError("kind is not yet supported")
-
-        if not sort_remaining:
-            raise NotImplementedError(
-                "sort_remaining == False is not yet supported"
-            )
-
-        if axis in (0, "index"):
-            if level is not None and isinstance(self.index, cudf.MultiIndex):
-                # Pandas currently don't handle na_position
-                # in case of MultiIndex
-                if ascending is True:
-                    na_position = "first"
-                else:
-                    na_position = "last"
-
-                if is_list_like(level):
-                    labels = [
-                        self.index._get_level_label(lvl) for lvl in level
-                    ]
-                else:
-                    labels = [self.index._get_level_label(level)]
-                inds = self.index.to_frame(index=False)[labels].argsort(
-                    ascending=ascending, na_position=na_position
-                )
-            else:
-                inds = self.index.argsort(
-                    ascending=ascending, na_position=na_position
-                )
-            outdf = self.take(inds)
-        else:
-            labels = sorted(self._data.names, reverse=not ascending)
-            outdf = self[labels]
-
-        if ignore_index is True:
-            outdf = outdf.reset_index(drop=True)
-        return self._mimic_inplace(outdf, inplace=inplace)
+        return super()._sort_index(
+            axis=axis,
+            level=level,
+            ascending=ascending,
+            inplace=inplace,
+            kind=kind,
+            na_position=na_position,
+            sort_remaining=sort_remaining,
+            ignore_index=ignore_index,
+        )
 
     def sort_values(
         self,

@@ -429,6 +429,93 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             res.index.names = self._index.names
         return res
 
+    def _sort_index(
+        self,
+        axis=0,
+        level=None,
+        ascending=True,
+        inplace=False,
+        kind=None,
+        na_position="last",
+        sort_remaining=True,
+        ignore_index=False,
+    ):
+        """
+        Helper for .sort_index
+
+        Parameters
+        ----------
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            The axis along which to sort. The value 0 identifies the rows,
+            and 1 identifies the columns.
+        level : int or level name or list of ints or list of level names
+            If not None, sort on values in specified index level(s).
+            This is only useful in the case of MultiIndex.
+        ascending : bool, default True
+            Sort ascending vs. descending.
+        inplace : bool, default False
+            If True, perform operation in-place.
+        kind : sorting method such as `quick sort` and others.
+            Not yet supported.
+        na_position : {‘first’, ‘last’}, default ‘last’
+            Puts NaNs at the beginning if first; last puts NaNs at the end.
+        sort_remaining : bool, default True
+            Not yet supported
+        ignore_index : bool, default False
+            if True, index will be replaced with RangeIndex.
+
+        Returns
+        -------
+        DataFrame/Series or None
+        """
+        if kind is not None:
+            raise NotImplementedError("kind is not yet supported")
+
+        if not sort_remaining:
+            raise NotImplementedError(
+                "sort_remaining == False is not yet supported"
+            )
+
+        if axis in (0, "index"):
+            if isinstance(self.index, cudf.MultiIndex):
+                if level is None:
+                    midx_data = self.index._source_data
+                else:
+                    # Pandas currently don't handle na_position
+                    # in case of MultiIndex
+                    if ascending is True:
+                        na_position = "first"
+                    else:
+                        na_position = "last"
+
+                    if cudf.api.types.is_list_like(level):
+                        labels = [
+                            self.index._get_level_label(lvl) for lvl in level
+                        ]
+                    else:
+                        labels = [self.index._get_level_label(level)]
+                    midx_data = self.index._source_data[labels]
+                inds = midx_data.argsort(
+                    ascending=ascending, na_position=na_position
+                )
+                outdf = self.take(inds)
+            elif (ascending and self.index.is_monotonic_increasing) or (
+                not ascending and self.index.is_monotonic_decreasing
+            ):
+                outdf = self.copy()
+            else:
+                inds = self.index.argsort(
+                    ascending=ascending, na_position=na_position
+                )
+                outdf = self.take(inds)
+        else:
+            labels = sorted(self._data.names, reverse=not ascending)
+            outdf = self[labels]
+
+        if ignore_index is True:
+            outdf = outdf.reset_index(drop=True)
+        return self._mimic_inplace(outdf, inplace=inplace)
+
     def _get_columns_by_label(self, labels, downcast=False):
         """
         Returns columns of the Frame specified by `labels`

@@ -2912,14 +2912,39 @@ def argsort(self, ascending=True, na_position="last"):
         """
         return self._sort(ascending=ascending, na_position=na_position)[1]
 
-    def sort_index(self, ascending=True):
+    def sort_index(
+        self,
+        axis=0,
+        level=None,
+        ascending=True,
+        inplace=False,
+        kind=None,
+        na_position="last",
+        sort_remaining=True,
+        ignore_index=False,
+    ):
         """
         Sort by the index.
 
         Parameters
         ----------
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            Axis to direct sorting. This can only be 0 for Series.
+        level : int or level name or list of ints or list of level names
+            If not None, sort on values in specified index level(s).
+            This is only useful in the case of MultiIndex.
         ascending : bool, default True
             Sort ascending vs. descending.
+        inplace : bool, default False
+            If True, perform operation in-place.
+        kind : sorting method such as `quick sort` and others.
+            Not yet supported.
+        na_position : {‘first’, ‘last’}, default ‘last’
+            Puts NaNs at the beginning if first; last puts NaNs at the end.
+        sort_remaining : bool, default True
+            Not yet supported
+        ignore_index : bool, default False
+            if True, index will be replaced with RangeIndex.
 
         Returns
         -------
@@ -2952,8 +2977,16 @@ def sort_index(self, ascending=True):
         1    c
         dtype: object
         """
-        inds = self.index.argsort(ascending=ascending)
-        return self.take(inds)
+        return super()._sort_index(
+            axis=axis,
+            level=level,
+            ascending=ascending,
+            inplace=inplace,
+            kind=kind,
+            na_position=na_position,
+            sort_remaining=sort_remaining,
+            ignore_index=ignore_index,
+        )
 
     def sort_values(
         self,

@@ -2898,20 +2898,34 @@ def test_dataframe_empty_sort_index():
     expect = pdf.sort_index()
     got = gdf.sort_index()
 
-    assert_eq(expect, got)
+    assert_eq(expect, got, check_index_type=True)
 
 
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.RangeIndex(0, 3, 1),
+        [3.0, 1.0, np.nan],
+        pytest.param(
+            pd.RangeIndex(2, -1, -1),
+            marks=[
+                pytest.mark.xfail(
+                    reason="https://github.com/pandas-dev/pandas/issues/43591"
+                )
+            ],
+        ),
+    ],
+)
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
 @pytest.mark.parametrize("ascending", [True, False])
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize("inplace", [True, False])
 @pytest.mark.parametrize("na_position", ["first", "last"])
 def test_dataframe_sort_index(
-    axis, ascending, inplace, ignore_index, na_position
+    index, axis, ascending, inplace, ignore_index, na_position
 ):
     pdf = pd.DataFrame(
-        {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]},
-        index=[3.0, 1.0, np.nan],
+        {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, index=index,
     )
     gdf = cudf.DataFrame.from_pandas(pdf)
 
@@ -2931,9 +2945,9 @@ def test_dataframe_sort_index(
     )
 
     if inplace is True:
-        assert_eq(pdf, gdf)
+        assert_eq(pdf, gdf, check_index_type=True)
     else:
-        assert_eq(expected, got)
+        assert_eq(expected, got, check_index_type=True)
 
 
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
@@ -2972,6 +2986,7 @@ def test_dataframe_mulitindex_sort_index(
     gdf = cudf.DataFrame.from_pandas(pdf)
 
     # ignore_index is supported in v.1.0
+
     expected = pdf.sort_index(
         axis=axis,
         level=level,

@@ -512,6 +512,7 @@ def test_series_datetime_value_counts(data, nulls, normalize, dropna):
         expected.reset_index(drop=True),
         got.reset_index(drop=True),
         check_dtype=False,
+        check_index_type=True,
     )
 
 
@@ -547,11 +548,13 @@ def test_categorical_value_counts(dropna, normalize, num_elements):
         pdf_value_counts.sort_index(),
         gdf_value_counts.sort_index(),
         check_dtype=False,
+        check_index_type=True,
     )
     assert_eq(
         pdf_value_counts.reset_index(drop=True),
         gdf_value_counts.reset_index(drop=True),
         check_dtype=False,
+        check_index_type=True,
     )
 
 
@@ -1228,3 +1231,51 @@ def test_series_upcast_float16(data):
     actual_series = cudf.Series(data)
     expected_series = cudf.Series(data, dtype="float32")
     assert_eq(actual_series, expected_series)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.RangeIndex(0, 3, 1),
+        [3.0, 1.0, np.nan],
+        ["a", "z", None],
+        pytest.param(
+            pd.RangeIndex(4, -1, -2),
+            marks=[
+                pytest.mark.xfail(
+                    reason="https://github.com/pandas-dev/pandas/issues/43591"
+                )
+            ],
+        ),
+    ],
+)
+@pytest.mark.parametrize("axis", [0, "index"])
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("na_position", ["first", "last"])
+def test_series_sort_index(
+    index, axis, ascending, inplace, ignore_index, na_position
+):
+    ps = pd.Series([10, 3, 12], index=index)
+    gs = cudf.from_pandas(ps)
+
+    expected = ps.sort_index(
+        axis=axis,
+        ascending=ascending,
+        ignore_index=ignore_index,
+        inplace=inplace,
+        na_position=na_position,
+    )
+    got = gs.sort_index(
+        axis=axis,
+        ascending=ascending,
+        ignore_index=ignore_index,
+        inplace=inplace,
+        na_position=na_position,
+    )
+
+    if inplace is True:
+        assert_eq(ps, gs, check_index_type=True)
+    else:
+        assert_eq(expected, got, check_index_type=True)
@@ -106,12 +106,12 @@ def test_series_argsort(nelem, dtype, asc):
 def test_series_sort_index(nelem, asc):
     np.random.seed(0)
     sr = Series((100 * np.random.random(nelem)))
-    orig = sr.to_array()
-    got = sr.sort_values().sort_index(ascending=asc).to_array()
-    if not asc:
-        # Reverse the array for descending sort
-        got = got[::-1]
-    np.testing.assert_array_equal(orig, got)
+    psr = sr.to_pandas()
+
+    expected = psr.sort_index(ascending=asc)
+    got = sr.sort_index(ascending=asc)
+
+    assert_eq(expected, got)
 
 
 @pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]])

@@ -486,7 +486,6 @@ def test_struct_field_integer(data):
 def test_dask_struct_field_Key_Error(data):
     got = dgd.from_cudf(Series(data), 2)
 
-    # import pdb; pdb.set_trace()
     with pytest.raises(KeyError):
         got.struct.field("notakey").compute()