From 2a1b83bd8c361a28db71e172b428d32bd44c67fd Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 10:46:22 -0700
Subject: [PATCH 01/10] optimize DataFrame.sort_index

---
 python/cudf/cudf/core/dataframe.py                | 7 ++++++-
 python/dask_cudf/dask_cudf/tests/test_accessor.py | 1 -
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0da31cfe583..d90b2d236f6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4002,11 +4002,16 @@ def sort_index(
                 inds = self.index._source_data[labels].argsort(
                     ascending=ascending, na_position=na_position
                 )
+                outdf = self.take(inds)
+            elif (ascending and self.index.is_monotonic_increasing) or (
+                not ascending and self.index.is_monotonic_decreasing
+            ):
+                outdf = self.copy()
             else:
                 inds = self.index.argsort(
                     ascending=ascending, na_position=na_position
                 )
-            outdf = self.take(inds)
+                outdf = self.take(inds)
         else:
             labels = sorted(self._data.names, reverse=not ascending)
             outdf = self[labels]
diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py
index 8227023aa51..805927dd474 100644
--- a/python/dask_cudf/dask_cudf/tests/test_accessor.py
+++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py
@@ -486,7 +486,6 @@ def test_struct_field_integer(data):
 def test_dask_struct_field_Key_Error(data):
     got = dgd.from_cudf(Series(data), 2)
 
-    # import pdb; pdb.set_trace()
     with pytest.raises(KeyError):
         got.struct.field("notakey").compute()
 

From 8e74cd046fddb9577ff15543a111efcb9c218c0d Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 13:37:05 -0700
Subject: [PATCH 02/10] fix sort_index

---
 python/cudf/cudf/core/dataframe.py       | 53 ++++-----------------
 python/cudf/cudf/core/frame.py           | 60 ++++++++++++++++++++++++
 python/cudf/cudf/core/series.py          | 39 +++++++++++++--
 python/cudf/cudf/tests/test_dataframe.py |  7 +--
 python/cudf/cudf/tests/test_series.py    |  3 ++
 python/cudf/cudf/tests/test_sorting.py   | 12 ++---
 6 files changed, 119 insertions(+), 55 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d90b2d236f6..e3bbce658c6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3976,49 +3976,16 @@ def sort_index(
         3  1  2
         2  3  1
         """
-        if kind is not None:
-            raise NotImplementedError("kind is not yet supported")
-
-        if not sort_remaining:
-            raise NotImplementedError(
-                "sort_remaining == False is not yet supported"
-            )
-
-        if axis in (0, "index"):
-            if level is not None and isinstance(self.index, cudf.MultiIndex):
-                # Pandas currently don't handle na_position
-                # in case of MultiIndex
-                if ascending is True:
-                    na_position = "first"
-                else:
-                    na_position = "last"
-
-                if is_list_like(level):
-                    labels = [
-                        self.index._get_level_label(lvl) for lvl in level
-                    ]
-                else:
-                    labels = [self.index._get_level_label(level)]
-                inds = self.index._source_data[labels].argsort(
-                    ascending=ascending, na_position=na_position
-                )
-                outdf = self.take(inds)
-            elif (ascending and self.index.is_monotonic_increasing) or (
-                not ascending and self.index.is_monotonic_decreasing
-            ):
-                outdf = self.copy()
-            else:
-                inds = self.index.argsort(
-                    ascending=ascending, na_position=na_position
-                )
-                outdf = self.take(inds)
-        else:
-            labels = sorted(self._data.names, reverse=not ascending)
-            outdf = self[labels]
-
-        if ignore_index is True:
-            outdf = outdf.reset_index(drop=True)
-        return self._mimic_inplace(outdf, inplace=inplace)
+        return super()._sort_index(
+            axis=axis,
+            level=level,
+            ascending=ascending,
+            inplace=inplace,
+            kind=kind,
+            na_position=na_position,
+            sort_remaining=sort_remaining,
+            ignore_index=ignore_index,
+        )
 
     def sort_values(
         self,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 037f6f7ff94..74cdb929621 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -428,6 +428,66 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             res.index.names = self._index.names
         return res
 
+    def _sort_index(
+        self,
+        axis=0,
+        level=None,
+        ascending=True,
+        inplace=False,
+        kind=None,
+        na_position="last",
+        sort_remaining=True,
+        ignore_index=False,
+    ):
+
+        if kind is not None:
+            raise NotImplementedError("kind is not yet supported")
+
+        if not sort_remaining:
+            raise NotImplementedError(
+                "sort_remaining == False is not yet supported"
+            )
+
+        if axis in (0, "index"):
+            if isinstance(self.index, cudf.MultiIndex):
+                if level is None:
+                    midx_data = self.index._source_data
+                else:
+                    # Pandas currently don't handle na_position
+                    # in case of MultiIndex
+                    if ascending is True:
+                        na_position = "first"
+                    else:
+                        na_position = "last"
+
+                    if cudf.utils.dtypes.is_list_like(level):
+                        labels = [
+                            self.index._get_level_label(lvl) for lvl in level
+                        ]
+                    else:
+                        labels = [self.index._get_level_label(level)]
+                    midx_data = self.index._source_data[labels]
+                inds = midx_data.argsort(
+                    ascending=ascending, na_position=na_position
+                )
+                outdf = self.take(inds)
+            elif (ascending and self.index.is_monotonic_increasing) or (
+                not ascending and self.index.is_monotonic_decreasing
+            ):
+                outdf = self.copy()
+            else:
+                inds = self.index.argsort(
+                    ascending=ascending, na_position=na_position
+                )
+                outdf = self.take(inds)
+        else:
+            labels = sorted(self._data.names, reverse=not ascending)
+            outdf = self[labels]
+
+        if ignore_index is True:
+            outdf = outdf.reset_index(drop=True)
+        return self._mimic_inplace(outdf, inplace=inplace)
+
     def _get_columns_by_label(self, labels, downcast=False):
         """
         Returns columns of the Frame specified by `labels`
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8ccd967b1b3..868e2256925 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2908,14 +2908,39 @@ def argsort(self, ascending=True, na_position="last"):
         """
         return self._sort(ascending=ascending, na_position=na_position)[1]
 
-    def sort_index(self, ascending=True):
+    def sort_index(
+        self,
+        axis=0,
+        level=None,
+        ascending=True,
+        inplace=False,
+        kind=None,
+        na_position="last",
+        sort_remaining=True,
+        ignore_index=False,
+    ):
         """
         Sort by the index.
 
         Parameters
         ----------
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            Axis to direct sorting. This can only be 0 for Series.
+        level : int or level name or list of ints or list of level names
+            If not None, sort on values in specified index level(s).
+            This is only useful in the case of MultiIndex.
         ascending : bool, default True
             Sort ascending vs. descending.
+        inplace : bool, default False
+            If True, perform operation in-place.
+        kind : sorting method such as `quick sort` and others.
+            Not yet supported.
+        na_position : {‘first’, ‘last’}, default ‘last’
+            Puts NaNs at the beginning if first; last puts NaNs at the end.
+        sort_remaining : bool, default True
+            Not yet supported
+        ignore_index : bool, default False
+            if True, index will be replaced with RangeIndex.
 
         Returns
         -------
@@ -2948,8 +2973,16 @@ def sort_index(self, ascending=True):
         1    c
         dtype: object
         """
-        inds = self.index.argsort(ascending=ascending)
-        return self.take(inds)
+        return super()._sort_index(
+            axis=axis,
+            level=level,
+            ascending=ascending,
+            inplace=inplace,
+            kind=kind,
+            na_position=na_position,
+            sort_remaining=sort_remaining,
+            ignore_index=ignore_index,
+        )
 
     def sort_values(
         self,
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 03a140c138b..74b0b8cd4cd 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2898,7 +2898,7 @@ def test_dataframe_empty_sort_index():
     expect = pdf.sort_index()
     got = gdf.sort_index()
 
-    assert_eq(expect, got)
+    assert_eq(expect, got, check_index_type=True)
 
 
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
@@ -2931,9 +2931,9 @@ def test_dataframe_sort_index(
     )
 
     if inplace is True:
-        assert_eq(pdf, gdf)
+        assert_eq(pdf, gdf, check_index_type=True)
     else:
-        assert_eq(expected, got)
+        assert_eq(expected, got, check_index_type=True)
 
 
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
@@ -2972,6 +2972,7 @@ def test_dataframe_mulitindex_sort_index(
     gdf = cudf.DataFrame.from_pandas(pdf)
 
     # ignore_index is supported in v.1.0
+
     expected = pdf.sort_index(
         axis=axis,
         level=level,
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index d29ba08a848..0103895ce59 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -512,6 +512,7 @@ def test_series_datetime_value_counts(data, nulls, normalize, dropna):
         expected.reset_index(drop=True),
         got.reset_index(drop=True),
         check_dtype=False,
+        check_index_type=True,
     )
 
 
@@ -547,11 +548,13 @@ def test_categorical_value_counts(dropna, normalize, num_elements):
         pdf_value_counts.sort_index(),
         gdf_value_counts.sort_index(),
         check_dtype=False,
+        check_index_type=True,
     )
     assert_eq(
         pdf_value_counts.reset_index(drop=True),
         gdf_value_counts.reset_index(drop=True),
         check_dtype=False,
+        check_index_type=True,
     )
 
 
diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index ef9f853bd11..fed391ac6be 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -106,12 +106,12 @@ def test_series_argsort(nelem, dtype, asc):
 def test_series_sort_index(nelem, asc):
     np.random.seed(0)
     sr = Series((100 * np.random.random(nelem)))
-    orig = sr.to_array()
-    got = sr.sort_values().sort_index(ascending=asc).to_array()
-    if not asc:
-        # Reverse the array for descending sort
-        got = got[::-1]
-    np.testing.assert_array_equal(orig, got)
+    psr = sr.to_pandas()
+
+    expected = psr.sort_index(ascending=asc)
+    got = sr.sort_index(ascending=asc)
+
+    assert_eq(expected, got)
 
 
 @pytest.mark.parametrize("data", [[0, 1, 1, 2, 2, 2, 3, 3], [0], [1, 2, 3]])

From 0eb3302f828b6e76709926ad5eb09e0ae5e3379a Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 13:49:51 -0700
Subject: [PATCH 03/10] doc

---
 python/cudf/cudf/core/frame.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 74cdb929621..c7725764836 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -439,7 +439,34 @@ def _sort_index(
         sort_remaining=True,
         ignore_index=False,
     ):
+        """
+        Helper for .sort_index
 
+        Parameters
+        ----------
+        axis : {0 or ‘index’, 1 or ‘columns’}, default 0
+            The axis along which to sort. The value 0 identifies the rows,
+            and 1 identifies the columns.
+        level : int or level name or list of ints or list of level names
+            If not None, sort on values in specified index level(s).
+            This is only useful in the case of MultiIndex.
+        ascending : bool, default True
+            Sort ascending vs. descending.
+        inplace : bool, default False
+            If True, perform operation in-place.
+        kind : sorting method such as `quick sort` and others.
+            Not yet supported.
+        na_position : {‘first’, ‘last’}, default ‘last’
+            Puts NaNs at the beginning if first; last puts NaNs at the end.
+        sort_remaining : bool, default True
+            Not yet supported
+        ignore_index : bool, default False
+            if True, index will be replaced with RangeIndex.
+
+        Returns
+        -------
+        DataFrame/Series or None
+        """
         if kind is not None:
             raise NotImplementedError("kind is not yet supported")
 

From 509dd6cdfbd4dfca77bfb82d3fda44ec0aafe756 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 16:20:47 -0700
Subject: [PATCH 04/10] add tests

---
 python/cudf/cudf/tests/test_dataframe.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 74b0b8cd4cd..80f03dbb916 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2901,17 +2901,31 @@ def test_dataframe_empty_sort_index():
     assert_eq(expect, got, check_index_type=True)
 
 
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.RangeIndex(0, 3, 1),
+        [3.0, 1.0, np.nan],
+        pytest.param(
+            pd.RangeIndex(2, -1, -1),
+            marks=[
+                pytest.mark.xfail(
+                    reason="https://github.com/pandas-dev/pandas/issues/43591"
+                )
+            ],
+        ),
+    ],
+)
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
 @pytest.mark.parametrize("ascending", [True, False])
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize("inplace", [True, False])
 @pytest.mark.parametrize("na_position", ["first", "last"])
 def test_dataframe_sort_index(
-    axis, ascending, inplace, ignore_index, na_position
+    index, axis, ascending, inplace, ignore_index, na_position
 ):
     pdf = pd.DataFrame(
-        {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]},
-        index=[3.0, 1.0, np.nan],
+        {"b": [1, 3, 2], "a": [1, 4, 3], "c": [4, 1, 5]}, index=index,
     )
     gdf = cudf.DataFrame.from_pandas(pdf)
 

From 36fe12ed27e122782396cd0a386a57bc392bc56d Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 16:27:24 -0700
Subject: [PATCH 05/10] add Series.sort_index tests

---
 python/cudf/cudf/tests/test_series.py | 48 +++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 0103895ce59..11deb6c0842 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1231,3 +1231,51 @@ def test_series_upcast_float16(data):
     actual_series = cudf.Series(data)
     expected_series = cudf.Series(data, dtype="float32")
     assert_eq(actual_series, expected_series)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        pd.RangeIndex(0, 3, 1),
+        [3.0, 1.0, np.nan],
+        ["a", "z", None],
+        pytest.param(
+            pd.RangeIndex(4, -1, -2),
+            marks=[
+                pytest.mark.xfail(
+                    reason="https://github.com/pandas-dev/pandas/issues/43591"
+                )
+            ],
+        ),
+    ],
+)
+@pytest.mark.parametrize("axis", [0, "index"])
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("na_position", ["first", "last"])
+def test_series_sort_index(
+    index, axis, ascending, inplace, ignore_index, na_position
+):
+    ps = pd.Series([10, 3, 12], index=index)
+    gs = cudf.from_pandas(ps)
+
+    expected = ps.sort_index(
+        axis=axis,
+        ascending=ascending,
+        ignore_index=ignore_index,
+        inplace=inplace,
+        na_position=na_position,
+    )
+    got = gs.sort_index(
+        axis=axis,
+        ascending=ascending,
+        ignore_index=ignore_index,
+        inplace=inplace,
+        na_position=na_position,
+    )
+
+    if inplace is True:
+        assert_eq(ps, gs, check_index_type=True)
+    else:
+        assert_eq(expected, got, check_index_type=True)

From dc859593f11e78745a7dbfc952edc5ec06cc5dc3 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 17:00:45 -0700
Subject: [PATCH 06/10] update api

---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 845d370e5fb..6a18361e290 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -488,7 +488,7 @@ def _sort_index(
                     else:
                         na_position = "last"
 
-                    if cudf.utils.dtypes.is_list_like(level):
+                    if cudf.api.types.is_list_like(level):
                         labels = [
                             self.index._get_level_label(lvl) for lvl in level
                         ]

From b878e6bb11d0a9f9daf911fc6d351ab455a434f4 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 20:00:58 -0500
Subject: [PATCH 07/10] Update python/cudf/cudf/core/frame.py

---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 9f4d5855e2f..bda9e113849 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -494,7 +494,7 @@ def _sort_index(
                         ]
                     else:
                         labels = [self.index._get_level_label(level)]
-                    midx_data = self.index._source_data[labels]
+                    midx_data = self.index.to_frame(index=False)[labels]
                 inds = midx_data.argsort(
                     ascending=ascending, na_position=na_position
                 )

From ffa07ea72b68cc4e27b181be8eb6d8ef3e9f34bc Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 15 Sep 2021 20:01:03 -0500
Subject: [PATCH 08/10] Update python/cudf/cudf/core/frame.py

---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index bda9e113849..5ba11a55f5d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -479,7 +479,7 @@ def _sort_index(
         if axis in (0, "index"):
             if isinstance(self.index, cudf.MultiIndex):
                 if level is None:
-                    midx_data = self.index._source_data
+                    midx_data = self.index.to_frame(index=False)
                 else:
                     # Pandas currently don't handle na_position
                     # in case of MultiIndex

From 68f1e9720e508e618acc97b6e6d0f3502a2af214 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 16 Sep 2021 14:46:08 -0500
Subject: [PATCH 09/10] Update python/cudf/cudf/core/frame.py

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5ba11a55f5d..2c763c60341 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -494,7 +494,7 @@ def _sort_index(
                         ]
                     else:
                         labels = [self.index._get_level_label(level)]
-                    midx_data = self.index.to_frame(index=False)[labels]
+                    midx_data = cudf.DataFrame._from_data(self.index._data.select_by_label(labels))
                 inds = midx_data.argsort(
                     ascending=ascending, na_position=na_position
                 )

From 2c8110b7292c4c8bf2b860244c3f74ed562d7478 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 16 Sep 2021 13:02:08 -0700
Subject: [PATCH 10/10] style

---
 python/cudf/cudf/core/frame.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 2c763c60341..09a6df67da5 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -494,7 +494,9 @@ def _sort_index(
                         ]
                     else:
                         labels = [self.index._get_level_label(level)]
-                    midx_data = cudf.DataFrame._from_data(self.index._data.select_by_label(labels))
+                    midx_data = cudf.DataFrame._from_data(
+                        self.index._data.select_by_label(labels)
+                    )
                 inds = midx_data.argsort(
                     ascending=ascending, na_position=na_position
                 )