From 2d84400c8d1a6186840483e6172d6eb2929171d2 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 15:24:25 -0700
Subject: [PATCH 1/8] Rewrite transpose Cython API and update
 `DataFrame.transpose`

---
 python/cudf/cudf/_lib/transpose.pyx | 60 ++++-------------------------
 python/cudf/cudf/_lib/utils.pyx     |  8 ++--
 python/cudf/cudf/core/dataframe.py  | 37 +++++++++++++++---
 3 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx
index 931a2702612..b9eea6169bd 100644
--- a/python/cudf/cudf/_lib/transpose.pyx
+++ b/python/cudf/cudf/_lib/transpose.pyx
@@ -1,7 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-import cudf
-from cudf.api.types import is_categorical_dtype
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
@@ -9,65 +6,22 @@ from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.transpose cimport transpose as cpp_transpose
-from cudf._lib.utils cimport data_from_table_view, table_view_from_table
-
+from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
-def transpose(source):
-    """Transpose index and columns.
 
-    See Also
-    --------
-    cudf.core.DataFrame.transpose
+def transpose(list source_columns):
+    """Transpose m n-row columns into n m-row columns
     """
-
-    if source._num_columns == 0:
-        return source
-
-    cats = None
-    columns = source._columns
-    dtype = columns[0].dtype
-
-    if is_categorical_dtype(dtype):
-        if any(not is_categorical_dtype(c.dtype) for c in columns):
-            raise ValueError('Columns must all have the same dtype')
-        cats = list(c.categories for c in columns)
-        cats = cudf.core.column.concat_columns(cats).unique()
-        source = cudf.core.frame.Frame(index=source._index, data=[
-            (name, col._set_categories(cats, is_unique=True).codes)
-            for name, col in source._data.items()
-        ])
-    elif any(c.dtype != dtype for c in columns):
-        raise ValueError('Columns must all have the same dtype')
-
     cdef pair[unique_ptr[column], table_view] c_result
-    cdef table_view c_input = table_view_from_table(
-        source, ignore_index=True)
+    cdef table_view c_input = table_view_from_columns(source_columns)
 
     with nogil:
         c_result = move(cpp_transpose(c_input))
 
     result_owner = Column.from_unique_ptr(move(c_result.first))
-    data, _ = data_from_table_view(
+    return columns_from_table_view(
         c_result.second,
-        owner=result_owner,
-        column_names=range(c_input.num_rows())
+        owners=[result_owner] * c_result.second.num_columns()
     )
-
-    if cats is not None:
-        data= [
-            (name, cudf.core.column.column.build_categorical_column(
-                codes=cudf.core.column.column.build_column(
-                    col.base_data, dtype=col.dtype),
-                mask=col.base_mask,
-                size=col.size,
-                categories=cats,
-                offset=col.offset,
-            ))
-            for name, col in data.items()
-        ]
-
-    return data
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 8557f430e25..643a1adca9f 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -317,10 +317,10 @@ cdef columns_from_table_view(
 ):
     """
     Given a ``cudf::table_view``, construsts a list of columns from it,
-    along with referencing an ``owner`` Python object that owns the memory
-    lifetime. ``owner`` must be either None or a list of column. If ``owner``
-    is a list of columns, the owner of the `i`th ``cudf::column_view`` in the
-    table view is ``owners[i]``. For more about memory ownership,
+    along with referencing ``owners``, the Python objects that own the memory
+    lifetime. ``owners`` must be either None or a list of columns. If it is a
+    list of columns, the owner of the `i`th ``cudf::column_view`` in the table
+    view is ``owners[i]``. For more about memory ownership,
     see ``Column.from_column_view``.
     """
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8893b85c97c..063539c2c7a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3193,17 +3193,42 @@ def transpose(self):
         Difference from pandas:
         Not supporting *copy* because default and only behavior is copy=True
         """
-        # Never transpose a MultiIndex - remove the existing columns and
-        # replace with a RangeIndex. Afterward, reassign.
-        columns = self.index.copy(deep=False)
+
         index = self._data.to_pandas_index()
+        columns = self.index.copy(deep=False)
         if self._num_columns == 0 or self._num_rows == 0:
             return DataFrame(index=index, columns=columns)
+
+        # Only data columns are passed to libcudf; index columns are excluded.
+        source_columns = [*self._columns]
+        source_dtype = source_columns[0].dtype
+        if is_categorical_dtype(source_dtype):
+            if any(not is_categorical_dtype(c.dtype) for c in source_columns):
+                raise ValueError("Columns must all have the same dtype")
+            cats = list(c.categories for c in source_columns)
+            cats = cudf.core.column.concat_columns(cats).unique()
+            source_columns = [
+                col._set_categories(cats, is_unique=True).codes
+                for col in source_columns
+            ]
+
+        if any(c.dtype != source_columns[0].dtype for c in source_columns):
+            raise ValueError("Columns must all have the same dtype")
+
+        result_columns = libcudf.transpose.transpose(source_columns)
+
+        if is_categorical_dtype(source_dtype):
+            result_columns = [
+                codes._with_type_metadata(
+                    cudf.core.dtypes.CategoricalDtype(categories=cats)
+                )
+                for codes in result_columns
+            ]
+
         # Set the old column names as the new index
         result = self.__class__._from_data(
-            # Cython renames the columns to the range [0...ncols]
-            libcudf.transpose.transpose(self),
-            as_index(index),
+            {i: col for i, col in enumerate(result_columns)},
+            index=as_index(index),
         )
         # Set the old index as the new column names
         result.columns = columns

From fd2bb8e50760e31b2ea0c5c38e9f2e84fe038b39 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 15:50:11 -0700
Subject: [PATCH 2/8] Refactor `is_sorted`

---
 python/cudf/cudf/_lib/sort.pyx | 51 ++++++++++++++--------------------
 python/cudf/cudf/core/frame.py | 16 ++++++++++-
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index 3aa0b35e90e..d34f3a7aa87 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -1,6 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-import pandas as pd
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -23,19 +21,24 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport null_order, null_policy, order
 from cudf._lib.sort cimport underlying_type_t_rank_method
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport (
+    columns_from_unique_ptr,
+    data_from_unique_ptr,
+    table_view_from_columns,
+    table_view_from_table,
+)
 
 
 def is_sorted(
-    source_table, object ascending=None, object null_position=None
+    list source_columns, object ascending=None, object null_position=None
 ):
     """
     Checks whether the rows of a `table` are sorted in lexicographical order.
 
     Parameters
     ----------
-    source_table : Frame
-        Frame whose columns are to be checked for sort order
+    source_columns : list of columns
+        Columns to be checked for sort order
     ascending : None or list-like of booleans
         None or list-like of boolean values indicating expected sort order of
         each column. If list-like, size of list-like must be len(columns). If
@@ -58,51 +61,39 @@ def is_sorted(
     cdef vector[null_order] null_precedence
 
     if ascending is None:
-        column_order = vector[order](
-            source_table._num_columns, order.ASCENDING
-        )
-    elif pd.api.types.is_list_like(ascending):
-        if len(ascending) != source_table._num_columns:
+        column_order = vector[order](len(source_columns), order.ASCENDING)
+    else:
+        if len(ascending) != len(source_columns):
             raise ValueError(
-                f"Expected a list-like of length {source_table._num_columns}, "
+                f"Expected a list-like of length {len(source_columns)}, "
                 f"got length {len(ascending)} for `ascending`"
             )
         column_order = vector[order](
-            source_table._num_columns, order.DESCENDING
+            len(source_columns), order.DESCENDING
         )
         for idx, val in enumerate(ascending):
             if val:
                 column_order[idx] = order.ASCENDING
-    else:
-        raise TypeError(
-            f"Expected a list-like or None for `ascending`, got "
-            f"{type(ascending)}"
-        )
 
     if null_position is None:
         null_precedence = vector[null_order](
-            source_table._num_columns, null_order.AFTER
+            len(source_columns), null_order.AFTER
         )
-    elif pd.api.types.is_list_like(null_position):
-        if len(null_position) != source_table._num_columns:
+    else:
+        if len(null_position) != len(source_columns):
             raise ValueError(
-                f"Expected a list-like of length {source_table._num_columns}, "
+                f"Expected a list-like of length {len(source_columns)}, "
                 f"got length {len(null_position)} for `null_position`"
             )
         null_precedence = vector[null_order](
-            source_table._num_columns, null_order.AFTER
+            len(source_columns), null_order.AFTER
         )
         for idx, val in enumerate(null_position):
             if val:
                 null_precedence[idx] = null_order.BEFORE
-    else:
-        raise TypeError(
-            f"Expected a list-like or None for `null_position`, got "
-            f"{type(null_position)}"
-        )
 
     cdef bool c_result
-    cdef table_view source_table_view = table_view_from_table(source_table)
+    cdef table_view source_table_view = table_view_from_columns(source_columns)
     with nogil:
         c_result = cpp_is_sorted(
             source_table_view,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 806cdf14c71..e99f0fc8f78 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2441,8 +2441,22 @@ def _is_sorted(self, ascending=None, null_position=None):
             Returns True, if sorted as expected by ``ascending`` and
             ``null_position``, False otherwise.
         """
+        if ascending is not None and not cudf.api.types.is_list_like(
+            ascending
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `ascending`, got "
+                f"{type(ascending)}"
+            )
+        if null_position is not None and not cudf.api.types.is_list_like(
+            null_position
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `null_position`, got "
+                f"{type(null_position)}"
+            )
         return libcudf.sort.is_sorted(
-            self, ascending=ascending, null_position=null_position
+            [*self._columns], ascending=ascending, null_position=null_position
         )
 
     @_cudf_nvtx_annotate

From db9ac6eb4e1f0daba08b91a0e69c33eaac8c3bb6 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:13:27 -0700
Subject: [PATCH 3/8] Refactor `order_by`

---
 python/cudf/cudf/_lib/sort.pyx | 12 ++++++------
 python/cudf/cudf/core/frame.py | 14 ++++++++------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index d34f3a7aa87..d36cd20792a 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -104,21 +104,21 @@ def is_sorted(
     return c_result
 
 
-def order_by(source_table, object ascending, str na_position):
+def order_by(list columns_from_table, object ascending, str na_position):
     """
-    Sorting the table ascending/descending
+    Get the indices that sort the table in ascending/descending order.
 
     Parameters
     ----------
-    source_table : table which will be sorted
-    ascending : list of boolean values which correspond to each column
+    columns_from_table : columns from the table which will be sorted
+    ascending : sequence of boolean values which correspond to each column
                 in source_table signifying order of each column
                 True - Ascending and False - Descending
     na_position : whether null value should show up at the "first" or "last"
                 position of **all** sorted column.
     """
-    cdef table_view source_table_view = table_view_from_table(
-        source_table, ignore_index=True
+    cdef table_view source_table_view = table_view_from_columns(
+        columns_from_table
     )
     cdef vector[order] column_order
     column_order.reserve(len(ascending))
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e99f0fc8f78..38182a71676 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2273,15 +2273,17 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
         # Get an int64 column consisting of the indices required to sort self
         # according to the columns specified in by.
 
-        to_sort = (
-            self
-            if by is None
-            else self._get_columns_by_label(list(by), downcast=False)
-        )
+        to_sort = [
+            *(
+                self
+                if by is None
+                else self._get_columns_by_label(list(by), downcast=False)
+            )._columns
+        ]
 
         # If given a scalar need to construct a sequence of length # of columns
         if np.isscalar(ascending):
-            ascending = [ascending] * to_sort._num_columns
+            ascending = [ascending] * len(to_sort)
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 

From 46ab81272aad972f0bee186f115688c738b6dd29 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:26:45 -0700
Subject: [PATCH 4/8] Rename sort.pyx variables

---
 python/cudf/cudf/_lib/sort.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index d36cd20792a..9ad6b36f87a 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -125,13 +125,13 @@ def order_by(list columns_from_table, object ascending, str na_position):
     cdef vector[null_order] null_precedence
     null_precedence.reserve(len(ascending))
 
-    for i in ascending:
-        if i is True:
+    for asc in ascending:
+        if asc:
             column_order.push_back(order.ASCENDING)
         else:
             column_order.push_back(order.DESCENDING)
 
-        if i ^ (na_position == "first"):
+        if asc ^ (na_position == "first"):
             null_precedence.push_back(null_order.AFTER)
         else:
             null_precedence.push_back(null_order.BEFORE)

From 174280544e2d4cbbc535e3346c58e77e7f747fa3 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:29:38 -0700
Subject: [PATCH 5/8] Refactor `digitize`

---
 python/cudf/cudf/_lib/sort.pyx            | 18 +++++++++---------
 python/cudf/cudf/core/column/numerical.py |  4 +---
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index 9ad6b36f87a..56db54ee2a9 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -145,21 +145,21 @@ def order_by(list columns_from_table, object ascending, str na_position):
     return Column.from_unique_ptr(move(c_result))
 
 
-def digitize(source_values_table, bins, bool right=False):
+def digitize(list source_columns, list bins, bool right=False):
     """
     Return the indices of the bins to which each value in source_table belongs.
 
     Parameters
     ----------
-    source_table : Input table to be binned.
-    bins : Frame containing columns of bins
+    source_columns : Input columns to be binned.
+    bins : List containing columns of bins
     right : Indicating whether the intervals include the
             right or the left bin edge.
     """
 
-    cdef table_view bins_view = table_view_from_table(bins)
-    cdef table_view source_values_table_view = table_view_from_table(
-        source_values_table
+    cdef table_view bins_view = table_view_from_columns(bins)
+    cdef table_view source_table_view = table_view_from_columns(
+        source_columns
     )
     cdef vector[order] column_order = (
         vector[order](
@@ -175,11 +175,11 @@ def digitize(source_values_table, bins, bool right=False):
     )
 
     cdef unique_ptr[column] c_result
-    if right is True:
+    if right:
         with nogil:
             c_result = move(lower_bound(
                 bins_view,
-                source_values_table_view,
+                source_table_view,
                 column_order,
                 null_precedence)
             )
@@ -187,7 +187,7 @@ def digitize(source_values_table, bins, bool right=False):
         with nogil:
             c_result = move(upper_bound(
                 bins_view,
-                source_values_table_view,
+                source_table_view,
                 column_order,
                 null_precedence)
             )
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 216faaa8250..e7b8d62f886 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -774,6 +774,4 @@ def digitize(
     if bin_col.nullable:
         raise ValueError("`bins` cannot contain null entries.")
 
-    return as_column(
-        libcudf.sort.digitize(column.as_frame(), bin_col.as_frame(), right)
-    )
+    return as_column(libcudf.sort.digitize([column], [bin_col], right))

From 6a765c923bd72c58b0db47db0faef5ee1b4a795e Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:30:19 -0700
Subject: [PATCH 6/8] Reduce digitize test count, move to test_series.py

---
 python/cudf/cudf/tests/test_dataframe.py | 29 ------------------------
 python/cudf/cudf/tests/test_series.py    | 29 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 07261534777..21488f1e8cd 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2468,35 +2468,6 @@ def test_arrow_handle_no_index_name(pdf, gdf):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize("num_rows", [1, 3, 10, 100])
-@pytest.mark.parametrize("num_bins", [1, 2, 4, 20])
-@pytest.mark.parametrize("right", [True, False])
-@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
-@pytest.mark.parametrize("series_bins", [True, False])
-def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
-    data = np.random.randint(0, 100, num_rows).astype(dtype)
-    bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype)))
-    s = cudf.Series(data)
-    if series_bins:
-        s_bins = cudf.Series(bins)
-        indices = s.digitize(s_bins, right)
-    else:
-        indices = s.digitize(bins, right)
-    np.testing.assert_array_equal(
-        np.digitize(data, bins, right), indices.to_numpy()
-    )
-
-
-def test_series_digitize_invalid_bins():
-    s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
-    bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
-
-    with pytest.raises(
-        ValueError, match="`bins` cannot contain null entries."
-    ):
-        _ = s.digitize(bins)
-
-
 def test_pandas_non_contiguious():
     arr1 = np.random.sample([5000, 10])
     assert arr1.flags["C_CONTIGUOUS"] is True
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index fccb9f680d9..87fb9bff7ed 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1782,3 +1782,32 @@ def test_diff_many_dtypes(data):
     gs = cudf.from_pandas(ps)
     assert_eq(ps.diff(), gs.diff())
     assert_eq(ps.diff(periods=2), gs.diff(periods=2))
+
+
+@pytest.mark.parametrize("num_rows", [1, 100])
+@pytest.mark.parametrize("num_bins", [1, 10])
+@pytest.mark.parametrize("right", [True, False])
+@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
+@pytest.mark.parametrize("series_bins", [True, False])
+def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
+    data = np.random.randint(0, 100, num_rows).astype(dtype)
+    bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype)))
+    s = cudf.Series(data)
+    if series_bins:
+        s_bins = cudf.Series(bins)
+        indices = s.digitize(s_bins, right)
+    else:
+        indices = s.digitize(bins, right)
+    np.testing.assert_array_equal(
+        np.digitize(data, bins, right), indices.to_numpy()
+    )
+
+
+def test_series_digitize_invalid_bins():
+    s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
+    bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
+
+    with pytest.raises(
+        ValueError, match="`bins` cannot contain null entries."
+    ):
+        _ = s.digitize(bins)

From 2993fbc89a3020cb138a8f0c2ebfb79a1e8ed337 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:59:58 -0700
Subject: [PATCH 7/8] Refactor `rank`

---
 python/cudf/cudf/_lib/sort.pyx | 26 +++++++-------------------
 python/cudf/cudf/core/frame.py |  9 ++++++---
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index 56db54ee2a9..faa4279c1ca 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -21,12 +21,7 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport null_order, null_policy, order
 from cudf._lib.sort cimport underlying_type_t_rank_method
-from cudf._lib.utils cimport (
-    columns_from_unique_ptr,
-    data_from_unique_ptr,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def is_sorted(
@@ -203,15 +198,13 @@ class RankMethod(IntEnum):
     DENSE = < underlying_type_t_rank_method > rank_method.DENSE
 
 
-def rank_columns(source_table, object method, str na_option,
+def rank_columns(list source_columns, object method, str na_option,
                  bool ascending, bool pct
                  ):
     """
     Compute numerical data ranks (1 through n) of each column in the dataframe
     """
-    cdef table_view source_table_view = table_view_from_table(
-        source_table, ignore_index=True
-    )
+    cdef table_view source_table_view = table_view_from_columns(source_columns)
 
     cdef rank_method c_rank_method = < rank_method > (
         < underlying_type_t_rank_method > method
@@ -251,7 +244,7 @@ def rank_columns(source_table, object method, str na_option,
     cdef vector[unique_ptr[column]] c_results
     cdef column_view c_view
     cdef Column col
-    for col in source_table._columns:
+    for col in source_columns:
         c_view = col.view()
         with nogil:
             c_results.push_back(move(
@@ -265,11 +258,6 @@ def rank_columns(source_table, object method, str na_option,
                 )
             ))
 
-    cdef unique_ptr[table] c_result
-    c_result.reset(new table(move(c_results)))
-    data, _ = data_from_unique_ptr(
-        move(c_result),
-        column_names=source_table._column_names,
-        index_names=None
-    )
-    return data, source_table._index
+    return [Column.from_unique_ptr(
+        move(c_results[i])
+    ) for i in range(c_results.size())]
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 38182a71676..52de513141b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1365,11 +1365,14 @@ def rank(
             if source.empty:
                 return source.astype("float64")
 
-        data, index = libcudf.sort.rank_columns(
-            source, method_enum, na_option, ascending, pct
+        result_columns = libcudf.sort.rank_columns(
+            [*source._columns], method_enum, na_option, ascending, pct
         )
 
-        return self._from_data(data, index).astype(np.float64)
+        return self.__class__._from_data(
+            dict(zip(source._column_names, result_columns)),
+            index=source._index,
+        ).astype(np.float64)
 
     @_cudf_nvtx_annotate
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):

From e2f7c27a0fd1ce1b0a7466e41c535eba49ed2072 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 15 Apr 2022 17:02:38 -0700
Subject: [PATCH 8/8] Move rank to `indexed_frame`

---
 python/cudf/cudf/core/frame.py         | 86 -------------------------
 python/cudf/cudf/core/indexed_frame.py | 87 ++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 86 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 52de513141b..0d3e310dfa1 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1288,92 +1288,6 @@ def _quantiles(
         result._copy_type_metadata(self)
         return result
 
-    @_cudf_nvtx_annotate
-    def rank(
-        self,
-        axis=0,
-        method="average",
-        numeric_only=None,
-        na_option="keep",
-        ascending=True,
-        pct=False,
-    ):
-        """
-        Compute numerical data ranks (1 through n) along axis.
-        By default, equal values are assigned a rank that is the average of the
-        ranks of those values.
-
-        Parameters
-        ----------
-        axis : {0 or 'index'}, default 0
-            Index to direct ranking.
-        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
-            How to rank the group of records that have the same value
-            (i.e. ties):
-            * average: average rank of the group
-            * min: lowest rank in the group
-            * max: highest rank in the group
-            * first: ranks assigned in order they appear in the array
-            * dense: like 'min', but rank always increases by 1 between groups.
-        numeric_only : bool, optional
-            For DataFrame objects, rank only numeric columns if set to True.
-        na_option : {'keep', 'top', 'bottom'}, default 'keep'
-            How to rank NaN values:
-            * keep: assign NaN rank to NaN values
-            * top: assign smallest rank to NaN values if ascending
-            * bottom: assign highest rank to NaN values if ascending.
-        ascending : bool, default True
-            Whether or not the elements should be ranked in ascending order.
-        pct : bool, default False
-            Whether or not to display the returned rankings in percentile
-            form.
-
-        Returns
-        -------
-        same type as caller
-            Return a Series or DataFrame with data ranks as values.
-        """
-        if isinstance(self, cudf.BaseIndex):
-            warnings.warn(
-                "Index.rank is deprecated and will be removed.",
-                FutureWarning,
-            )
-
-        if method not in {"average", "min", "max", "first", "dense"}:
-            raise KeyError(method)
-
-        method_enum = libcudf.sort.RankMethod[method.upper()]
-        if na_option not in {"keep", "top", "bottom"}:
-            raise ValueError(
-                "na_option must be one of 'keep', 'top', or 'bottom'"
-            )
-
-        if axis not in (0, "index"):
-            raise NotImplementedError(
-                f"axis must be `0`/`index`, "
-                f"axis={axis} is not yet supported in rank"
-            )
-
-        source = self
-        if numeric_only:
-            numeric_cols = (
-                name
-                for name in self._data.names
-                if _is_non_decimal_numeric_dtype(self._data[name])
-            )
-            source = self._get_columns_by_label(numeric_cols)
-            if source.empty:
-                return source.astype("float64")
-
-        result_columns = libcudf.sort.rank_columns(
-            [*source._columns], method_enum, na_option, ascending, pct
-        )
-
-        return self.__class__._from_data(
-            dict(zip(source._column_names, result_columns)),
-            index=source._index,
-        ).astype(np.float64)
-
     @_cudf_nvtx_annotate
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index ea722ec3968..7a00f6043b8 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3545,6 +3545,93 @@ def ge(
             other=other, op="__ge__", fill_value=fill_value, can_reindex=True
         )
 
+    @_cudf_nvtx_annotate
+    def rank(
+        self,
+        axis=0,
+        method="average",
+        numeric_only=None,
+        na_option="keep",
+        ascending=True,
+        pct=False,
+    ):
+        """
+        Compute numerical data ranks (1 through n) along axis.
+
+        By default, equal values are assigned a rank that is the average of the
+        ranks of those values.
+
+        Parameters
+        ----------
+        axis : {0 or 'index'}, default 0
+            Index to direct ranking.
+        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+            How to rank the group of records that have the same value
+            (i.e. ties):
+            * average: average rank of the group
+            * min: lowest rank in the group
+            * max: highest rank in the group
+            * first: ranks assigned in order they appear in the array
+            * dense: like 'min', but rank always increases by 1 between groups.
+        numeric_only : bool, optional
+            For DataFrame objects, rank only numeric columns if set to True.
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
+            How to rank NaN values:
+            * keep: assign NaN rank to NaN values
+            * top: assign smallest rank to NaN values if ascending
+            * bottom: assign highest rank to NaN values if ascending.
+        ascending : bool, default True
+            Whether or not the elements should be ranked in ascending order.
+        pct : bool, default False
+            Whether or not to display the returned rankings in percentile
+            form.
+
+        Returns
+        -------
+        same type as caller
+            Return a Series or DataFrame with data ranks as values.
+        """
+        if isinstance(self, cudf.BaseIndex):
+            warnings.warn(
+                "Index.rank is deprecated and will be removed.",
+                FutureWarning,
+            )
+
+        if method not in {"average", "min", "max", "first", "dense"}:
+            raise KeyError(method)
+
+        method_enum = libcudf.sort.RankMethod[method.upper()]
+        if na_option not in {"keep", "top", "bottom"}:
+            raise ValueError(
+                "na_option must be one of 'keep', 'top', or 'bottom'"
+            )
+
+        if axis not in (0, "index"):
+            raise NotImplementedError(
+                f"axis must be `0`/`index`, "
+                f"axis={axis} is not yet supported in rank"
+            )
+
+        source = self
+        if numeric_only:
+            numeric_cols = (
+                name
+                for name in self._data.names
+                if _is_non_decimal_numeric_dtype(self._data[name])
+            )
+            source = self._get_columns_by_label(numeric_cols)
+            if source.empty:
+                return source.astype("float64")
+
+        result_columns = libcudf.sort.rank_columns(
+            [*source._columns], method_enum, na_option, ascending, pct
+        )
+
+        return self.__class__._from_data(
+            dict(zip(source._column_names, result_columns)),
+            index=source._index,
+        ).astype(np.float64)
+
 
 def _check_duplicate_level_names(specified, level_names):
     """Raise if any of `specified` has duplicates in `level_names`."""