From b23b57b686fc5c26bf0dfa60698f5a8ba58c8697 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 2 Mar 2022 17:07:29 -0800
Subject: [PATCH 01/17] Split num_rows implementation between Frame and
 IndexedFrame.

---
 python/cudf/cudf/core/frame.py         | 6 +-----
 python/cudf/cudf/core/indexed_frame.py | 5 +++++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 07cc3ea71cd..6820fe8fc7a 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -144,11 +144,7 @@ def _num_columns(self) -> int:
 
     @property
     def _num_rows(self) -> int:
-        if self._index is not None:
-            return len(self._index)
-        if len(self._data) == 0:
-            return 0
-        return len(self._data.columns[0])
+        return 0 if self._num_columns == 0 else len(self._data.columns[0])
 
     @property
     def _column_names(self) -> Tuple[Any, ...]:  # TODO: Tuple[str]?
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 3fa951241f7..256dc104586 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -175,6 +175,11 @@ def to_dict(self, *args, **kwargs):  # noqa: D102
             "`.to_pandas().to_dict()` to construct a Python dictionary."
         )
 
+    @property
+    def _num_rows(self) -> int:
+        # Important to use the index because the data may be empty.
+        return len(self._index)
+
     @property
     def index(self):
         """Get the labels for the rows."""

From 696772b38a2dd14c8c8389eb67c9d9cbfe93c82a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 2 Mar 2022 18:08:12 -0800
Subject: [PATCH 02/17] Refactor _num_rows and _from_data.

---
 python/cudf/cudf/core/dataframe.py       | 17 +++++++++++------
 python/cudf/cudf/core/frame.py           | 17 ++++++++---------
 python/cudf/cudf/core/index.py           | 11 ++++++++++-
 python/cudf/cudf/core/indexed_frame.py   | 18 ++++++++++++++++--
 python/cudf/cudf/core/multiindex.py      | 12 +++---------
 python/cudf/cudf/core/series.py          | 11 +++++------
 python/cudf/cudf/tests/test_dataframe.py | 12 ++----------
 7 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 57d591dd3e7..0de80ed836d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -612,7 +612,7 @@ def __init__(
                 new_df = self._from_arrays(data, index=index, columns=columns)
 
             self._data = new_df._data
-            self.index = new_df._index
+            self._index = new_df._index
         elif hasattr(data, "__array_interface__"):
             arr_interface = data.__array_interface__
             if len(arr_interface["descr"]) == 1:
@@ -621,7 +621,7 @@ def __init__(
             else:
                 new_df = self.from_records(data, index=index, columns=columns)
             self._data = new_df._data
-            self.index = new_df._index
+            self._index = new_df._index
         else:
             if is_list_like(data):
                 if len(data) > 0 and is_scalar(data[0]):
@@ -632,7 +632,7 @@ def __init__(
                     new_df = DataFrame(data=data, index=index)
 
                     self._data = new_df._data
-                    self.index = new_df._index
+                    self._index = new_df._index
                 elif len(data) > 0 and isinstance(data[0], Series):
                     self._init_from_series_list(
                         data=data, columns=columns, index=index
@@ -650,6 +650,11 @@ def __init__(
                     data, index=index, columns=columns, nan_as_null=nan_as_null
                 )
 
+        if self._data.nrows > 0 and self._data.nrows != len(self._index):
+            raise ValueError(
+                f"Shape of passed values is {self.shape}, indices imply "
+                f"({len(self._index)}, {self._num_columns})"
+            )
         if dtype:
             self._data = self.astype(dtype)._data
 
@@ -855,10 +860,10 @@ def _from_data(
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
         columns: Any = None,
+        *args,
+        **kwargs,
     ) -> DataFrame:
-        out = super()._from_data(data, index)
-        if index is None:
-            out.index = RangeIndex(out._data.nrows)
+        out = super()._from_data(data=data, index=index)
         if columns is not None:
             out.columns = columns
         return out
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6820fe8fc7a..813dff24104 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -181,13 +181,9 @@ def deserialize(cls, header, frames):
 
     @classmethod
     @_cudf_nvtx_annotate
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[cudf.core.index.BaseIndex] = None,
-    ):
+    def _from_data(cls, data: MutableMapping, *args, **kwargs):
         obj = cls.__new__(cls)
-        Frame.__init__(obj, data, index)
+        Frame.__init__(obj, data)
         return obj
 
     @classmethod
@@ -1320,10 +1316,13 @@ def fillna(
             else:
                 filled_data[col_name] = col.copy(deep=True)
 
-        return self._mimic_inplace(
-            self._from_data(data=filled_data, index=self._index),
-            inplace=inplace,
+        ret = self._mimic_inplace(
+            self._from_data(data=filled_data), inplace=inplace,
         )
+        # TODO: Split this logic into the IndexedFrame class.
+        if isinstance(ret, cudf.core.indexed_frame.IndexedFrame):
+            ret._index = self._index
+        return ret
 
     @_cudf_nvtx_annotate
     def _drop_column(self, name):
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 1c68289898f..e944e5e61e9 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -113,7 +113,7 @@ def _index_from_data(data: MutableMapping, name: Any = None):
             index_class_type = IntervalIndex
     else:
         index_class_type = cudf.MultiIndex
-    return index_class_type._from_data(data, None, name)
+    return index_class_type._from_data(data, name)
 
 
 def _index_from_columns(
@@ -838,7 +838,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
 
         return NotImplemented
 
+    @classmethod
     @_cudf_nvtx_annotate
+    def _from_data(
+        cls, data: MutableMapping, name: Any = None, *args, **kwargs
+    ) -> GenericIndex:
+        out = super()._from_data(data=data)
+        if name is not None:
+            out.name = name
+        return out
+
     def _binaryop(
         self, other: T, op: str, fill_value: Any = None, *args, **kwargs,
     ) -> SingleColumnFrame:
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 256dc104586..b9b735fb222 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -8,7 +8,7 @@
 import warnings
 from collections import Counter, abc
 from functools import cached_property
-from typing import Any, Callable, Dict, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Callable, MutableMapping, Dict, Optional, Tuple, Type, TypeVar, Union
 from uuid import uuid4
 
 import cupy as cp
@@ -180,6 +180,18 @@ def _num_rows(self) -> int:
         # Important to use the index because the data may be empty.
         return len(self._index)
 
+    @classmethod
+    def _from_data(
+        cls,
+        data: MutableMapping,
+        index: Optional[BaseIndex] = None,
+        *args,
+        **kwargs,
+    ):
+        out = super()._from_data(data, *args, **kwargs)
+        out._index = RangeIndex(out._data.nrows) if index is None else index
+        return out
+
     @property
     def index(self):
         """Get the labels for the rows."""
@@ -1067,7 +1079,9 @@ def _align_to_index(
             result = result.sort_values(sort_col_id)
             del result[sort_col_id]
 
-        result = self.__class__._from_data(result._data, index=result.index)
+        result = self.__class__._from_data(
+            data=result._data, index=result.index
+        )
         result._data.multiindex = self._data.multiindex
         result._data._level_names = self._data._level_names
         result.index.names = self.index.names
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index c9036db05fa..c1ef8e315be 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -8,7 +8,7 @@
 from collections.abc import Sequence
 from functools import cached_property
 from numbers import Integral
-from typing import Any, List, MutableMapping, Optional, Tuple, Union
+from typing import Any, List, MutableMapping, Tuple, Union
 
 import cupy
 import numpy as np
@@ -278,14 +278,8 @@ def set_names(self, names, level=None, inplace=False):
 
     @classmethod
     @_cudf_nvtx_annotate
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[cudf.core.index.BaseIndex] = None,
-        name: Any = None,
-    ) -> MultiIndex:
-        assert index is None
-        obj = cls.from_frame(cudf.DataFrame._from_data(data))
+    def _from_data(cls, data: MutableMapping, name: Any = None,) -> MultiIndex:
+        obj = cls.from_frame(cudf.DataFrame._from_data(data=data))
         if name is not None:
             obj.name = name
         return obj
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index b3b73b8961c..5d862213165 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -458,13 +458,12 @@ def _from_data(
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
         name: Any = None,
+        *args,
+        **kwargs,
     ) -> Series:
-        """
-        Construct the Series from a ColumnAccessor
-        """
-        out: Series = super()._from_data(data, index, name)
-        if index is None:
-            out._index = RangeIndex(out._data.nrows)
+        out = super()._from_data(data=data, index=index)
+        if name is not None:
+            out.name = name
         return out
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 5bde75c2e21..136deb59334 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1565,18 +1565,10 @@ def test_dataframe_cupy_wrong_dimensions():
 def test_dataframe_cupy_array_wrong_index():
     d_ary = cupy.empty((2, 3), dtype=np.int32)
 
-    with pytest.raises(
-        ValueError,
-        match="Length mismatch: Expected axis has 2 elements, "
-        "new values have 1 elements",
-    ):
+    with pytest.raises(ValueError):
         cudf.DataFrame(d_ary, index=["a"])
 
-    with pytest.raises(
-        ValueError,
-        match="Length mismatch: Expected axis has 2 elements, "
-        "new values have 1 elements",
-    ):
+    with pytest.raises(ValueError):
         cudf.DataFrame(d_ary, index="a")
 
 

From b959413f7004528374f82353e5a159944fd14be0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 3 Mar 2022 09:38:19 -0800
Subject: [PATCH 03/17] Split _from_columns and _from_columns_like_self.

---
 python/cudf/cudf/core/frame.py         | 47 ++++-------------------
 python/cudf/cudf/core/indexed_frame.py | 53 +++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 813dff24104..d498ca686df 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -189,51 +189,20 @@ def _from_data(cls, data: MutableMapping, *args, **kwargs):
     @classmethod
     @_cudf_nvtx_annotate
     def _from_columns(
-        cls,
-        columns: List[ColumnBase],
-        column_names: abc.Iterable[str],
-        index_names: Optional[List[str]] = None,
+        cls, columns: List[ColumnBase], column_names: abc.Iterable[str],
     ):
-        """Construct a `Frame` object from a list of columns.
-
-        If `index_names` is set, the first `len(index_names)` columns are
-        used to construct the index of the frame.
-        """
-        index = None
-        n_index_columns = 0
-        if index_names is not None:
-            n_index_columns = len(index_names)
-            index = cudf.core.index._index_from_columns(
-                columns[:n_index_columns]
-            )
-            if isinstance(index, cudf.MultiIndex):
-                index.names = index_names
-            else:
-                index.name = index_names[0]
+        """Construct a `Frame` object from a list of columns."""
+        data = {name: columns[i] for i, name in enumerate(column_names)}
 
-        data = {
-            name: columns[i + n_index_columns]
-            for i, name in enumerate(column_names)
-        }
-
-        return cls._from_data(data, index)
+        return cls._from_data(data)
 
     @_cudf_nvtx_annotate
     def _from_columns_like_self(
-        self,
-        columns: List[ColumnBase],
-        column_names: abc.Iterable[str],
-        index_names: Optional[List[str]] = None,
+        self, columns: List[ColumnBase], column_names: abc.Iterable[str],
     ):
-        """Construct a `Frame` from a list of columns with metadata from self.
-
-        If `index_names` is set, the first `len(index_names)` columns are
-        used to construct the index of the frame.
-        """
-        frame = self.__class__._from_columns(
-            columns, column_names, index_names
-        )
-        return frame._copy_type_metadata(self, include_index=bool(index_names))
+        """Construct a Frame from a list of columns with metadata from self."""
+        frame = self.__class__._from_columns(columns, column_names)
+        return frame._copy_type_metadata(self)
 
     def _mimic_inplace(
         self: T, result: Frame, inplace: bool = False
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index b9b735fb222..6fd2bae978b 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -8,7 +8,7 @@
 import warnings
 from collections import Counter, abc
 from functools import cached_property
-from typing import Any, Callable, MutableMapping, Dict, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple, Type, TypeVar, Union
 from uuid import uuid4
 
 import cupy as cp
@@ -26,6 +26,7 @@
     is_list_dtype,
     is_list_like,
 )
+from cudf.core._base_index import BaseIndex
 from cudf.core.column import ColumnBase
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.frame import Frame, _drop_rows_by_labels
@@ -192,6 +193,56 @@ def _from_data(
         out._index = RangeIndex(out._data.nrows) if index is None else index
         return out
 
+    @classmethod
+    @annotate("FRAME_FROM_COLUMNS", color="green", domain="cudf_python")
+    def _from_columns(
+        cls,
+        columns: List[ColumnBase],
+        column_names: List[str],
+        index_names: Optional[List[str]] = None,
+    ):
+        """Construct a `Frame` object from a list of columns.
+
+        If `index_names` is set, the first `len(index_names)` columns are
+        used to construct the index of the frame.
+        """
+        data_columns = columns
+
+        n_index_columns = len(index_names) if index_names else 0
+        index_columns = columns[:n_index_columns]
+        data_columns = columns[n_index_columns:]
+
+        out = super()._from_columns(data_columns, column_names)
+
+        if index_names is not None:
+            out._index = cudf.core.index._index_from_columns(index_columns)
+            if isinstance(out._index, cudf.MultiIndex):
+                out._index.names = index_names
+            else:
+                assert len(index_names) == 1
+                out._index.name = index_names[0]
+
+        return out
+
+    @annotate(
+        "FRAME_FROM_COLUMNS_LIKE_SELF", color="green", domain="cudf_python"
+    )
+    def _from_columns_like_self(
+        self,
+        columns: List[ColumnBase],
+        column_names: List[str],
+        index_names: Optional[List[str]] = None,
+    ):
+        """Construct a `Frame` from a list of columns with metadata from self.
+
+        If `index_names` is set, the first `len(index_names)` columns are
+        used to construct the index of the frame.
+        """
+        frame = self.__class__._from_columns(
+            columns, column_names, index_names
+        )
+        return frame._copy_type_metadata(self, include_index=bool(index_names))
+
     @property
     def index(self):
         """Get the labels for the rows."""

From de5ca14ad41dc9d74bca850863d33111e5c28692 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 3 Mar 2022 10:20:51 -0800
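Subject: [PATCH 04/17] Fix bug in fillna.

With inplace=True, _mimic_inplace returns None, so the previous
isinstance(ret, IndexedFrame) check never passed and self was left with the
default RangeIndex created by _from_data instead of its original index.
Save the index before calling _mimic_inplace and restore it on self when
operating in place, or on the returned object otherwise.
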
---
 python/cudf/cudf/core/frame.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d498ca686df..ed7b7bc56d3 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1285,12 +1285,17 @@ def fillna(
             else:
                 filled_data[col_name] = col.copy(deep=True)
 
+        # TODO: This logic needs to move into the IndexedFrame class.
+        old_index = self._index
         ret = self._mimic_inplace(
             self._from_data(data=filled_data), inplace=inplace,
         )
         # TODO: Split this logic into the IndexedFrame class.
-        if isinstance(ret, cudf.core.indexed_frame.IndexedFrame):
-            ret._index = self._index
+        if isinstance(self, cudf.core.indexed_frame.IndexedFrame):
+            if inplace:
+                self._index = old_index
+            else:
+                ret._index = old_index
         return ret
 
     @_cudf_nvtx_annotate

From 3f2be82da648280c06e64f8e706965ae1a23dca2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 3 Mar 2022 10:26:01 -0800
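Subject: [PATCH 05/17] Remove now unnecessary _as_column.

Also rename the NVTX range labels on IndexedFrame._from_columns and
IndexedFrame._from_columns_like_self from FRAME_* to INDEXEDFRAME_*.
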
---
 python/cudf/cudf/core/frame.py         | 13 -------------
 python/cudf/cudf/core/indexed_frame.py |  6 ++++--
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ed7b7bc56d3..0ff4ec42033 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -585,19 +585,6 @@ def _get_columns_by_index(self, indices):
             data, columns=data.to_pandas_index(), index=self.index
         )
 
-    def _as_column(self):
-        """
-        _as_column : Converts a single columned Frame to Column
-        """
-        assert (
-            self._num_columns == 1
-            and self._index is None
-            and self._column_names[0] is None
-        ), """There should be only one data column,
-            no index and None as the name to use this method"""
-
-        return self._data[None].copy(deep=False)
-
     @property
     def values(self):
         """
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 6fd2bae978b..157748889f3 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -194,7 +194,7 @@ def _from_data(
         return out
 
     @classmethod
-    @annotate("FRAME_FROM_COLUMNS", color="green", domain="cudf_python")
+    @annotate("INDEXEDFRAME_FROM_COLUMNS", color="green", domain="cudf_python")
     def _from_columns(
         cls,
         columns: List[ColumnBase],
@@ -225,7 +225,9 @@ def _from_columns(
         return out
 
     @annotate(
-        "FRAME_FROM_COLUMNS_LIKE_SELF", color="green", domain="cudf_python"
+        "INDEXEDFRAME_FROM_COLUMNS_LIKE_SELF",
+        color="green",
+        domain="cudf_python",
     )
     def _from_columns_like_self(
         self,

From 3f13f7eab992fc023de51cbd4305b49eda6ee81f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 7 Mar 2022 14:19:17 -0800
Subject: [PATCH 06/17] Fix style.

---
 python/cudf/cudf/core/indexed_frame.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 157748889f3..30f5b1e0475 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -8,7 +8,18 @@
 import warnings
 from collections import Counter, abc
 from functools import cached_property
-from typing import Any, Callable, Dict, List, MutableMapping, Optional, Tuple, Type, TypeVar, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 from uuid import uuid4
 
 import cupy as cp

From 3fd4cb9744c32626c504111a0343480832223dfd Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Mar 2022 11:35:51 -0700
Subject: [PATCH 07/17] Update annotations.

---
 python/cudf/cudf/core/indexed_frame.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 30f5b1e0475..3e54279c6d8 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -205,7 +205,7 @@ def _from_data(
         return out
 
     @classmethod
-    @annotate("INDEXEDFRAME_FROM_COLUMNS", color="green", domain="cudf_python")
+    @_cudf_nvtx_annotate
     def _from_columns(
         cls,
         columns: List[ColumnBase],
@@ -235,11 +235,7 @@ def _from_columns(
 
         return out
 
-    @annotate(
-        "INDEXEDFRAME_FROM_COLUMNS_LIKE_SELF",
-        color="green",
-        domain="cudf_python",
-    )
+    @_cudf_nvtx_annotate
     def _from_columns_like_self(
         self,
         columns: List[ColumnBase],

From 438152bc57264a45fae5fd7d8fb56a47782ef3fe Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Mar 2022 12:14:11 -0700
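Subject: [PATCH 08/17] Move copy and mimic_inplace.

Frame.copy is removed and reimplemented on IndexedFrame, and the index
assignment in Frame._mimic_inplace moves into an IndexedFrame override.
IndexedFrame.copy is now built on _from_data and always takes a shallow
copy of the index, regardless of the value of `deep`.
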
---
 python/cudf/cudf/core/frame.py         | 87 --------------------------
 python/cudf/cudf/core/indexed_frame.py | 76 ++++++++++++++++++++++
 2 files changed, 76 insertions(+), 87 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 0ff4ec42033..db52e6ba061 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -214,7 +214,6 @@ def _mimic_inplace(
                         result._data[col], inplace=True
                     )
             self._data = result._data
-            self._index = result._index
             return None
         else:
             return result
@@ -385,92 +384,6 @@ def memory_usage(self, deep=False):
     def __len__(self):
         return self._num_rows
 
-    @_cudf_nvtx_annotate
-    def copy(self: T, deep: bool = True) -> T:
-        """
-        Make a copy of this object's indices and data.
-
-        When ``deep=True`` (default), a new object will be created with a
-        copy of the calling object's data and indices. Modifications to
-        the data or indices of the copy will not be reflected in the
-        original object (see notes below).
-        When ``deep=False``, a new object will be created without copying
-        the calling object's data or index (only references to the data
-        and index are copied). Any changes to the data of the original
-        will be reflected in the shallow copy (and vice versa).
-
-        Parameters
-        ----------
-        deep : bool, default True
-            Make a deep copy, including a copy of the data and the indices.
-            With ``deep=False`` neither the indices nor the data are copied.
-
-        Returns
-        -------
-        copy : Series or DataFrame
-            Object type matches caller.
-
-        Examples
-        --------
-        >>> s = cudf.Series([1, 2], index=["a", "b"])
-        >>> s
-        a    1
-        b    2
-        dtype: int64
-        >>> s_copy = s.copy()
-        >>> s_copy
-        a    1
-        b    2
-        dtype: int64
-
-        **Shallow copy versus default (deep) copy:**
-
-        >>> s = cudf.Series([1, 2], index=["a", "b"])
-        >>> deep = s.copy()
-        >>> shallow = s.copy(deep=False)
-
-        Shallow copy shares data and index with original.
-
-        >>> s is shallow
-        False
-        >>> s._column is shallow._column and s.index is shallow.index
-        True
-
-        Deep copy has own copy of data and index.
-
-        >>> s is deep
-        False
-        >>> s.values is deep.values or s.index is deep.index
-        False
-
-        Updates to the data shared by shallow copy and original is reflected
-        in both; deep copy remains unchanged.
-
-        >>> s['a'] = 3
-        >>> shallow['b'] = 4
-        >>> s
-        a    3
-        b    4
-        dtype: int64
-        >>> shallow
-        a    3
-        b    4
-        dtype: int64
-        >>> deep
-        a    1
-        b    2
-        dtype: int64
-        """
-        new_frame = self.__class__.__new__(self.__class__)
-        new_frame._data = self._data.copy(deep=deep)
-
-        if self._index is not None:
-            new_frame._index = self._index.copy(deep=deep)
-        else:
-            new_frame._index = None
-
-        return new_frame
-
     @_cudf_nvtx_annotate
     def astype(self, dtype, copy=False, **kwargs):
         result = {}
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 3e54279c6d8..fafdd43c06b 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -19,6 +19,7 @@
     Type,
     TypeVar,
     Union,
+    cast,
 )
 from uuid import uuid4
 
@@ -252,6 +253,81 @@ def _from_columns_like_self(
         )
         return frame._copy_type_metadata(self, include_index=bool(index_names))
 
+    def _mimic_inplace(
+        self: T, result: Frame, inplace: bool = False
+    ) -> Optional[Frame]:
+        # TODO: Is there a better way to make mypy happy?
+        result = cast(IndexedFrame, result)
+        if inplace:
+            self._index = result._index
+        return super()._mimic_inplace(result, inplace)
+
+    def copy(self: T, deep: bool = True) -> T:
+        """Make a copy of this object's indices and data.
+
+        When ``deep=True`` (default), a new object will be created with a
+        copy of the calling object's data and indices. Modifications to
+        the data or indices of the copy will not be reflected in the
+        original object (see notes below).
+        When ``deep=False``, a new object will be created without copying
+        the calling object's data or index (only references to the data
+        and index are copied). Any changes to the data of the original
+        will be reflected in the shallow copy (and vice versa).
+
+        Parameters
+        ----------
+        deep : bool, default True
+            Make a deep copy, including a copy of the data and the indices.
+            With ``deep=False`` neither the indices nor the data are copied.
+
+        Returns
+        -------
+        copy : Series or DataFrame
+            Object type matches caller.
+
+        Examples
+        --------
+        >>> s = cudf.Series([1, 2], index=["a", "b"])
+        >>> s
+        a    1
+        b    2
+        dtype: int64
+        >>> s_copy = s.copy()
+        >>> s_copy
+        a    1
+        b    2
+        dtype: int64
+
+        **Shallow copy versus default (deep) copy:**
+
+        >>> s = cudf.Series([1, 2], index=["a", "b"])
+        >>> deep = s.copy()
+        >>> shallow = s.copy(deep=False)
+
+        Updates to the data shared by shallow copy and original is reflected
+        in both; deep copy remains unchanged.
+
+        >>> s['a'] = 3
+        >>> shallow['b'] = 4
+        >>> s
+        a    3
+        b    4
+        dtype: int64
+        >>> shallow
+        a    3
+        b    4
+        dtype: int64
+        >>> deep
+        a    1
+        b    2
+        dtype: int64
+        """
+        return self._from_data(
+            self._data.copy(deep=deep),
+            # Indexes are immutable so copies can always be shallow.
+            self._index.copy(deep=False),
+        )
+
     @property
     def index(self):
         """Get the labels for the rows."""

From 6da676af01649c2c515be17cfe889c2422d30bb3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Mar 2022 14:24:36 -0700
Subject: [PATCH 09/17] Standardize equals.

---
 python/cudf/cudf/core/frame.py         | 35 +++++++++-----------------
 python/cudf/cudf/core/index.py         | 18 ++++++++-----
 python/cudf/cudf/core/indexed_frame.py |  6 +++++
 3 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index db52e6ba061..e2cf5abb3a2 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -397,7 +397,7 @@ def astype(self, dtype, copy=False, **kwargs):
         return result
 
     @_cudf_nvtx_annotate
-    def equals(self, other, **kwargs):
+    def equals(self, other):
         """
         Test whether two objects contain the same elements.
         This function allows two Series or DataFrames to be compared against
@@ -454,30 +454,19 @@ def equals(self, other, **kwargs):
         >>> df.equals(different_column_type)
         True
         """
-        if self is other:
-            return True
-
-        check_types = kwargs.get("check_types", True)
-
-        if check_types:
-            if type(self) is not type(other):
-                return False
-
-        if other is None or len(self) != len(other):
-            return False
-
-        # check data:
-        for self_col, other_col in zip(
-            self._data.values(), other._data.values()
+        if (
+            other is None
+            or not isinstance(other, type(self))
+            or len(self) != len(other)
         ):
-            if not self_col.equals(other_col, check_dtypes=check_types):
-                return False
+            return False
 
-        # check index:
-        if self._index is None:
-            return other._index is None
-        else:
-            return self._index.equals(other._index)
+        return all(
+            self_col.equals(other_col, check_dtypes=True)
+            for self_col, other_col in zip(
+                self._data.values(), other._data.values()
+            )
+        )
 
     @_cudf_nvtx_annotate
     def _get_columns_by_label(self, labels, downcast=False):
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e944e5e61e9..802b25684c2 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -375,7 +375,7 @@ def equals(self, other):
                 other._step,
             ):
                 return True
-        return Int64Index._from_data(self._data).equals(other)
+        return self._as_int64().equals(other)
 
     @_cudf_nvtx_annotate
     def serialize(self):
@@ -921,22 +921,28 @@ def equals(self, other, **kwargs):
             True if “other” is an Index and it has the same elements
             as calling index; False otherwise.
         """
-        if not isinstance(other, BaseIndex):
+        if (
+            other is None
+            or not isinstance(other, BaseIndex)
+            or len(self) != len(other)
+        ):
             return False
 
-        check_types = False
+        check_dtypes = False
 
         self_is_categorical = isinstance(self, CategoricalIndex)
         other_is_categorical = isinstance(other, CategoricalIndex)
         if self_is_categorical and not other_is_categorical:
             other = other.astype(self.dtype)
-            check_types = True
+            check_dtypes = True
         elif other_is_categorical and not self_is_categorical:
             self = self.astype(other.dtype)
-            check_types = True
+            check_dtypes = True
 
         try:
-            return super().equals(other, check_types=check_types)
+            return self._column.equals(
+                other._column, check_dtypes=check_dtypes
+            )
         except TypeError:
             return False
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index fafdd43c06b..3e09fbdffd4 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -328,6 +328,12 @@ def copy(self: T, deep: bool = True) -> T:
             self._index.copy(deep=False),
         )
 
+    @_cudf_nvtx_annotate
+    def equals(self, other):  # noqa: D102
+        # super().equals() compares the column data; the index is left to us.
+        frames_match = super().equals(other)
+        return frames_match and self._index.equals(other._index)
+
     @property
     def index(self):
         """Get the labels for the rows."""

From 61f9d07de80f9c83cf9e6c4a4a8380e1d7715dbb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Mar 2022 15:03:47 -0700
Subject: [PATCH 10/17] Simplify column selection by index.

---
 python/cudf/cudf/core/column_accessor.py | 28 ++++++++++++++++++------
 python/cudf/cudf/core/dataframe.py       | 12 +++++-----
 python/cudf/cudf/core/frame.py           | 11 ----------
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 9cb86ca1cd2..c9c00692174 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -342,6 +342,26 @@ def select_by_label(self, key: Any) -> ColumnAccessor:
                     return self._select_by_label_with_wildcard(key)
             return self._select_by_label_grouped(key)
 
+    def get_labels_by_index(self, index: Any) -> tuple:
+        """Get the labels corresponding to the provided column indices.
+
+        Parameters
+        ----------
+        index : integer, integer slice, or list-like of integers
+            The positional column index or indices to look up.
+
+        Returns
+        -------
+        tuple
+            The matching column labels, in the order requested.
+        """
+        if isinstance(index, slice):
+            # Slice directly: re-slicing with slice.indices() breaks [::-1].
+            return self.names[index]
+        if pd.api.types.is_integer(index):
+            return (self.names[index],)
+        return tuple(self.names[i] for i in index)
+
     def select_by_index(self, index: Any) -> ColumnAccessor:
         """
         Return a ColumnAccessor composed of the columns
@@ -355,13 +375,7 @@ def select_by_index(self, index: Any) -> ColumnAccessor:
         -------
         ColumnAccessor
         """
-        if isinstance(index, slice):
-            start, stop, step = index.indices(len(self._data))
-            keys = self.names[start:stop:step]
-        elif pd.api.types.is_integer(index):
-            keys = (self.names[index],)
-        else:
-            keys = tuple(self.names[i] for i in index)
+        keys = self.get_labels_by_index(index)
         data = {k: self._data[k] for k in keys}
         return self.__class__(
             data, multiindex=self.multiindex, level_names=self.level_names,
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0de80ed836d..b93dc2a7993 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -372,9 +372,9 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
     def _getitem_tuple_arg(self, arg):
         # Iloc Step 1:
         # Gather the columns specified by the second tuple arg
-        columns_df = self._frame._get_columns_by_index(arg[1])
-
-        columns_df._index = self._frame._index
+        columns_df = self._frame._from_data(
+            self._frame._data.select_by_index(arg[1]), self._frame._index
+        )
 
         # Iloc Step 2:
         # Gather the rows specified by the first tuple arg
@@ -422,9 +422,9 @@ def _getitem_tuple_arg(self, arg):
 
     @_cudf_nvtx_annotate
     def _setitem_tuple_arg(self, key, value):
-        columns = self._frame._get_columns_by_index(key[1])
-
-        for col in columns:
+        # TODO: Determine if this usage is prevalent enough to expose this
+        # selection logic at a higher level than ColumnAccessor.
+        for col in self._frame._data.get_labels_by_index(key[1]):
             self._frame[col].iloc[key[0]] = value
 
     def _getitem_scalar(self, arg):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e2cf5abb3a2..b54f92af9bd 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -476,17 +476,6 @@ def _get_columns_by_label(self, labels, downcast=False):
         """
         return self._data.select_by_label(labels)
 
-    @_cudf_nvtx_annotate
-    def _get_columns_by_index(self, indices):
-        """
-        Returns columns of the Frame specified by `labels`
-
-        """
-        data = self._data.select_by_index(indices)
-        return self.__class__._from_data(
-            data, columns=data.to_pandas_index(), index=self.index
-        )
-
     @property
     def values(self):
         """

From 6c607af2f2f8e5e73965de0d337017814e6b27fb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 15 Mar 2022 15:27:39 -0700
Subject: [PATCH 11/17] Simplify clip and prep for move to IndexedFrame.

---
 python/cudf/cudf/core/frame.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index b54f92af9bd..36ec290da42 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -721,6 +721,10 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
         3    4
         dtype: int64
         """
+        if isinstance(self, cudf.BaseIndex):
+            warnings.warn(
+                "Index.clip is deprecated and will be removed.", FutureWarning,
+            )
 
         if axis != 1:
             raise NotImplementedError("`axis is not yet supported in clip`")
@@ -738,13 +742,10 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
 
         if len(lower) != self._num_columns:
             raise ValueError(
-                """Length of lower/upper should be
-                equal to number of columns in
-                DataFrame/Series/Index/MultiIndex"""
+                "Length of lower/upper should be equal to number of columns"
             )
 
-        output = self.copy(deep=False)
-        if output.ndim == 1:
+        if self.ndim == 1:
             # In case of series and Index,
             # swap lower and upper if lower > upper
             if (
@@ -754,11 +755,12 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
             ):
                 lower[0], upper[0] = upper[0], lower[0]
 
-        for i, name in enumerate(self._data):
-            output._data[name] = self._data[name].clip(lower[i], upper[i])
-
+        data = {
+            name: col.clip(lower[i], upper[i])
+            for i, (name, col) in enumerate(self._data.items())
+        }
+        output = self._from_data(data, self._index)
         output._copy_type_metadata(self, include_index=False)
-
         return self._mimic_inplace(output, inplace=inplace)
 
     @_cudf_nvtx_annotate

From 991ca6a6c4dff44cef1dc82012d574e6fc00a59e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 16 Mar 2022 17:20:33 -0700
Subject: [PATCH 12/17] Address first set of PR reviews.

---
 python/cudf/cudf/core/frame.py         | 7 ++-----
 python/cudf/cudf/core/indexed_frame.py | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 36ec290da42..20da872f117 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1171,11 +1171,8 @@ def fillna(
             self._from_data(data=filled_data), inplace=inplace,
         )
         # TODO: Split this logic into the IndexedFrame class.
-        if isinstance(self, cudf.core.indexed_frame.IndexedFrame):
-            if inplace:
-                self._index = old_index
-            else:
-                ret._index = old_index
+        if isinstance(self, cudf.core.indexed_frame.IndexedFrame) and not inplace:
+            ret._index = old_index
         return ret
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 3e09fbdffd4..7c9b0381c69 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -231,7 +231,6 @@ def _from_columns(
             if isinstance(out._index, cudf.MultiIndex):
                 out._index.names = index_names
             else:
-                assert len(index_names) == 1
                 out._index.name = index_names[0]
 
         return out

From 87ed661f8a86b5a23acfab942612233101b6d61d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 16 Mar 2022 17:41:52 -0700
Subject: [PATCH 13/17] Make mypy happy in better ways.

---
 python/cudf/cudf/core/dataframe.py           |  2 --
 python/cudf/cudf/core/frame.py               |  9 ++++---
 python/cudf/cudf/core/index.py               |  2 +-
 python/cudf/cudf/core/indexed_frame.py       | 13 +++-------
 python/cudf/cudf/core/series.py              |  2 --
 python/cudf/cudf/core/single_column_frame.py | 25 +-------------------
 6 files changed, 11 insertions(+), 42 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b93dc2a7993..738ff59c33e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -860,8 +860,6 @@ def _from_data(
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
         columns: Any = None,
-        *args,
-        **kwargs,
     ) -> DataFrame:
         out = super()._from_data(data=data, index=index)
         if columns is not None:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 20da872f117..fab0c7fafb0 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -181,7 +181,7 @@ def deserialize(cls, header, frames):
 
     @classmethod
     @_cudf_nvtx_annotate
-    def _from_data(cls, data: MutableMapping, *args, **kwargs):
+    def _from_data(cls, data: MutableMapping):
         obj = cls.__new__(cls)
         Frame.__init__(obj, data)
         return obj
@@ -205,7 +205,7 @@ def _from_columns_like_self(
         return frame._copy_type_metadata(self)
 
     def _mimic_inplace(
-        self: T, result: Frame, inplace: bool = False
+        self: T, result: T, inplace: bool = False
     ) -> Optional[Frame]:
         if inplace:
             for col in self._data:
@@ -1171,7 +1171,10 @@ def fillna(
             self._from_data(data=filled_data), inplace=inplace,
         )
         # TODO: Split this logic into the IndexedFrame class.
-        if isinstance(self, cudf.core.indexed_frame.IndexedFrame) and not inplace:
+        if (
+            isinstance(self, cudf.core.indexed_frame.IndexedFrame)
+            and not inplace
+        ):
             ret._index = old_index
         return ret
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 802b25684c2..60bfeec9a72 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -841,7 +841,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     @classmethod
     @_cudf_nvtx_annotate
     def _from_data(
-        cls, data: MutableMapping, name: Any = None, *args, **kwargs
+        cls, data: MutableMapping, name: Any = None
     ) -> GenericIndex:
         out = super()._from_data(data=data)
         if name is not None:
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 7c9b0381c69..adc8818aea3 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -19,7 +19,6 @@
     Type,
     TypeVar,
     Union,
-    cast,
 )
 from uuid import uuid4
 
@@ -195,13 +194,9 @@ def _num_rows(self) -> int:
 
     @classmethod
     def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[BaseIndex] = None,
-        *args,
-        **kwargs,
+        cls, data: MutableMapping, index: Optional[BaseIndex] = None,
     ):
-        out = super()._from_data(data, *args, **kwargs)
+        out = super()._from_data(data)
         out._index = RangeIndex(out._data.nrows) if index is None else index
         return out
 
@@ -253,10 +248,8 @@ def _from_columns_like_self(
         return frame._copy_type_metadata(self, include_index=bool(index_names))
 
     def _mimic_inplace(
-        self: T, result: Frame, inplace: bool = False
+        self: T, result: T, inplace: bool = False
     ) -> Optional[Frame]:
-        # TODO: Is there a better way to make mypy happy?
-        result = cast(IndexedFrame, result)
         if inplace:
             self._index = result._index
         return super()._mimic_inplace(result, inplace)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 5d862213165..0bb82b4ddad 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -458,8 +458,6 @@ def _from_data(
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
         name: Any = None,
-        *args,
-        **kwargs,
     ) -> Series:
         out = super()._from_data(data=data, index=index)
         if name is not None:
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index b35d653e28f..de10261315c 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -3,16 +3,7 @@
 
 from __future__ import annotations
 
-from typing import (
-    Any,
-    Dict,
-    MutableMapping,
-    Optional,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-)
+from typing import Any, Dict, Optional, Tuple, Type, TypeVar, Union
 
 import cupy
 import numpy as np
@@ -67,20 +58,6 @@ def _scan(self, op, axis=None, *args, **kwargs):
 
         return super()._scan(op, axis=axis, *args, **kwargs)
 
-    @classmethod
-    @_cudf_nvtx_annotate
-    def _from_data(
-        cls,
-        data: MutableMapping,
-        index: Optional[cudf.core.index.BaseIndex] = None,
-        name: Any = None,
-    ):
-
-        out = super()._from_data(data, index)
-        if name is not None:
-            out.name = name
-        return out
-
     @property  # type: ignore
     @_cudf_nvtx_annotate
     def name(self):

From 292acfa5d7ada9f4b59641e333041ed18d8d0b75 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 17 Mar 2022 09:36:14 -0700
Subject: [PATCH 14/17] Avoid constructing a Frame and just use
 DataFrame._from_data to construct it fast.

---
 python/cudf/cudf/core/dataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 738ff59c33e..83530153641 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5603,7 +5603,9 @@ def stack(self, level=-1, dropna=True):
         """
         assert level in (None, -1)
         repeated_index = self.index.repeat(self.shape[1])
-        name_index = Frame({0: self._column_names}).tile(self.shape[0])
+        name_index = cudf.DataFrame._from_data({0: self._column_names}).tile(
+            self.shape[0]
+        )
         new_index = list(repeated_index._columns) + [name_index._columns[0]]
         if isinstance(self._index, MultiIndex):
             index_names = self._index.names + [None]

From f2bf9aa337c9c003b2dd11fba769a1c46cceded1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 17 Mar 2022 09:45:09 -0700
Subject: [PATCH 15/17] Fix bug: restore the index after an inplace fillna.

---
 python/cudf/cudf/core/frame.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index fab0c7fafb0..51cbfcebb00 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1171,11 +1171,11 @@ def fillna(
             self._from_data(data=filled_data), inplace=inplace,
         )
         # TODO: Split this logic into the IndexedFrame class.
-        if (
-            isinstance(self, cudf.core.indexed_frame.IndexedFrame)
-            and not inplace
-        ):
-            ret._index = old_index
+        if isinstance(self, cudf.core.indexed_frame.IndexedFrame):
+            if inplace:
+                self._index = old_index
+            else:
+                ret._index = old_index
         return ret
 
     @_cudf_nvtx_annotate

From 25b5df9b0601ba9e9c87ac8ed834fba12a2a1ac0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 17 Mar 2022 13:47:31 -0700
Subject: [PATCH 16/17] Move index replacement logic in fillna to IndexedFrame.

---
 python/cudf/cudf/core/frame.py         | 13 ++-----------
 python/cudf/cudf/core/indexed_frame.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 51cbfcebb00..aead63313f7 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1037,7 +1037,7 @@ def fillna(
 
         Returns
         -------
-        result : DataFrame
+        result : DataFrame, Series, or Index
             Copy with nulls filled.
 
         Examples
@@ -1165,18 +1165,9 @@ def fillna(
             else:
                 filled_data[col_name] = col.copy(deep=True)
 
-        # TODO: This logic needs to move into the IndexedFrame class.
-        old_index = self._index
-        ret = self._mimic_inplace(
+        return self._mimic_inplace(
             self._from_data(data=filled_data), inplace=inplace,
         )
-        # TODO: Split this logic into the IndexedFrame class.
-        if isinstance(self, cudf.core.indexed_frame.IndexedFrame):
-            if inplace:
-                self._index = old_index
-            else:
-                ret._index = old_index
-        return ret
 
     @_cudf_nvtx_annotate
     def _drop_column(self, name):
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index adc8818aea3..61efcb6c4bf 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -904,6 +904,18 @@ def _split(self, splits, keep_index=True):
             for i in range(len(splits) + 1)
         ]
 
+    @_cudf_nvtx_annotate
+    def fillna(
+        self, value=None, method=None, axis=None, inplace=False, limit=None
+    ):  # noqa: D102
+        # The parent fillna rebuilds the frame from column data alone, so
+        # the index in place before the call is put back on the result.
+        original_index = self._index
+        filled = super().fillna(value, method, axis, inplace, limit)
+        target = self if inplace else filled
+        target._index = original_index
+        return filled
+
     def add_prefix(self, prefix):
         """
         Prefix labels with string `prefix`.

From 21b7ef2a3cdb8a54a28ba898fb80fa8284306156 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 18 Mar 2022 16:48:41 -0700
Subject: [PATCH 17/17] Address PR comments.

---
 python/cudf/cudf/core/dataframe.py | 17 ++++++++++++-----
 python/cudf/cudf/core/frame.py     |  2 ++
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 83530153641..6f05e9bd678 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -613,6 +613,7 @@ def __init__(
 
             self._data = new_df._data
             self._index = new_df._index
+            self._check_data_index_length_match()
         elif hasattr(data, "__array_interface__"):
             arr_interface = data.__array_interface__
             if len(arr_interface["descr"]) == 1:
@@ -622,6 +623,7 @@ def __init__(
                 new_df = self.from_records(data, index=index, columns=columns)
             self._data = new_df._data
             self._index = new_df._index
+            self._check_data_index_length_match()
         else:
             if is_list_like(data):
                 if len(data) > 0 and is_scalar(data[0]):
@@ -633,6 +635,7 @@ def __init__(
 
                     self._data = new_df._data
                     self._index = new_df._index
+                    self._check_data_index_length_match()
                 elif len(data) > 0 and isinstance(data[0], Series):
                     self._init_from_series_list(
                         data=data, columns=columns, index=index
@@ -650,14 +653,18 @@ def __init__(
                     data, index=index, columns=columns, nan_as_null=nan_as_null
                 )
 
-        if self._data.nrows > 0 and self._data.nrows != len(self._index):
-            raise ValueError(
-                f"Shape of passed values is {self.shape}, indices imply "
-                f"({len(self._index)}, {self._num_columns})"
-            )
         if dtype:
             self._data = self.astype(dtype)._data
 
+    def _check_data_index_length_match(self) -> None:
+        # Constructor helper: non-empty column data must have exactly as
+        # many rows as the index; empty data is compatible with any index.
+        if self._data.nrows > 0 and self._data.nrows != len(self._index):
+            raise ValueError(
+                f"Shape of passed values is {self.shape}, indices imply "
+                f"({len(self._index)}, {self._num_columns})"
+            )
+
     @_cudf_nvtx_annotate
     def _init_from_series_list(self, data, columns, index):
         if index is None:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index aead63313f7..0b476d5c982 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -454,6 +454,8 @@ def equals(self, other):
         >>> df.equals(different_column_type)
         True
         """
+        if self is other:
+            return True
         if (
             other is None
             or not isinstance(other, type(self))