From 213446501a204f629ea27321222cb44077f5a9a9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 3 Dec 2021 14:30:37 -0800
Subject: [PATCH 01/11] Inline _repeat and _shift.

---
 python/cudf/cudf/core/frame.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c85ed0c8555..b9dd4d192a9 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1798,14 +1798,11 @@ def repeat(self, repeats, axis=None):
                 "Only axis=`None` supported at this time."
             )
 
-        return self._repeat(repeats)
-
-    def _repeat(self, count):
-        if not is_scalar(count):
-            count = as_column(count)
+        if not is_scalar(repeats):
+            repeats = as_column(repeats)
 
         result = self.__class__._from_data(
-            *libcudf.filling.repeat(self, count)
+            *libcudf.filling.repeat(self, repeats)
         )
 
         result._copy_type_metadata(self)
@@ -1827,11 +1824,15 @@ def _fill(self, fill_values, begin, end, inplace):
 
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
-        assert axis in (None, 0) and freq is None
-        return self._shift(periods)
+        axis = self._get_axis_from_axis_arg(axis)
+        if axis != 0:
+            raise ValueError("Only axis=0 is supported.")
+        if freq is not None:
+            raise ValueError("The freq argument is not yet supported.")
 
-    def _shift(self, offset, fill_value=None):
-        data_columns = (col.shift(offset, fill_value) for col in self._columns)
+        data_columns = (
+            col.shift(periods, fill_value) for col in self._columns
+        )
         return self.__class__._from_data(
             zip(self._column_names, data_columns), self._index
         )

From 450353416ccb5eabbeb9e43de3d5a7a993bca53a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 3 Dec 2021 14:36:33 -0800
Subject: [PATCH 02/11] Inline _fill and deprecate fill.

---
 python/cudf/cudf/core/frame.py  | 14 --------------
 python/cudf/cudf/core/series.py | 18 +++++++++++++++++-
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index b9dd4d192a9..c4ee00c60b7 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1808,20 +1808,6 @@ def repeat(self, repeats, axis=None):
         result._copy_type_metadata(self)
         return result
 
-    def _fill(self, fill_values, begin, end, inplace):
-        col_and_fill = zip(self._columns, fill_values)
-
-        if not inplace:
-            data_columns = (c._fill(v, begin, end) for (c, v) in col_and_fill)
-            return self.__class__._from_data(
-                zip(self._column_names, data_columns), self._index
-            )
-
-        for (c, v) in col_and_fill:
-            c.fill(v, begin, end, inplace=True)
-
-        return self
-
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
         axis = self._get_axis_from_axis_arg(axis)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 036c8c1ee00..8bc757f33db 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1629,7 +1629,23 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
         return self._mimic_inplace(result, inplace=inplace)
 
     def fill(self, fill_value, begin=0, end=-1, inplace=False):
-        return self._fill([fill_value], begin, end, inplace)
+        warnings.warn(
+            "The fill method will be removed in a future cuDF " "release.",
+            FutureWarning,
+        )
+        fill_values = [fill_value]
+        col_and_fill = zip(self._columns, fill_values)
+
+        if not inplace:
+            data_columns = (c._fill(v, begin, end) for (c, v) in col_and_fill)
+            return self.__class__._from_data(
+                zip(self._column_names, data_columns), self._index
+            )
+
+        for (c, v) in col_and_fill:
+            c.fill(v, begin, end, inplace=True)
+
+        return self
 
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None

From 7f734973fc4014f8dcf5542c33dbeb4e45db8db2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 3 Dec 2021 14:48:20 -0800
Subject: [PATCH 03/11] Inline _split.

---
 python/cudf/cudf/core/frame.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c4ee00c60b7..7eb7aa0c3cd 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1024,6 +1024,11 @@ def _partition(self, scatter_map, npartitions, keep_index=True):
 
         result = partitioned._split(output_offsets, keep_index=keep_index)
 
+        splits = libcudf.copying.table_split(
+            self, output_offsets, keep_index=keep_index
+        )
+        result = [self.__class__._from_data(*result) for result in splits]
+
         for frame in result:
             frame._copy_type_metadata(self, include_index=keep_index)
 
@@ -3733,12 +3738,6 @@ def _is_sorted(self, ascending=None, null_position=None):
             self, ascending=ascending, null_position=null_position
         )
 
-    def _split(self, splits, keep_index=True):
-        results = libcudf.copying.table_split(
-            self, splits, keep_index=keep_index
-        )
-        return [self.__class__._from_data(*result) for result in results]
-
     def _encode(self):
         data, index, indices = libcudf.transform.table_encode(self)
         for name, col in data.items():

From 4185253606e7732253258ff0b0beb63f8607b4f7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 09:29:06 -0800
Subject: [PATCH 04/11] Remove _repr_pandas025_formatting.

---
 python/cudf/cudf/core/dataframe.py  | 45 -----------------------------
 python/cudf/cudf/tests/test_repr.py |  6 ----
 2 files changed, 51 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 88c8aaebd9e..16ccbfdeabf 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1672,51 +1672,6 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
 
         return result
 
-    def _repr_pandas025_formatting(self, ncols, nrows, dtype=None):
-        """
-        With Pandas > 0.25 there are some new conditional formatting for some
-        datatypes and column/row configurations. This fixes most of them in
-        context to match the expected Pandas repr of the same content.
-
-        Examples
-        --------
-        >>> gdf.__repr__()
-            0   ...  19
-        0   46  ...  48
-        ..  ..  ...  ..
-        19  40  ...  29
-
-        [20 rows x 20 columns]
-
-        >>> nrows, ncols = _repr_pandas025_formatting(2, 2, dtype="category")
-        >>> pd.options.display.max_rows = nrows
-        >>> pd.options.display.max_columns = ncols
-        >>> gdf.__repr__()
-             0  ...  19
-        0   46  ...  48
-        ..  ..  ...  ..
-        19  40  ...  29
-
-        [20 rows x 20 columns]
-        """
-        ncols = 1 if ncols in [0, 2] and dtype == "datetime64[ns]" else ncols
-        ncols = (
-            1
-            if ncols == 0
-            and nrows == 1
-            and dtype in ["int8", "str", "category"]
-            else ncols
-        )
-        ncols = (
-            1
-            if nrows == 1
-            and dtype in ["int8", "int16", "int64", "str", "category"]
-            else ncols
-        )
-        ncols = 0 if ncols == 2 else ncols
-        ncols = 19 if ncols in [20, 21] else ncols
-        return ncols, nrows
-
     def _clean_renderable_dataframe(self, output):
         """
         This method takes in partial/preprocessed dataframe
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index fe95b2930df..f8c136b8c2d 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -98,15 +98,9 @@ def test_full_dataframe_20(dtype, nrows, ncols):
     ).astype(dtype)
     gdf = cudf.from_pandas(pdf)
 
-    ncols, nrows = gdf._repr_pandas025_formatting(ncols, nrows, dtype)
-    pd.options.display.max_rows = int(nrows)
-    pd.options.display.max_columns = int(ncols)
-
     assert pdf.__repr__() == gdf.__repr__()
     assert pdf._repr_html_() == gdf._repr_html_()
     assert pdf._repr_latex_() == gdf._repr_latex_()
-    pd.reset_option("display.max_rows")
-    pd.reset_option("display.max_columns")
 
 
 @pytest.mark.parametrize("dtype", repr_categories)

From 74d4a1079140400a94886d4145853229722a7d5a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 09:45:22 -0800
Subject: [PATCH 05/11] Remove unnecessary local imports.

---
 python/cudf/cudf/core/dataframe.py | 65 +++++++++++-------------------
 1 file changed, 24 insertions(+), 41 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 16ccbfdeabf..4696804fe01 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -59,6 +59,7 @@
     _get_label_range_or_mask,
     _indices_from_labels,
 )
+from cudf.core.multiindex import MultiIndex
 from cudf.core.resample import DataFrameResampler
 from cudf.core.series import Series
 from cudf.utils import applyutils, docutils, ioutils, queryutils, utils
@@ -90,10 +91,8 @@
 
 class _DataFrameIndexer(_FrameIndexer):
     def __getitem__(self, arg):
-        from cudf import MultiIndex
-
-        if isinstance(self._frame.index, MultiIndex) or isinstance(
-            self._frame.columns, MultiIndex
+        if isinstance(self._frame.index, cudf.MultiIndex) or isinstance(
+            self._frame.columns, cudf.MultiIndex
         ):
             # This try/except block allows the use of pandas-like
             # tuple arguments into MultiIndex dataframes.
@@ -118,8 +117,6 @@ def _can_downcast_to_series(self, df, arg):
         operation should be "downcasted" from a DataFrame to a
         Series
         """
-        from cudf.core.column import as_column
-
         if isinstance(df, cudf.Series):
             return False
         nrows, ncols = df.shape
@@ -201,11 +198,6 @@ def _getitem_scalar(self, arg):
     def _getitem_tuple_arg(self, arg):
         from uuid import uuid4
 
-        from cudf import MultiIndex
-        from cudf.core.column import column
-        from cudf.core.dataframe import DataFrame
-        from cudf.core.index import as_index
-
         # Step 1: Gather columns
         if isinstance(arg, tuple):
             columns_df = self._frame._get_columns_by_label(arg[1])
@@ -245,13 +237,13 @@ def _getitem_tuple_arg(self, arg):
                     tmp_arg = ([tmp_arg[0]], tmp_arg[1])
                 if len(tmp_arg[0]) == 0:
                     return columns_df._empty_like(keep_index=True)
-                tmp_arg = (column.as_column(tmp_arg[0]), tmp_arg[1])
+                tmp_arg = (as_column(tmp_arg[0]), tmp_arg[1])
 
                 if is_bool_dtype(tmp_arg[0]):
                     df = columns_df._apply_boolean_mask(tmp_arg[0])
                 else:
                     tmp_col_name = str(uuid4())
-                    other_df = DataFrame(
+                    other_df = cudf.DataFrame(
                         {tmp_col_name: column.arange(len(tmp_arg[0]))},
                         index=as_index(tmp_arg[0]),
                     )
@@ -273,7 +265,7 @@ def _getitem_tuple_arg(self, arg):
                     start = self._frame.index[0]
                 df.index = as_index(start)
             else:
-                row_selection = column.as_column(arg[0])
+                row_selection = as_column(arg[0])
                 if is_bool_dtype(row_selection.dtype):
                     df.index = self._frame.index.take(row_selection)
                 else:
@@ -285,7 +277,7 @@ def _getitem_tuple_arg(self, arg):
 
     @annotate("LOC_SETITEM", color="blue", domain="cudf_python")
     def _setitem_tuple_arg(self, key, value):
-        if isinstance(self._frame.index, cudf.MultiIndex) or isinstance(
+        if isinstance(self._frame.index, MultiIndex) or isinstance(
             self._frame.columns, pd.MultiIndex
         ):
             raise NotImplementedError(
@@ -351,10 +343,6 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
 
     @annotate("ILOC_GETITEM", color="blue", domain="cudf_python")
     def _getitem_tuple_arg(self, arg):
-        from cudf import MultiIndex
-        from cudf.core.column import column
-        from cudf.core.index import as_index
-
         # Iloc Step 1:
         # Gather the columns specified by the second tuple arg
         columns_df = cudf.DataFrame(self._frame._get_columns_by_index(arg[1]))
@@ -385,7 +373,7 @@ def _getitem_tuple_arg(self, arg):
                     index += len(columns_df)
                 df = columns_df._slice(slice(index, index + 1, 1))
             else:
-                arg = (column.as_column(arg[0]), arg[1])
+                arg = (as_column(arg[0]), arg[1])
                 if is_bool_dtype(arg[0]):
                     df = columns_df._apply_boolean_mask(arg[0])
                 else:
@@ -953,6 +941,7 @@ def ndim(self):
         return 2
 
     def __dir__(self):
+        # Add the columns of the DataFrame to the dir output.
         o = set(dir(type(self)))
         o.update(self.__dict__)
         o.update(
@@ -1169,8 +1158,6 @@ def _slice(self: T, arg: slice) -> T:
         arg : should always be of type slice
 
         """
-        from cudf.core.index import RangeIndex
-
         num_rows = len(self)
         if num_rows == 0:
             return self
@@ -1284,8 +1271,6 @@ def memory_usage(self, index=True, deep=False):
         return Series(sizes, index=ind)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        import cudf
-
         if method == "__call__" and hasattr(cudf, ufunc.__name__):
             func = getattr(cudf, ufunc.__name__)
             return func(self)
@@ -1554,9 +1539,9 @@ def _concat(
                 out._index._data,
                 indices[:first_data_column_position],
             )
-            if not isinstance(
-                out._index, cudf.MultiIndex
-            ) and is_categorical_dtype(out._index._values.dtype):
+            if not isinstance(out._index, MultiIndex) and is_categorical_dtype(
+                out._index._values.dtype
+            ):
                 out = out.set_index(
                     cudf.core.index.as_index(out.index._values)
                 )
@@ -1777,7 +1762,7 @@ def _get_renderable_dataframe(self):
                 # adjust right columns for output if multiindex.
                 right_cols = (
                     right_cols - 1
-                    if isinstance(self.index, cudf.MultiIndex)
+                    if isinstance(self.index, MultiIndex)
                     else right_cols
                 )
                 left_cols = int(ncols / 2.0) + 1
@@ -2400,7 +2385,7 @@ def set_index(
                     except TypeError:
                         msg = f"{col} cannot be converted to column-like."
                         raise TypeError(msg)
-                if isinstance(col, (cudf.MultiIndex, pd.MultiIndex)):
+                if isinstance(col, (MultiIndex, pd.MultiIndex)):
                     col = (
                         cudf.from_pandas(col)
                         if isinstance(col, pd.MultiIndex)
@@ -2428,7 +2413,7 @@ def set_index(
 
         if append:
             idx_cols = [self.index._data[x] for x in self.index._data]
-            if isinstance(self.index, cudf.MultiIndex):
+            if isinstance(self.index, MultiIndex):
                 idx_names = self.index.names
             else:
                 idx_names = [self.index.name]
@@ -2440,7 +2425,7 @@ def set_index(
         elif len(columns_to_add) == 1:
             idx = cudf.Index(columns_to_add[0], name=names[0])
         else:
-            idx = cudf.MultiIndex._from_data(
+            idx = MultiIndex._from_data(
                 {i: col for i, col in enumerate(columns_to_add)}
             )
             idx.names = names
@@ -2523,7 +2508,7 @@ class max_speed
         result = self if inplace else self.copy()
 
         if not drop:
-            if isinstance(self.index, cudf.MultiIndex):
+            if isinstance(self.index, MultiIndex):
                 names = tuple(
                     name if name is not None else f"level_{i}"
                     for i, name in enumerate(self.index.names)
@@ -2983,9 +2968,7 @@ def rename(
                     "mixed type is not yet supported."
                 )
 
-            if level is not None and isinstance(
-                self.index, cudf.core.multiindex.MultiIndex
-            ):
+            if level is not None and isinstance(self.index, MultiIndex):
                 out_index = self.index.copy(deep=copy)
                 out_index.get_level_values(level).to_frame().replace(
                     to_replace=list(index.keys()),
@@ -4713,7 +4696,7 @@ def to_pandas(self, nullable=False, **kwargs):
 
         if isinstance(self.columns, BaseIndex):
             out_columns = self.columns.to_pandas()
-            if isinstance(self.columns, cudf.core.multiindex.MultiIndex):
+            if isinstance(self.columns, MultiIndex):
                 if self.columns.names is not None:
                     out_columns.names = self.columns.names
             else:
@@ -4889,7 +4872,7 @@ def to_arrow(self, preserve_index=True):
                     "step": 1,
                 }
             else:
-                if isinstance(self.index, cudf.MultiIndex):
+                if isinstance(self.index, MultiIndex):
                     gen_names = tuple(
                         f"level_{i}"
                         for i, _ in enumerate(self.index._data.names)
@@ -5965,11 +5948,11 @@ def stack(self, level=-1, dropna=True):
         repeated_index = self.index.repeat(self.shape[1])
         name_index = Frame({0: self._column_names}).tile(self.shape[0])
         new_index = list(repeated_index._columns) + [name_index._columns[0]]
-        if isinstance(self._index, cudf.MultiIndex):
+        if isinstance(self._index, MultiIndex):
             index_names = self._index.names + [None]
         else:
             index_names = [None] * len(new_index)
-        new_index = cudf.core.multiindex.MultiIndex.from_frame(
+        new_index = MultiIndex.from_frame(
             DataFrame(dict(zip(range(0, len(new_index)), new_index))),
             names=index_names,
         )
@@ -6529,7 +6512,7 @@ def from_pandas(obj, nan_as_null=None):
     elif isinstance(obj, pd.Series):
         return Series.from_pandas(obj, nan_as_null=nan_as_null)
     elif isinstance(obj, pd.MultiIndex):
-        return cudf.MultiIndex.from_pandas(obj, nan_as_null=nan_as_null)
+        return MultiIndex.from_pandas(obj, nan_as_null=nan_as_null)
     elif isinstance(obj, pd.RangeIndex):
         return cudf.core.index.RangeIndex(
             start=obj.start, stop=obj.stop, step=obj.step, name=obj.name
@@ -6647,7 +6630,7 @@ def extract_col(df, col):
         if (
             col == "index"
             and col not in df.index._data
-            and not isinstance(df.index, cudf.MultiIndex)
+            and not isinstance(df.index, MultiIndex)
         ):
             return df.index._data.columns[0]
         return df.index._data[col]

From de8272379088e7d3236e18a37a0badaec41b0a09 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 09:48:23 -0800
Subject: [PATCH 06/11] Remove _rename_columns and _get_numeric_data.

---
 python/cudf/cudf/core/dataframe.py       | 23 -----------------------
 python/cudf/cudf/tests/test_dataframe.py |  9 ---------
 2 files changed, 32 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 4696804fe01..0f1a56752bd 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1314,15 +1314,6 @@ def __array_function__(self, func, types, args, kwargs):
         else:
             return NotImplemented
 
-    def _get_numeric_data(self):
-        """Return a dataframe with only numeric data types"""
-        columns = [
-            c
-            for c, dt in self.dtypes.items()
-            if dt != object and not is_categorical_dtype(dt)
-        ]
-        return self[columns]
-
     def assign(self, **kwargs):
         """
         Assign columns to DataFrame from keyword arguments.
@@ -2091,20 +2082,6 @@ def columns(self, columns):
             data, multiindex=is_multiindex, level_names=columns.names,
         )
 
-    def _rename_columns(self, new_names):
-        old_cols = iter(self._data.names)
-        l_old_cols = len(self._data)
-        l_new_cols = len(new_names)
-        if l_new_cols != l_old_cols:
-            msg = (
-                f"Length of new column names: {l_new_cols} does not "
-                "match length of previous column names: {l_old_cols}"
-            )
-            raise ValueError(msg)
-
-        mapper = dict(zip(old_cols, new_names))
-        self.rename(mapper=mapper, inplace=True, axis=1)
-
     def _reindex(
         self, columns, dtypes=None, deep=False, index=None, inplace=False
     ):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index ab0856fad1e..b70b1a657d1 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3377,15 +3377,6 @@ def test_dataframe_describe_percentiles():
     assert_eq(pdf_results, gdf_results)
 
 
-def test_get_numeric_data():
-    pdf = pd.DataFrame(
-        {"x": [1, 2, 3], "y": [1.0, 2.0, 3.0], "z": ["a", "b", "c"]}
-    )
-    gdf = cudf.from_pandas(pdf)
-
-    assert_eq(pdf._get_numeric_data(), gdf._get_numeric_data())
-
-
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)
 @pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20])
 @pytest.mark.parametrize("data_empty", [False, True])

From 7921067364f7860ee7dfd894c6a1d12f70d761f5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 09:56:43 -0800
Subject: [PATCH 07/11] Fix typo.

---
 python/cudf/cudf/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8bc757f33db..18b2a6cf91e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1630,7 +1630,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
 
     def fill(self, fill_value, begin=0, end=-1, inplace=False):
         warnings.warn(
-            "The fill method will be removed in a future cuDF " "release.",
+            "The fill method will be removed in a future cuDF release.",
             FutureWarning,
         )
         fill_values = [fill_value]

From 794df593902179ab3fae9c50ca0227f38c4fb367 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 13:11:21 -0800
Subject: [PATCH 08/11] Standardize all DataFrame references.

---
 python/cudf/cudf/core/dataframe.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0f1a56752bd..c91be2a13ca 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -243,7 +243,7 @@ def _getitem_tuple_arg(self, arg):
                     df = columns_df._apply_boolean_mask(tmp_arg[0])
                 else:
                     tmp_col_name = str(uuid4())
-                    other_df = cudf.DataFrame(
+                    other_df = DataFrame(
                         {tmp_col_name: column.arange(len(tmp_arg[0]))},
                         index=as_index(tmp_arg[0]),
                     )
@@ -314,7 +314,7 @@ def _setitem_tuple_arg(self, key, value):
             self._frame._data.insert(key[1], new_col)
         else:
             if isinstance(value, (cupy.ndarray, np.ndarray)):
-                value_df = cudf.DataFrame(value)
+                value_df = DataFrame(value)
                 if value_df.shape[1] != columns_df.shape[1]:
                     if value_df.shape[1] == 1:
                         value_cols = (
@@ -345,7 +345,7 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
     def _getitem_tuple_arg(self, arg):
         # Iloc Step 1:
         # Gather the columns specified by the second tuple arg
-        columns_df = cudf.DataFrame(self._frame._get_columns_by_index(arg[1]))
+        columns_df = DataFrame(self._frame._get_columns_by_index(arg[1]))
 
         columns_df._index = self._frame._index
 
@@ -395,7 +395,7 @@ def _getitem_tuple_arg(self, arg):
 
     @annotate("ILOC_SETITEM", color="blue", domain="cudf_python")
     def _setitem_tuple_arg(self, key, value):
-        columns = cudf.DataFrame(self._frame._get_columns_by_index(key[1]))
+        columns = DataFrame(self._frame._get_columns_by_index(key[1]))
 
         for col in columns:
             self._frame[col].iloc[key[0]] = value
@@ -2126,11 +2126,9 @@ def _reindex(
                 columns = (
                     columns if columns is not None else list(df._column_names)
                 )
-                df = cudf.DataFrame()
+                df = DataFrame()
             else:
-                df = cudf.DataFrame(None, index).join(
-                    df, how="left", sort=True
-                )
+                df = DataFrame(None, index).join(df, how="left", sort=True)
                 # double-argsort to map back from sorted to unsorted positions
                 df = df.take(index.argsort(ascending=True).argsort())
 
@@ -3222,7 +3220,7 @@ def agg(self, aggs, axis=None):
             raise NotImplementedError("axis not implemented yet")
 
         if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)):
-            result = cudf.DataFrame()
+            result = DataFrame()
             # TODO : Allow simultaneous pass for multi-aggregation as
             # a future optimization
             for agg in aggs:
@@ -3235,7 +3233,7 @@ def agg(self, aggs, axis=None):
                     f"{aggs} is not a valid function for "
                     f"'DataFrame' object"
                 )
-            result = cudf.DataFrame()
+            result = DataFrame()
             result[aggs] = getattr(df_normalized, aggs)()
             result = result.iloc[:, 0]
             result.name = None
@@ -3270,7 +3268,7 @@ def agg(self, aggs, axis=None):
                         raise NotImplementedError(
                             "callable parameter is not implemented yet"
                         )
-                result = cudf.DataFrame(index=idxs, columns=cols)
+                result = DataFrame(index=idxs, columns=cols)
                 for key in aggs.keys():
                     col = df_normalized[key]
                     col_empty = column_empty(
@@ -5377,7 +5375,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
             warnings.warn(msg)
 
         if not skipna and any(col.nullable for col in filtered._columns):
-            mask = cudf.DataFrame(
+            mask = DataFrame(
                 {
                     name: filtered._data[name]._get_mask_as_column()
                     if filtered._data[name].nullable
@@ -6190,8 +6188,8 @@ def append(
         elif isinstance(other, list):
             if not other:
                 pass
-            elif not isinstance(other[0], cudf.DataFrame):
-                other = cudf.DataFrame(other)
+            elif not isinstance(other[0], DataFrame):
+                other = DataFrame(other)
                 if (self.columns.get_indexer(other.columns) >= 0).all():
                     other = other.reindex(columns=self.columns)
 

From 1c8e0194d0448e7e65ac624ca7d1f0f2315f59f7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 13:23:01 -0800
Subject: [PATCH 09/11] Fix fill test.

---
 python/cudf/cudf/core/dataframe.py  | 4 ++--
 python/cudf/cudf/tests/test_fill.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c91be2a13ca..3c467ac2cd6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -91,8 +91,8 @@
 
 class _DataFrameIndexer(_FrameIndexer):
     def __getitem__(self, arg):
-        if isinstance(self._frame.index, cudf.MultiIndex) or isinstance(
-            self._frame.columns, cudf.MultiIndex
+        if isinstance(self._frame.index, MultiIndex) or isinstance(
+            self._frame.columns, MultiIndex
         ):
             # This try/except block allows the use of pandas-like
             # tuple arguments into MultiIndex dataframes.
diff --git a/python/cudf/cudf/tests/test_fill.py b/python/cudf/cudf/tests/test_fill.py
index efbe2834486..224db2b39d1 100644
--- a/python/cudf/cudf/tests/test_fill.py
+++ b/python/cudf/cudf/tests/test_fill.py
@@ -50,7 +50,7 @@ def test_fill(data, fill_value, begin, end, inplace):
 
         begin = max(0, min(len(gs), begin))
         end = max(0, min(len(gs), end))
-        actual = gs._fill([fill_value], begin, end, False)
+        actual = gs.fill(fill_value, begin, end, False)
         assert actual is not gs
 
     ps[begin:end] = fill_value

From c1b608bc2d0cc7ace266f7c54acf4ba7cc03dddf Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 16 Dec 2021 14:13:23 -0800
Subject: [PATCH 10/11] Revert "Inline _split."

This reverts commit 7f734973fc4014f8dcf5542c33dbeb4e45db8db2.
---
 python/cudf/cudf/core/frame.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 7eb7aa0c3cd..c4ee00c60b7 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1024,11 +1024,6 @@ def _partition(self, scatter_map, npartitions, keep_index=True):
 
         result = partitioned._split(output_offsets, keep_index=keep_index)
 
-        splits = libcudf.copying.table_split(
-            self, output_offsets, keep_index=keep_index
-        )
-        result = [self.__class__._from_data(*result) for result in splits]
-
         for frame in result:
             frame._copy_type_metadata(self, include_index=keep_index)
 
@@ -3738,6 +3733,12 @@ def _is_sorted(self, ascending=None, null_position=None):
             self, ascending=ascending, null_position=null_position
         )
 
+    def _split(self, splits, keep_index=True):
+        results = libcudf.copying.table_split(
+            self, splits, keep_index=keep_index
+        )
+        return [self.__class__._from_data(*result) for result in results]
+
     def _encode(self):
         data, index, indices = libcudf.transform.table_encode(self)
         for name, col in data.items():

From 63e3896bf95c68597127d4979efe9f77449110d1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 21 Dec 2021 13:49:46 -0800
Subject: [PATCH 11/11] Reintroduce _get_numeric_data for dask.

---
 python/cudf/cudf/core/dataframe.py       | 10 ++++++++++
 python/cudf/cudf/tests/test_dataframe.py |  9 +++++++++
 2 files changed, 19 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 3c467ac2cd6..b7fc5efb412 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1314,6 +1314,16 @@ def __array_function__(self, func, types, args, kwargs):
         else:
             return NotImplemented
 
+    # The _get_numeric_data method is necessary for dask compatibility.
+    def _get_numeric_data(self):
+        """Return a dataframe with only numeric data types"""
+        columns = [
+            c
+            for c, dt in self.dtypes.items()
+            if dt != object and not is_categorical_dtype(dt)
+        ]
+        return self[columns]
+
     def assign(self, **kwargs):
         """
         Assign columns to DataFrame from keyword arguments.
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b70b1a657d1..ab0856fad1e 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3377,6 +3377,15 @@ def test_dataframe_describe_percentiles():
     assert_eq(pdf_results, gdf_results)
 
 
+def test_get_numeric_data():
+    pdf = pd.DataFrame(
+        {"x": [1, 2, 3], "y": [1.0, 2.0, 3.0], "z": ["a", "b", "c"]}
+    )
+    gdf = cudf.from_pandas(pdf)
+
+    assert_eq(pdf._get_numeric_data(), gdf._get_numeric_data())
+
+
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES)
 @pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20])
 @pytest.mark.parametrize("data_empty", [False, True])