From a0b0f8c9c348403e5ea7fdebccf118a7403df2cb Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Mon, 31 Jan 2022 15:20:02 -0800
Subject: [PATCH 01/14] Refactoring table_slice

---
 python/cudf/cudf/_lib/copying.pyx    | 37 ++++++---------
 python/cudf/cudf/_lib/utils.pxd      |  1 +
 python/cudf/cudf/_lib/utils.pyx      | 18 ++++++++
 python/cudf/cudf/core/_base_index.py |  9 +++-
 python/cudf/cudf/core/dataframe.py   | 69 ++++++++++++++--------------
 python/cudf/cudf/core/frame.py       |  2 +-
 python/cudf/cudf/core/index.py       |  6 +++
 python/cudf/cudf/core/multiindex.py  |  3 ++
 8 files changed, 86 insertions(+), 59 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 30157bc10ad..df0e78a5634 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -38,6 +38,7 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.utils cimport (
+    columns_from_table_view,
     columns_from_unique_ptr,
     data_from_table_view,
     data_from_unique_ptr,
@@ -330,21 +331,18 @@ def column_slice(Column input_column, object indices):
     return result
 
 
-def table_slice(input_table, object indices, bool keep_index=True):
-
-    cdef table_view input_table_view = table_view_from_table(
-        input_table, not keep_index
-    )
-
-    cdef vector[size_type] c_indices
-    c_indices.reserve(len(indices))
+def columns_slice(input_columns: list, indices: list):
+    """
+    Given a list of input columns, return columns sliced by ``indices``.
 
+    Returns a list of lists of columns. The length of the result is
+    `len(indices) / 2`. The `i`th item of the result is a list of columns
+    sliced from ``input_columns`` with `slice(indices[i*2], indices[i*2 + 1])`.
+    """
+    cdef table_view input_table_view = table_view_from_columns(input_columns)
+    cdef vector[size_type] c_indices = indices
     cdef vector[table_view] c_result
 
-    cdef int index
-    for index in indices:
-        c_indices.push_back(index)
-
     with nogil:
         c_result = move(
             cpp_copying.slice(
@@ -352,18 +350,11 @@ def table_slice(input_table, object indices, bool keep_index=True):
                 c_indices)
         )
 
-    num_of_result_cols = c_result.size()
     return [
-        data_from_table_view(
-            c_result[i],
-            input_table,
-            column_names=input_table._column_names,
-            index_names=(
-                input_table._index._column_names if (
-                    keep_index is True)
-                else None
-            )
-        ) for i in range(num_of_result_cols)]
+        columns_from_table_view(
+            c_result[i], input_columns
+        ) for i in range(c_result.size())
+    ]
 
 
 def column_split(Column input_column, object splits):
diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd
index 50893ef9838..ef8b5c156d5 100644
--- a/python/cudf/cudf/_lib/utils.pxd
+++ b/python/cudf/cudf/_lib/utils.pxd
@@ -17,3 +17,4 @@ cdef data_from_table_view(
 cdef table_view table_view_from_columns(columns) except *
 cdef table_view table_view_from_table(tbl, ignore_index=*) except*
 cdef columns_from_unique_ptr(unique_ptr[table] c_tbl)
+cdef columns_from_table_view(table_view tv, object owners)
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 40edd4bf9a2..520c9a28dde 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -310,6 +310,24 @@ cdef data_from_unique_ptr(
     }
     return data, index
 
+cdef columns_from_table_view(
+    table_view tv,
+    object owners,
+):
+    """
+    Given a ``cudf::table_view``, constructs a list of columns from it,
+    along with referencing an ``owner`` Python object that owns the memory
+    lifetime. ``owner`` must be either None or a list of column. If ``owner``
+    is a list of columns, the owner of the `i`th ``cudf::column_view`` in the
+    table view is ``owners[i]``. For more about memory ownership,
+    see ``Column.from_column_view``
+    """
+
+    return [
+        Column.from_column_view(
+            tv.column(i), owners[i] if isinstance(owners, list) else None
+        ) for i in range(tv.num_columns())
+    ]
 
 cdef data_from_table_view(
     table_view tv,
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index b1335c7c076..ad02c7d423d 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -4,7 +4,7 @@
 
 import pickle
 import warnings
-from typing import Any, Set
+from typing import Any, Set, Tuple
 
 import pandas as pd
 
@@ -75,6 +75,13 @@ def get_loc(self, key, method=None, tolerance=None):
     def __getitem__(self, key):
         raise NotImplementedError()
 
+    def _data_columns(self) -> Tuple[ColumnBase, ...]:
+        """Return a tuple of columns that holds actual data. ``RangeIndex``
+        returns an empty tuple. Unlike ``_values``, this method does not
+        materialize columns.
+        """
+        raise NotImplementedError()
+
     def __contains__(self, item):
         return item in self._values
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c686cd0fd39..7bc0d9ef6a1 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1192,52 +1192,53 @@ def _slice(self: T, arg: slice) -> T:
             return self
         start, stop, stride = arg.indices(num_rows)
 
-        # This is just to handle RangeIndex type, stop
-        # it from materializing unnecessarily
-        keep_index = True
-        if self.index is not None and isinstance(self.index, RangeIndex):
+        # If index type is RangeIndex, slice without materializing.
+        is_range_index = isinstance(self.index, RangeIndex)
+        if is_range_index:
             if self._num_columns == 0:
-                result = self._empty_like(keep_index)
+                result = self._empty_like(keep_index=False)
                 result._index = self.index[start:stop]
                 return result
-            keep_index = False
 
-        # For decreasing slices, terminal at before-the-zero
-        # position is preserved.
         if start < 0:
             start = start + num_rows
+
+        # Decreasing slices that run through row 0, e.g. slice(4, None, -1),
+        # are normalized by `slice.indices` to a stop of -1. The check below
+        # makes sure that -1 is not wrapped to `-1 + num_rows`.
         if stop < 0 and not (stride < 0 and stop == -1):
             stop = stop + num_rows
+        stride = 1 if stride is None else stride
 
-        if start > stop and (stride is None or stride == 1):
-            return self._empty_like(keep_index)
-        else:
-            start = len(self) if start > num_rows else start
-            stop = len(self) if stop > num_rows else stop
+        if start > stop and stride == 1:
+            return self._empty_like(keep_index=True)
 
-            if stride is not None and stride != 1:
-                return self._gather(
-                    cudf.core.column.arange(
-                        start, stop=stop, step=stride, dtype=np.int32
-                    )
-                )
-            else:
-                result = self._from_data(
-                    *libcudf.copying.table_slice(
-                        self, [start, stop], keep_index
-                    )[0]
+        start = len(self) if start > num_rows else start
+        stop = len(self) if stop > num_rows else stop
+
+        if stride != 1:
+            return self._gather(
+                cudf.core.column.arange(
+                    start, stop=stop, step=stride, dtype=np.int32
                 )
+            )
+        else:
+            columns_to_slice = [
+                *self._index._data_columns(),
+                *self._columns,
+            ]
+            result = self._from_columns_like_self(
+                libcudf.copying.columns_slice(columns_to_slice, [start, stop])[
+                    0
+                ],
+                self._column_names,
+                None if is_range_index else self._index.names,
+            )
 
-                result._copy_type_metadata(self, include_index=keep_index)
-                if self.index is not None:
-                    if keep_index:
-                        result._index.names = self.index.names
-                    else:
-                        # Adding index of type RangeIndex back to
-                        # result
-                        result.index = self.index[start:stop]
-                result.columns = self.columns
-                return result
+            if is_range_index:
+                result.index = self.index[start:stop]
+            result.columns = self.columns
+            return result
 
     def memory_usage(self, index=True, deep=False):
         """
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 69dc5389e7a..9a8d113e3e7 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -104,7 +104,7 @@ def _index_names(self) -> List[Any]:  # TODO: List[str]?
         )
 
     @property
-    def _columns(self) -> List[Any]:  # TODO: List[Column]?
+    def _columns(self) -> Tuple[ColumnBase, ...]:
         return self._data.columns
 
     def serialize(self):
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 91c7a740699..0c587be64ec 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -231,6 +231,9 @@ def _values(self):
         else:
             return column.column_empty(0, masked=False, dtype=self.dtype)
 
+    def _data_columns(self) -> Tuple[ColumnBase, ...]:
+        return ()
+
     def is_numeric(self):
         return True
 
@@ -825,6 +828,9 @@ def _copy_type_metadata(
     def _values(self):
         return self._column
 
+    def _data_columns(self) -> Tuple[ColumnBase, ...]:
+        return self._columns
+
     @classmethod
     def _concat(cls, objs):
         if all(isinstance(obj, RangeIndex) for obj in objs):
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index e8ff7838a9e..f5e6d194fca 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1090,6 +1090,9 @@ def values(self):
         """
         return self.to_frame(index=False).values
 
+    def _data_columns(self) -> Tuple[column.ColumnBase, ...]:
+        return self._columns
+
     @classmethod
     def from_frame(cls, df, names=None):
         """

From 9e794927a6e03d84c7808f030ddc0b042692b0d2 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Mon, 31 Jan 2022 16:49:59 -0800
Subject: [PATCH 02/14] Refactor `table_empty_like`.

---
 python/cudf/cudf/_lib/copying.pyx      | 16 +++-------------
 python/cudf/cudf/core/frame.py         | 10 +---------
 python/cudf/cudf/core/indexed_frame.py | 14 ++++++++++++++
 3 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index df0e78a5634..0014f64cbf5 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -282,24 +282,14 @@ def column_allocate_like(Column input_column, size=None):
     return Column.from_unique_ptr(move(c_result))
 
 
-def table_empty_like(input_table, bool keep_index=True):
-
-    cdef table_view input_table_view = table_view_from_table(
-        input_table, not keep_index
-    )
-
+def columns_empty_like(input_columns):
+    cdef table_view input_table_view = table_view_from_columns(input_columns)
     cdef unique_ptr[table] c_result
 
     with nogil:
         c_result = move(cpp_copying.empty_like(input_table_view))
 
-    return data_from_unique_ptr(
-        move(c_result),
-        column_names=input_table._column_names,
-        index_names=(
-            input_table._index._column_names if keep_index is True else None
-        )
-    )
+    return columns_from_unique_ptr(move(c_result))
 
 
 def column_slice(Column input_column, object indices):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 9a8d113e3e7..9cabeef6a9f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -104,7 +104,7 @@ def _index_names(self) -> List[Any]:  # TODO: List[str]?
         )
 
     @property
-    def _columns(self) -> Tuple[ColumnBase, ...]:
+    def _columns(self) -> Tuple[ColumnBase, ...]:  # TODO: List[Column]?
         return self._data.columns
 
     def serialize(self):
@@ -560,14 +560,6 @@ def _as_column(self):
 
         return self._data[None].copy(deep=False)
 
-    def _empty_like(self, keep_index=True):
-        result = self.__class__._from_data(
-            *libcudf.copying.table_empty_like(self, keep_index)
-        )
-
-        result._copy_type_metadata(self, include_index=keep_index)
-        return result
-
     @property
     def values(self):
         """
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index e9f2de1cb1c..56f7287fd6c 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -638,6 +638,20 @@ def drop_duplicates(
             self._index.names if not ignore_index else None,
         )
 
+    def _empty_like(self, keep_index=True):
+        # TODO: RangeIndex._data.columns materializes data,
+        # which is unecessary here.
+        return self._from_columns_like_self(
+            libcudf.copying.columns_empty_like(
+                [
+                    *(self._index._data.columns if keep_index else ()),
+                    *self._columns,
+                ]
+            ),
+            self._column_names,
+            self._index.names if keep_index else None,
+        )
+
     def add_prefix(self, prefix):
         """
         Prefix labels with string `prefix`.

From cd14022a531f97ef7d7f89df9ef6814b3e20b79c Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Tue, 1 Feb 2022 12:26:01 -0800
Subject: [PATCH 03/14] Refactor split

---
 python/cudf/cudf/_lib/copying.pyx      | 30 +++++++-------------------
 python/cudf/cudf/core/_base_index.py   |  3 +++
 python/cudf/cudf/core/frame.py         | 18 +++++++++++-----
 python/cudf/cudf/core/index.py         |  5 +++++
 python/cudf/cudf/core/indexed_frame.py | 16 ++++++++++++++
 5 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 0014f64cbf5..d5adb47e0f5 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -282,7 +282,7 @@ def column_allocate_like(Column input_column, size=None):
     return Column.from_unique_ptr(move(c_result))
 
 
-def columns_empty_like(input_columns):
+def columns_empty_like(input_columns: list):
     cdef table_view input_table_view = table_view_from_columns(input_columns)
     cdef unique_ptr[table] c_result
 
@@ -378,21 +378,12 @@ def column_split(Column input_column, object splits):
     return result
 
 
-def table_split(input_table, object splits, bool keep_index=True):
-
-    cdef table_view input_table_view = table_view_from_table(
-        input_table, not keep_index
-    )
-
-    cdef vector[size_type] c_splits
-    c_splits.reserve(len(splits))
+def columns_split(input_columns: list, object splits):
 
+    cdef table_view input_table_view = table_view_from_columns(input_columns)
+    cdef vector[size_type] c_splits = splits
     cdef vector[table_view] c_result
 
-    cdef int split
-    for split in splits:
-        c_splits.push_back(split)
-
     with nogil:
         c_result = move(
             cpp_copying.split(
@@ -400,16 +391,11 @@ def table_split(input_table, object splits, bool keep_index=True):
                 c_splits)
         )
 
-    num_of_result_cols = c_result.size()
     return [
-        data_from_table_view(
-            c_result[i],
-            input_table,
-            column_names=input_table._column_names,
-            index_names=input_table._index_names if (
-                keep_index is True)
-            else None
-        ) for i in range(num_of_result_cols)]
+        columns_from_table_view(
+            c_result[i], input_columns
+        ) for i in range(c_result.size())
+    ]
 
 
 def _copy_if_else_column_column(Column lhs, Column rhs, Column boolean_mask):
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index ad02c7d423d..410dcad9ea3 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1577,6 +1577,9 @@ def _split_columns_by_levels(self, levels):
             [],
         )
 
+    def _split(self, splits):
+        raise NotImplementedError()
+
 
 def _get_result_name(left_name, right_name):
     if left_name == right_name:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 9cabeef6a9f..dc3c56ef77e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3378,11 +3378,19 @@ def _is_sorted(self, ascending=None, null_position=None):
             self, ascending=ascending, null_position=null_position
         )
 
-    def _split(self, splits, keep_index=True):
-        results = libcudf.copying.table_split(
-            self, splits, keep_index=keep_index
-        )
-        return [self.__class__._from_data(*result) for result in results]
+    def _split(self, splits):
+        """Split a frame with split points in ``splits``. Returns a list of
+        Frames of length `len(splits) + 1`.
+        """
+        return [
+            self._from_columns_like_self(
+                libcudf.copying.columns_split([*self._data.columns], splits)[
+                    split_idx
+                ],
+                self._column_names,
+            )
+            for split_idx in range(len(splits) + 1)
+        ]
 
     def _encode(self):
         data, index, indices = libcudf.transform.table_encode(self)
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 0c587be64ec..520e8c90ba2 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -694,6 +694,11 @@ def _apply_boolean_mask(self, boolean_mask):
             [self._values.apply_boolean_mask(boolean_mask)], [self.name]
         )
 
+    def _split(self, splits):
+        return Int64Index._from_columns(
+            [self._values.columns_split(splits)], [self.name]
+        )
+
 
 # Patch in all binops and unary ops, which bypass __getattr__ on the instance
 # and prevent the above overload from working.
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 56f7287fd6c..3a5e82dc106 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -652,6 +652,22 @@ def _empty_like(self, keep_index=True):
             self._index.names if keep_index else None,
         )
 
+    def _split(self, splits, keep_index=True):
+        return [
+            self._from_columns_like_self(
+                libcudf.copying.columns_split(
+                    [
+                        *(self._index._data.columns if keep_index else []),
+                        *self._columns,
+                    ],
+                    splits,
+                )[split_idx],
+                self._column_names,
+                self._index.names if keep_index else None,
+            )
+            for split_idx in range(len(splits) + 1)
+        ]
+
     def add_prefix(self, prefix):
         """
         Prefix labels with string `prefix`.

From 85ff332bb28bf04abb17b29915981602413c3db8 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Wed, 2 Feb 2022 16:33:46 -0800
Subject: [PATCH 04/14] Unify type declaration

---
 python/cudf/cudf/_lib/copying.pyx           | 8 ++++----
 python/cudf/cudf/_lib/stream_compaction.pyx | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index d5adb47e0f5..5e8ee95857f 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -167,7 +167,7 @@ def copy_range(Column input_column,
 
 
 def gather(
-    columns: list,
+    list columns,
     Column gather_map,
     bool nullify=False
 ):
@@ -282,7 +282,7 @@ def column_allocate_like(Column input_column, size=None):
     return Column.from_unique_ptr(move(c_result))
 
 
-def columns_empty_like(input_columns: list):
+def columns_empty_like(list input_columns):
     cdef table_view input_table_view = table_view_from_columns(input_columns)
     cdef unique_ptr[table] c_result
 
@@ -321,7 +321,7 @@ def column_slice(Column input_column, object indices):
     return result
 
 
-def columns_slice(input_columns: list, indices: list):
+def columns_slice(list input_columns, list indices):
     """
     Given a list of input columns, return columns sliced by ``indices``.
 
@@ -378,7 +378,7 @@ def column_split(Column input_column, object splits):
     return result
 
 
-def columns_split(input_columns: list, object splits):
+def columns_split(list input_columns, object splits):
 
     cdef table_view input_table_view = table_view_from_columns(input_columns)
     cdef vector[size_type] c_splits = splits
diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx
index 4330c565982..c11a221547d 100644
--- a/python/cudf/cudf/_lib/stream_compaction.pyx
+++ b/python/cudf/cudf/_lib/stream_compaction.pyx
@@ -32,7 +32,7 @@ from cudf._lib.utils cimport (
 )
 
 
-def drop_nulls(columns: list, how="any", keys=None, thresh=None):
+def drop_nulls(list columns, how="any", keys=None, thresh=None):
     """
     Drops null rows from cols depending on key columns.
 
@@ -75,7 +75,7 @@ def drop_nulls(columns: list, how="any", keys=None, thresh=None):
     return columns_from_unique_ptr(move(c_result))
 
 
-def apply_boolean_mask(columns: list, Column boolean_mask):
+def apply_boolean_mask(list columns, Column boolean_mask):
     """
     Drops the rows which correspond to False in boolean_mask.
 
@@ -104,7 +104,7 @@ def apply_boolean_mask(columns: list, Column boolean_mask):
     return columns_from_unique_ptr(move(c_result))
 
 
-def drop_duplicates(columns: list,
+def drop_duplicates(list columns,
                     object keys=None,
                     object keep='first',
                     bool nulls_are_equal=True):

From 4fd6dd93107a040da7d7f2ce9c53843663cb29d6 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 25 Feb 2022 13:22:57 -0800
Subject: [PATCH 05/14] Refactor scatter

---
 python/cudf/cudf/_lib/copying.pyx      | 110 +++++++++++++++----------
 python/cudf/cudf/_lib/utils.pxd        |   2 +-
 python/cudf/cudf/core/column/column.py |   6 +-
 3 files changed, 69 insertions(+), 49 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 4cf696f8248..e462770ef85 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -191,60 +191,80 @@ def gather(
     return columns_from_unique_ptr(move(c_result))
 
 
-def scatter(object source, Column scatter_map, Column target_column,
-            bool bounds_check=True):
-    """
-    Scattering input into target as per the scatter map,
-    input can be a list of scalars or can be a table
-    """
-
-    cdef column_view scatter_map_view = scatter_map.view()
-    cdef table_view target_table_view = table_view_from_columns(
-        (target_column,))
-    cdef bool c_bounds_check = bounds_check
+cdef scatter_scalar(list source_device_slrs,
+                    column_view scatter_map,
+                    table_view target_table,
+                    bool bounds_check):
+    cdef vector[reference_wrapper[constscalar]] c_source
+    cdef DeviceScalar d_slr
     cdef unique_ptr[table] c_result
 
-    # Needed for the table branch
-    cdef table_view source_table_view
+    c_source.reserve(len(source_device_slrs))
+    for d_slr in source_device_slrs:
+        c_source.push_back(
+            reference_wrapper[constscalar](d_slr.get_raw_ptr()[0])
+        )
+
+    with nogil:
+        c_result = move(
+            cpp_copying.scatter(
+                c_source,
+                scatter_map,
+                target_table,
+                bounds_check
+            )
+        )
 
-    # Needed for the scalar branch
-    cdef vector[reference_wrapper[constscalar]] source_scalars
-    cdef DeviceScalar slr
+    return columns_from_unique_ptr(move(c_result))
 
-    if isinstance(source, Column):
-        source_table_view = table_view_from_columns((<Column> source,))
 
-        with nogil:
-            c_result = move(
-                cpp_copying.scatter(
-                    source_table_view,
-                    scatter_map_view,
-                    target_table_view,
-                    c_bounds_check
-                )
-            )
-    else:
-        slr = as_device_scalar(source, target_column.dtype)
-        source_scalars.push_back(reference_wrapper[constscalar](
-            slr.get_raw_ptr()[0]))
+cdef scatter_column(list source_columns,
+                    column_view scatter_map,
+                    table_view target_table,
+                    bool bounds_check):
+    cdef table_view c_source = table_view_from_columns(source_columns)
+    cdef unique_ptr[table] c_result
 
-        with nogil:
-            c_result = move(
-                cpp_copying.scatter(
-                    source_scalars,
-                    scatter_map_view,
-                    target_table_view,
-                    c_bounds_check
-                )
+    with nogil:
+        c_result = move(
+            cpp_copying.scatter(
+                c_source,
+                scatter_map,
+                target_table,
+                bounds_check
             )
+        )
+    return columns_from_unique_ptr(move(c_result))
 
-    data, _ = data_from_unique_ptr(
-        move(c_result),
-        column_names=(None,),
-        index_names=None
-    )
 
-    return next(iter(data.values()))
+def scatter(list sources, Column scatter_map, list target_columns,
+            bool bounds_check=True):
+    """
+    Scattering source into target as per the scatter map.
+    `sources` can be a list of scalars, or a list of columns. The number of
+    items in `sources` must equal the number of `target_columns` to scatter.
+    """
+    # TODO: Only single column scatter is used, we should explore multi-column
+    # scatter for frames for performance increase.
+
+    if len(sources) != len(target_columns):
+        raise ValueError("Mismatched number of source and target columns.")
+
+    if len(sources) == 0:
+        return []
+
+    cdef column_view scatter_map_view = scatter_map.view()
+    cdef table_view target_table_view = table_view_from_columns(target_columns)
+
+    if isinstance(sources[0], Column):
+        return scatter_column(
+            sources, scatter_map_view, target_table_view, bounds_check
+        )
+    else:
+        source_scalars = [as_device_scalar(slr) for slr in sources]
+        return scatter_scalar(
+            source_scalars, scatter_map_view, target_table_view, bounds_check
+        )
 
 
 def column_empty_like(Column input_column):
diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd
index ef8b5c156d5..8a53b71124a 100644
--- a/python/cudf/cudf/_lib/utils.pxd
+++ b/python/cudf/cudf/_lib/utils.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 2788ac6a600..95bb103e364 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -585,9 +585,9 @@ def _scatter_by_column(
                     [value], [self], key
                 )[0]._with_type_metadata(self.dtype)
             else:
-                return libcudf.copying.scatter(
-                    value, key, self
-                )._with_type_metadata(self.dtype)
+                return libcudf.copying.scatter([value], key, [self])[
+                    0
+                ]._with_type_metadata(self.dtype)
         except RuntimeError as e:
             if "out of bounds" in str(e):
                 raise IndexError(

From f0d8b6b855d780d70342d71014bf9450dee3d6e4 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 25 Feb 2022 13:24:26 -0800
Subject: [PATCH 06/14] Revert _data_column introduction

---
 python/cudf/cudf/core/multiindex.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 3c6d86d2b1c..b09a2d39c14 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1067,9 +1067,6 @@ def values(self):
         """
         return self.to_frame(index=False).values
 
-    def _data_columns(self) -> Tuple[column.ColumnBase, ...]:
-        return self._columns
-
     @classmethod
     def from_frame(cls, df, names=None):
         """

From 683417574edf3910dfeea3578c79a04f0005fac6 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 25 Feb 2022 13:29:32 -0800
Subject: [PATCH 07/14] doc fix

---
 python/cudf/cudf/_lib/utils.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 39025500095..8557f430e25 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -321,7 +321,7 @@ cdef columns_from_table_view(
     lifetime. ``owner`` must be either None or a list of column. If ``owner``
     is a list of columns, the owner of the `i`th ``cudf::column_view`` in the
     table view is ``owners[i]``. For more about memory ownership,
-    see ``Column.from_column_view``
+    see ``Column.from_column_view``.
     """
 
     return [

From da613a191ec86b3ad99f510bdad17b7842832759 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 25 Feb 2022 13:35:23 -0800
Subject: [PATCH 08/14] Revert more _data_columns introduction

---
 python/cudf/cudf/core/_base_index.py | 9 +--------
 python/cudf/cudf/core/dataframe.py   | 2 +-
 python/cudf/cudf/core/index.py       | 6 ------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 8a2b27bc6eb..a91d2747980 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -4,7 +4,7 @@
 
 import pickle
 from functools import cached_property
-from typing import Any, Set, Tuple
+from typing import Any, Set
 
 import pandas as pd
 
@@ -66,13 +66,6 @@ def get_loc(self, key, method=None, tolerance=None):
     def __getitem__(self, key):
         raise NotImplementedError()
 
-    def _data_columns(self) -> Tuple[ColumnBase, ...]:
-        """Return a tuple of columns that holds actual data. ``RangeIndex``
-        returns an empty tuple. Unlike ``_values``, this method does not
-        materialize columns.
-        """
-        raise NotImplementedError()
-
     def __contains__(self, item):
         return item in self._values
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1c0d5620492..c88dc7b0c94 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1314,7 +1314,7 @@ def _slice(self: T, arg: slice) -> T:
             )
         else:
             columns_to_slice = [
-                *self._index._data_columns(),
+                *(self._index._data.columns if not is_range_index else []),
                 *self._columns,
             ]
             result = self._from_columns_like_self(
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 3b81cfb1b5c..89613edef8c 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -232,9 +232,6 @@ def _values(self):
         else:
             return column.column_empty(0, masked=False, dtype=self.dtype)
 
-    def _data_columns(self) -> Tuple[ColumnBase, ...]:
-        return ()
-
     def is_numeric(self):
         return True
 
@@ -863,9 +860,6 @@ def _copy_type_metadata(
     def _values(self):
         return self._column
 
-    def _data_columns(self) -> Tuple[ColumnBase, ...]:
-        return self._columns
-
     @classmethod
     def _concat(cls, objs):
         if all(isinstance(obj, RangeIndex) for obj in objs):

From 52df3cbbcd41492275a43c135836818c586f4844 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 25 Feb 2022 13:36:40 -0800
Subject: [PATCH 09/14] Remove TODO that's tracked elsewhere.

---
 python/cudf/cudf/core/indexed_frame.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 9b4d688904b..2245c40f7d5 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -701,8 +701,6 @@ def drop_duplicates(
 
     @annotate("FRAME_EMPTY_LIKE", color="green", domain="cudf_python")
     def _empty_like(self, keep_index=True):
-        # TODO: RangeIndex._data.columns materializes data,
-        # which is unecessary here.
         return self._from_columns_like_self(
             libcudf.copying.columns_empty_like(
                 [

From 9d878f142f1188e8a7f3a98312a1d813e7eeae1d Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 4 Mar 2022 10:43:57 -0800
Subject: [PATCH 10/14] Address review comments

---
 python/cudf/cudf/core/_base_index.py | 2 +-
 python/cudf/cudf/core/dataframe.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index a91d2747980..84c916472e5 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -1529,7 +1529,7 @@ def _split_columns_by_levels(self, levels):
         )
 
     def _split(self, splits):
-        raise NotImplementedError()
+        raise NotImplementedError
 
 
 def _get_result_name(left_name, right_name):
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c88dc7b0c94..3faf04029a7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1300,7 +1300,7 @@ def _slice(self: T, arg: slice) -> T:
             stop = stop + num_rows
         stride = 1 if stride is None else stride
 
-        if start > stop and stride == 1:
+        if (stop - start) * stride <= 0:
             return self._empty_like(keep_index=True)
 
         start = len(self) if start > num_rows else start

From 0afc3e7775e8acd2d4a5cc5fb0e925f0414cba81 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 4 Mar 2022 12:03:04 -0800
Subject: [PATCH 11/14] Avoid duplicate work in _split

---
 python/cudf/cudf/core/indexed_frame.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 2245c40f7d5..35381dbe198 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -713,19 +713,21 @@ def _empty_like(self, keep_index=True):
         )
 
     def _split(self, splits, keep_index=True):
+        columns_splitted = libcudf.copying.columns_split(
+            [
+                *(self._index._data.columns if keep_index else []),
+                *self._columns,
+            ],
+            splits,
+        )
+
         return [
             self._from_columns_like_self(
-                libcudf.copying.columns_split(
-                    [
-                        *(self._index._data.columns if keep_index else []),
-                        *self._columns,
-                    ],
-                    splits,
-                )[split_idx],
+                columns_splitted[i],
                 self._column_names,
                 self._index.names if keep_index else None,
             )
-            for split_idx in range(len(splits) + 1)
+            for i in range(len(splits) + 1)
         ]
 
     def add_prefix(self, prefix):

From 580732942f167fae139e0281e839f510c98f8eaf Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 8 Mar 2022 11:53:36 -0800
Subject: [PATCH 12/14] Update python/cudf/cudf/core/indexed_frame.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/core/indexed_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index c3b6fbc0d60..d5af02161e3 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -713,7 +713,7 @@ def _empty_like(self, keep_index=True):
         )
 
     def _split(self, splits, keep_index=True):
-        columns_splitted = libcudf.copying.columns_split(
+        columns_split = libcudf.copying.columns_split(
             [
                 *(self._index._data.columns if keep_index else []),
                 *self._columns,

From c0d8d8e46286d0b19ac5b1e045aa3a52662fd6fa Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 8 Mar 2022 11:53:49 -0800
Subject: [PATCH 13/14] Update python/cudf/cudf/core/indexed_frame.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/cudf/cudf/core/indexed_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index d5af02161e3..46b5e51df73 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -723,7 +723,7 @@ def _split(self, splits, keep_index=True):
 
         return [
             self._from_columns_like_self(
-                columns_splitted[i],
+                columns_split[i],
                 self._column_names,
                 self._index.names if keep_index else None,
             )

From f4349bdaf828037c31420f61bcffee7ce1da2485 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Tue, 8 Mar 2022 11:56:02 -0800
Subject: [PATCH 14/14] Deindent the column-slicing path in _slice

---
 python/cudf/cudf/core/dataframe.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d74aeddbda7..e687c274d2f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1312,23 +1312,21 @@ def _slice(self: T, arg: slice) -> T:
                     start, stop=stop, step=stride, dtype=np.int32
                 )
             )
-        else:
-            columns_to_slice = [
-                *(self._index._data.columns if not is_range_index else []),
-                *self._columns,
-            ]
-            result = self._from_columns_like_self(
-                libcudf.copying.columns_slice(columns_to_slice, [start, stop])[
-                    0
-                ],
-                self._column_names,
-                None if is_range_index else self._index.names,
-            )
 
-            if is_range_index:
-                result.index = self.index[start:stop]
-            result._set_column_names_like(self)
-            return result
+        columns_to_slice = [
+            *(self._index._data.columns if not is_range_index else []),
+            *self._columns,
+        ]
+        result = self._from_columns_like_self(
+            libcudf.copying.columns_slice(columns_to_slice, [start, stop])[0],
+            self._column_names,
+            None if is_range_index else self._index.names,
+        )
+
+        if is_range_index:
+            result.index = self.index[start:stop]
+        result._set_column_names_like(self)
+        return result
 
     @annotate("DATAFRAME_MEMORY_USAGE", color="blue", domain="cudf_python")
     def memory_usage(self, index=True, deep=False):