From 01a4e749abd744a6920919c6ce13bff0c009bfce Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 12:54:19 -0700
Subject: [PATCH 01/21] Rolling out to hash.pyx

---
 python/cudf/cudf/_lib/hash.pyx         | 32 ++++++++------------------
 python/cudf/cudf/core/dataframe.py     | 19 +++++++--------
 python/cudf/cudf/core/indexed_frame.py |  3 ++-
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 301f571f5fb..8bb8ab92a48 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -14,16 +14,14 @@ from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id
 from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
-def hash_partition(source_table, object columns_to_hash,
-                   int num_partitions, bool keep_index=True):
+def hash_partition(list source_columns, object columns_to_hash,
+                   int num_partitions):
     cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
     cdef int c_num_partitions = num_partitions
-    cdef table_view c_source_view = table_view_from_table(
-        source_table, not keep_index
-    )
+    cdef table_view c_source_view = table_view_from_columns(source_columns)
 
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
     with nogil:
@@ -36,27 +34,17 @@ def hash_partition(source_table, object columns_to_hash,
         )
 
     # Note that the offsets (`c_result.second`) may be empty when
-    # the original table (`source_table`) is empty. We need to
+    # the original table (`source_columns`) is empty. We need to
     # return a list of zeros in this case.
     return (
-        *data_from_unique_ptr(
-            move(c_result.first),
-            column_names=source_table._column_names,
-            index_names=(
-                source_table._index_names
-                if keep_index is True
-                else None
-            )
-
-        ),
-        list(c_result.second) if c_result.second.size()
-        else [0] * num_partitions
+        columns_from_unique_ptr(move(c_result.first)),
+        list(c_result.second)
+        if c_result.second.size() else [0] * num_partitions
     )
 
 
-def hash(source_table, str method, int seed=0):
-    cdef table_view c_source_view = table_view_from_table(
-        source_table, ignore_index=True)
+def hash(list source_columns, str method, int seed=0):
+    cdef table_view c_source_view = table_view_from_columns(source_columns)
     cdef unique_ptr[column] c_result
     cdef cpp_hash_id c_hash_function
     if method == "murmur3":
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 57d591dd3e7..006d09366ea 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3877,19 +3877,16 @@ def partition_by_hash(self, columns, nparts, keep_index=True):
         -------
         partitioned: list of DataFrame
         """
-        idx = (
-            0
-            if (self._index is None or keep_index is False)
-            else self._index._num_columns
-        )
-        key_indices = [self._data.names.index(k) + idx for k in columns]
 
-        output_data, output_index, offsets = libcudf.hash.hash_partition(
-            self, key_indices, nparts, keep_index
+        key_indices = [self._column_names.index(k) for k in columns]
+        output_columns, offsets = libcudf.hash.hash_partition(
+            [*self._columns], key_indices, nparts
+        )
+        outdf = self._from_columns_like_self(
+            [*(self._index._columns if keep_index else ()), *output_columns],
+            self._column_names,
+            self._index_names if keep_index else None,
         )
-        outdf = self.__class__._from_data(output_data, output_index)
-        outdf._copy_type_metadata(self, include_index=keep_index)
-
         # Slice into partition
         return [outdf[s:e] for s, e in zip(offsets, offsets[1:] + [None])]
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 3fa951241f7..dced49016fd 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -617,7 +617,8 @@ def hash_values(self, method="murmur3"):
         # calculation, necessitating the unfortunate circular reference to the
         # child class here.
         return cudf.Series._from_data(
-            {None: libcudf.hash.hash(self, method)}, index=self.index
+            {None: libcudf.hash.hash([*self._columns], method)},
+            index=self.index,
         )
 
     def _gather(

From e514c10bf9015dea2db1370ba726113f6665068b Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 14:00:49 -0700
Subject: [PATCH 02/21] Rolling out to interop.pyx

---
 python/cudf/cudf/_lib/interop.pyx      | 54 ++++++++------------------
 python/cudf/cudf/core/column/column.py | 18 ++-------
 python/cudf/cudf/core/frame.py         | 23 ++++++-----
 python/cudf/cudf/io/dlpack.py          | 13 ++++---
 4 files changed, 40 insertions(+), 68 deletions(-)

diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx
index 06e287ee670..26bb2e868f5 100644
--- a/python/cudf/cudf/_lib/interop.pyx
+++ b/python/cudf/cudf/_lib/interop.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import cudf
 
@@ -20,7 +20,7 @@ from cudf._lib.cpp.interop cimport (
 )
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def from_dlpack(dlpack_capsule):
@@ -40,31 +40,25 @@ def from_dlpack(dlpack_capsule):
             cpp_from_dlpack(dlpack_tensor)
         )
 
-    res = data_from_unique_ptr(
-        move(c_result),
-        column_names=range(0, c_result.get()[0].num_columns())
-    )
+    res = columns_from_unique_ptr(move(c_result))
     dlpack_tensor.deleter(dlpack_tensor)
     return res
 
 
-def to_dlpack(source_table):
+def to_dlpack(list source_columns):
     """
     Converts a cudf Frame into a DLPack Tensor PyCapsule.
 
     DLPack Tensor PyCapsule will have the name "dltensor".
     """
-    for column in source_table._columns:
-        if column.null_count:
-            raise ValueError(
-                "Cannot create a DLPack tensor with null values. \
-                    Input is required to have null count as zero."
-            )
+    if any(column.null_count for column in source_columns):
+        raise ValueError(
+            "Cannot create a DLPack tensor with null values. \
+                Input is required to have null count as zero."
+        )
 
     cdef DLManagedTensor *dlpack_tensor
-    cdef table_view source_table_view = table_view_from_table(
-        source_table, ignore_index=True
-    )
+    cdef table_view source_table_view = table_view_from_columns(source_columns)
 
     with nogil:
         dlpack_tensor = cpp_to_dlpack(
@@ -110,17 +104,13 @@ cdef vector[column_metadata] gather_metadata(object metadata) except *:
         raise ValueError("Malformed metadata has been encountered")
 
 
-def to_arrow(input_table,
-             object metadata,
-             bool keep_index=True):
+def to_arrow(list source_columns, object metadata):
     """Convert from cudf Frame to PyArrow Table.
 
     Parameters
     ----------
-    input_table : cudf table
-    column_names : names for the pyarrow arrays
-    field_names : field names for nested type arrays
-    keep_index : whether index needs to be part of arrow table
+    source_columns : a list of columns to convert
+    metadata : a list of metadata, see `gather_metadata` for layout
 
     Returns
     -------
@@ -128,9 +118,7 @@ def to_arrow(input_table,
     """
 
     cdef vector[column_metadata] cpp_metadata = gather_metadata(metadata)
-    cdef table_view input_table_view = (
-        table_view_from_table(input_table, not keep_index)
-    )
+    cdef table_view input_table_view = table_view_from_columns(source_columns)
 
     cdef shared_ptr[CTable] cpp_arrow_table
     with nogil:
@@ -141,18 +129,12 @@ def to_arrow(input_table,
     return pyarrow_wrap_table(cpp_arrow_table)
 
 
-def from_arrow(
-    object input_table,
-    object column_names=None,
-    object index_names=None
-):
+def from_arrow(object input_table):
     """Convert from PyArrow Table to cudf Frame.
 
     Parameters
     ----------
     input_table : PyArrow table
-    column_names : names for the cudf table data columns
-    index_names : names for the cudf table index columns
 
     Returns
     -------
@@ -166,8 +148,4 @@ def from_arrow(
     with nogil:
         c_result = move(cpp_from_arrow(cpp_arrow_table.get()[0]))
 
-    return data_from_unique_ptr(
-        move(c_result),
-        column_names=column_names,
-        index_names=index_names
-    )
+    return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 1c1845373e1..08f460da48e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -227,13 +227,7 @@ def to_arrow(self) -> pa.Array:
           4
         ]
         """
-        return libcudf.interop.to_arrow(
-            cudf.core.frame.Frame(
-                cudf.core.column_accessor.ColumnAccessor({"None": self})
-            ),
-            [["None"]],
-            keep_index=False,
-        )["None"].chunk(0)
+        return libcudf.interop.to_arrow([self], [["None"]],)["None"].chunk(0)
 
     @classmethod
     def from_arrow(cls, array: pa.Array) -> ColumnBase:
@@ -278,12 +272,8 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
                 }
             )
 
-            codes = libcudf.interop.from_arrow(
-                indices_table, indices_table.column_names
-            )[0]["None"]
-            categories = libcudf.interop.from_arrow(
-                dictionaries_table, dictionaries_table.column_names
-            )[0]["None"]
+            codes = libcudf.interop.from_arrow(indices_table)[0]
+            categories = libcudf.interop.from_arrow(dictionaries_table)[0]
 
             return build_categorical_column(
                 categories=categories,
@@ -299,7 +289,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase:
         ):
             return cudf.core.column.IntervalColumn.from_arrow(array)
 
-        result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"]
+        result = libcudf.interop.from_arrow(data)[0]
 
         return result._with_type_metadata(cudf_dtype_from_pa_type(array.type))
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 07cc3ea71cd..98c052bbc34 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1727,30 +1727,33 @@ def from_arrow(cls, data):
 
             dict_indices_table = pa.table(dict_indices)
             data = data.drop(dict_indices_table.column_names)
-            cudf_indices_frame, _ = libcudf.interop.from_arrow(
-                dict_indices_table, dict_indices_table.column_names
-            )
+            indices_columns = libcudf.interop.from_arrow(dict_indices_table)
             # as dictionary size can vary, it can't be a single table
             cudf_dictionaries_columns = {
                 name: ColumnBase.from_arrow(dict_dictionaries[name])
                 for name in dict_dictionaries.keys()
             }
 
-            for name, codes in cudf_indices_frame.items():
-                cudf_category_frame[name] = build_categorical_column(
+            cudf_category_frame = {
+                name: build_categorical_column(
                     cudf_dictionaries_columns[name],
                     codes,
                     mask=codes.base_mask,
                     size=codes.size,
                     ordered=dict_ordered[name],
                 )
+                for name, codes in zip(
+                    dict_indices_table.column_names, indices_columns
+                )
+            }
 
         # Handle non-dict arrays
-        cudf_non_category_frame = (
-            {}
-            if data.num_columns == 0
-            else libcudf.interop.from_arrow(data, data.column_names)[0]
-        )
+        cudf_non_category_frame = {
+            name: col
+            for name, col in zip(
+                data.column_names, libcudf.interop.from_arrow(data)
+            )
+        }
 
         result = {**cudf_non_category_frame, **cudf_category_frame}
 
diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py
index 00a2cb4cee2..644643db83c 100644
--- a/python/cudf/cudf/io/dlpack.py
+++ b/python/cudf/cudf/io/dlpack.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 
 import cudf
@@ -34,12 +34,13 @@ def from_dlpack(pycapsule_obj):
     tensor is row-major, transpose it before passing it to this function.
     """
 
-    data, _ = libdlpack.from_dlpack(pycapsule_obj)
+    columns = libdlpack.from_dlpack(pycapsule_obj)
+    column_names = range(len(columns))
 
-    if len(data) == 1:
-        return cudf.Series._from_data(data)
+    if len(columns) == 1:
+        return cudf.Series._from_columns(columns, column_names=column_names)
     else:
-        return cudf.DataFrame._from_data(data)
+        return cudf.DataFrame._from_columns(columns, column_names=column_names)
 
 
 @ioutils.doc_to_dlpack()
@@ -91,4 +92,4 @@ def to_dlpack(cudf_obj):
     )
     gdf = gdf.astype(dtype)
 
-    return libdlpack.to_dlpack(gdf)
+    return libdlpack.to_dlpack([*gdf._columns])

From cb1a1a94c8e24e39636151abd13e78d056cda99a Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 14:16:04 -0700
Subject: [PATCH 03/21] Rolling out to join.pyx

---
 python/cudf/cudf/_lib/join.pyx     | 46 +++++++++---------------------
 python/cudf/cudf/core/join/join.py | 12 ++++----
 2 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 5921f06d36e..1ff6daaae52 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from itertools import chain
 
@@ -16,31 +16,22 @@ from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport data_type, size_type, type_id
-from cudf._lib.utils cimport table_view_from_table
+from cudf._lib.utils cimport table_view_from_columns
 
 # The functions below return the *gathermaps* that represent
 # the join result when joining on the keys `lhs` and `rhs`.
 
-cpdef join(lhs, rhs, how=None):
+cpdef join(list lhs, list rhs, how=None):
     cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
-    cdef table_view c_lhs = table_view_from_table(lhs)
-    cdef table_view c_rhs = table_view_from_table(rhs)
+    cdef table_view c_lhs = table_view_from_columns(lhs)
+    cdef table_view c_rhs = table_view_from_columns(rhs)
 
     if how == "inner":
-        c_result = move(cpp_join.inner_join(
-            c_lhs,
-            c_rhs
-        ))
+        c_result = move(cpp_join.inner_join(c_lhs, c_rhs))
     elif how == "left":
-        c_result = move(cpp_join.left_join(
-            c_lhs,
-            c_rhs
-        ))
+        c_result = move(cpp_join.left_join(c_lhs, c_rhs))
     elif how == "outer":
-        c_result = move(cpp_join.full_join(
-            c_lhs,
-            c_rhs
-        ))
+        c_result = move(cpp_join.full_join(c_lhs, c_rhs))
     else:
         raise ValueError(f"Invalid join type {how}")
 
@@ -49,30 +40,21 @@ cpdef join(lhs, rhs, how=None):
     return left_rows, right_rows
 
 
-cpdef semi_join(lhs, rhs, how=None):
+cpdef semi_join(list lhs, list rhs, how=None):
     # left-semi and left-anti joins
     cdef cpp_join.gather_map_type c_result
-    cdef table_view c_lhs = table_view_from_table(lhs)
-    cdef table_view c_rhs = table_view_from_table(rhs)
+    cdef table_view c_lhs = table_view_from_columns(lhs)
+    cdef table_view c_rhs = table_view_from_columns(rhs)
 
     if how == "leftsemi":
-        c_result = move(cpp_join.left_semi_join(
-            c_lhs,
-            c_rhs
-        ))
+        c_result = move(cpp_join.left_semi_join(c_lhs, c_rhs))
     elif how == "leftanti":
-        c_result = move(cpp_join.left_anti_join(
-            c_lhs,
-            c_rhs
-        ))
+        c_result = move(cpp_join.left_anti_join(c_lhs, c_rhs))
     else:
         raise ValueError(f"Invalid join type {how}")
 
     cdef Column left_rows = _gather_map_as_column(move(c_result))
-    return (
-        left_rows,
-        None
-    )
+    return left_rows, None
 
 
 cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map):
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index c7e46cf0165..c3ccf91efe7 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -177,15 +177,15 @@ def __init__(
         )
 
     def perform_merge(self) -> Frame:
-        left_join_cols = {}
-        right_join_cols = {}
+        left_join_cols = []
+        right_join_cols = []
 
         for left_key, right_key in zip(self._left_keys, self._right_keys):
             lcol = left_key.get(self.lhs)
             rcol = right_key.get(self.rhs)
             lcol_casted, rcol_casted = _match_join_keys(lcol, rcol, self.how)
-            left_join_cols[left_key.name] = lcol_casted
-            right_join_cols[left_key.name] = rcol_casted
+            left_join_cols.append(lcol_casted)
+            right_join_cols.append(rcol_casted)
 
             # Categorical dtypes must be cast back from the underlying codes
             # type that was returned by _match_join_keys.
@@ -201,9 +201,7 @@ def perform_merge(self) -> Frame:
             right_key.set(self.rhs, rcol_casted, validate=False)
 
         left_rows, right_rows = self._joiner(
-            cudf.core.frame.Frame(left_join_cols),
-            cudf.core.frame.Frame(right_join_cols),
-            how=self.how,
+            left_join_cols, right_join_cols, how=self.how,
         )
 
         gather_index = self._using_left_index or self._using_right_index

From 30d6b78e2c2bf157f2b2b4891350749a8c5ea33e Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 14:42:12 -0700
Subject: [PATCH 04/21] Rolling out to partitioning.pyx

---
 python/cudf/cudf/_lib/partitioning.pyx | 22 ++++++----------------
 python/cudf/cudf/core/frame.py         | 15 +++++++++------
 2 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index e53667e7589..f2f5a92aca1 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -11,21 +11,19 @@ from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.partitioning cimport partition as cpp_partition
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count
 
 cimport cudf._lib.cpp.types as libcudf_types
 
 
-def partition(source_table, Column partition_map,
-              object num_partitions, bool keep_index=True):
+def partition(list source_columns, Column partition_map,
+              object num_partitions):
 
     if num_partitions is None:
         num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True)
     cdef int c_num_partitions = num_partitions
-    cdef table_view c_source_view = table_view_from_table(
-        source_table, not keep_index
-    )
+    cdef table_view c_source_view = table_view_from_columns(source_columns)
 
     cdef column_view c_partition_map_view = partition_map.view()
 
@@ -40,13 +38,5 @@ def partition(source_table, Column partition_map,
         )
 
     return (
-        *data_from_unique_ptr(
-            move(c_result.first),
-            column_names=source_table._column_names,
-            index_names=source_table._index_names if(
-                keep_index is True)
-            else None
-
-        ),
-        list(c_result.second)
+        columns_from_unique_ptr(move(c_result.first)), list(c_result.second)
     )
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 98c052bbc34..ff30fb24963 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1150,10 +1150,16 @@ def scatter_by_map(
                     f"ERROR: map_size must be >= {count} (got {map_size})."
                 )
 
-        data, index, output_offsets = libcudf.partitioning.partition(
-            self, map_index, map_size, keep_index
+        partitioned_columns, output_offsets = libcudf.partitioning.partition(
+            [*(self._index._columns if keep_index else ()), *self._columns],
+            map_index,
+            map_size,
+        )
+        partitioned = self._from_columns_like_self(
+            partitioned_columns,
+            column_names=self._column_names,
+            index_names=self._index_names if keep_index else None,
         )
-        partitioned = self.__class__._from_data(data, index)
 
         # due to the split limitation mentioned
         # here: https://github.com/rapidsai/cudf/issues/4607
@@ -1163,9 +1169,6 @@ def scatter_by_map(
 
         result = partitioned._split(output_offsets, keep_index=keep_index)
 
-        for frame in result:
-            frame._copy_type_metadata(self, include_index=keep_index)
-
         if map_size:
             result += [
                 self._empty_like(keep_index)

From 1a23fd6a5ef82144565dd9f0d8a36f2ff5084c10 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 14:47:28 -0700
Subject: [PATCH 05/21] Include gil releases for joins

---
 python/cudf/cudf/_lib/join.pyx | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 1ff6daaae52..1baef266dab 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -27,11 +27,14 @@ cpdef join(list lhs, list rhs, how=None):
     cdef table_view c_rhs = table_view_from_columns(rhs)
 
     if how == "inner":
-        c_result = move(cpp_join.inner_join(c_lhs, c_rhs))
+        with nogil:
+            c_result = move(cpp_join.inner_join(c_lhs, c_rhs))
     elif how == "left":
-        c_result = move(cpp_join.left_join(c_lhs, c_rhs))
+        with nogil:
+            c_result = move(cpp_join.left_join(c_lhs, c_rhs))
     elif how == "outer":
-        c_result = move(cpp_join.full_join(c_lhs, c_rhs))
+        with nogil:
+            c_result = move(cpp_join.full_join(c_lhs, c_rhs))
     else:
         raise ValueError(f"Invalid join type {how}")
 
@@ -47,9 +50,11 @@ cpdef semi_join(list lhs, list rhs, how=None):
     cdef table_view c_rhs = table_view_from_columns(rhs)
 
     if how == "leftsemi":
-        c_result = move(cpp_join.left_semi_join(c_lhs, c_rhs))
+        with nogil:
+            c_result = move(cpp_join.left_semi_join(c_lhs, c_rhs))
     elif how == "leftanti":
-        c_result = move(cpp_join.left_anti_join(c_lhs, c_rhs))
+        with nogil:
+            c_result = move(cpp_join.left_anti_join(c_lhs, c_rhs))
     else:
         raise ValueError(f"Invalid join type {how}")
 

From 42737ab4f397aba6f3974587fe9664c1ebeb79a1 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 15:04:20 -0700
Subject: [PATCH 06/21] Rolling out to quantiles.pyx

---
 python/cudf/cudf/_lib/quantiles.pyx | 14 +++++---------
 python/cudf/cudf/core/frame.py      | 10 ++++------
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx
index 497a71df89d..f65c29a55a8 100644
--- a/python/cudf/cudf/_lib/quantiles.pyx
+++ b/python/cudf/cudf/_lib/quantiles.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -31,7 +31,7 @@ from cudf._lib.cpp.types cimport (
     order_info,
     sorted,
 )
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def quantile(
@@ -74,14 +74,13 @@ def quantile(
     return Column.from_unique_ptr(move(c_result))
 
 
-def quantiles(source_table,
+def quantiles(list source_columns,
               vector[double] q,
               object interp,
               object is_input_sorted,
               list column_order,
               list null_precedence):
-    cdef table_view c_input = table_view_from_table(
-        source_table, ignore_index=True)
+    cdef table_view c_input = table_view_from_columns(source_columns)
     cdef vector[double] c_q = q
     cdef interpolation c_interp = <interpolation>(
         <underlying_type_t_interpolation> interp
@@ -119,7 +118,4 @@ def quantiles(source_table,
             )
         )
 
-    return data_from_unique_ptr(
-        move(c_result),
-        column_names=source_table._column_names
-    )
+    return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ff30fb24963..51170a9c2ee 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1460,20 +1460,18 @@ def _quantiles(
             libcudf.types.NullOrder[key] for key in null_precedence
         ]
 
-        result = self.__class__._from_data(
+        return self._from_columns_like_self(
             *libcudf.quantiles.quantiles(
-                self,
+                [*self._columns],
                 q,
                 interpolation,
                 is_sorted,
                 column_order,
                 null_precedence,
-            )
+            ),
+            column_names=self._column_names,
         )
 
-        result._copy_type_metadata(self)
-        return result
-
     @_cudf_nvtx_annotate
     def rank(
         self,

From 6226f32473ec94b2e1fb235def6367e729fd4498 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 16:03:07 -0700
Subject: [PATCH 07/21] Rolling out to reshape.pyx

---
 python/cudf/cudf/_lib/reshape.pyx        | 23 +++-----
 python/cudf/cudf/core/dataframe.py       | 56 +++++++++++++++++--
 python/cudf/cudf/core/frame.py           | 70 ------------------------
 python/cudf/cudf/core/groupby/groupby.py |  7 +--
 python/cudf/cudf/core/indexed_frame.py   | 32 +++++++++++
 5 files changed, 93 insertions(+), 95 deletions(-)

diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index d64d0543892..29223947eea 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,32 +13,25 @@ from cudf._lib.cpp.reshape cimport (
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
-def interleave_columns(source_table):
-    cdef table_view c_view = table_view_from_table(
-        source_table, ignore_index=True)
+def interleave_columns(list source_columns):
+    cdef table_view c_view = table_view_from_columns(source_columns)
     cdef unique_ptr[column] c_result
 
     with nogil:
         c_result = move(cpp_interleave_columns(c_view))
 
-    return Column.from_unique_ptr(
-        move(c_result)
-    )
+    return Column.from_unique_ptr(move(c_result))
 
 
-def tile(source_table, size_type count):
+def tile(list source_columns, size_type count):
     cdef size_type c_count = count
-    cdef table_view c_view = table_view_from_table(source_table)
+    cdef table_view c_view = table_view_from_columns(source_columns)
     cdef unique_ptr[table] c_result
 
     with nogil:
         c_result = move(cpp_tile(c_view, c_count))
 
-    return data_from_unique_ptr(
-        move(c_result),
-        column_names=source_table._column_names,
-        index_names=source_table._index_names
-    )
+    return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 006d09366ea..4b5e7cc3441 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5597,8 +5597,10 @@ def stack(self, level=-1, dropna=True):
         """
         assert level in (None, -1)
         repeated_index = self.index.repeat(self.shape[1])
-        name_index = Frame({0: self._column_names}).tile(self.shape[0])
-        new_index = list(repeated_index._columns) + [name_index._columns[0]]
+        name_index = libcudf.reshape.tile(
+            [as_column(self._column_names)], self.shape[0]
+        )
+        new_index = list(repeated_index._columns) + name_index
         if isinstance(self._index, MultiIndex):
             index_names = self._index.names + [None]
         else:
@@ -5621,9 +5623,15 @@ def stack(self, level=-1, dropna=True):
             }
         )
 
-        data_col = libcudf.reshape.interleave_columns(homogenized)
+        result = Series._from_data(
+            {
+                None: libcudf.reshape.interleave_columns(
+                    [*homogenized._columns]
+                )
+            },
+            index=new_index,
+        )
 
-        result = Series(data=data_col, index=new_index)
         if dropna:
             return result.dropna()
         else:
@@ -6057,6 +6065,46 @@ def _from_columns_like_self(
         result._set_column_names_like(self)
         return result
 
+    @_cudf_nvtx_annotate
+    def interleave_columns(self):
+        """
+        Interleave Series columns of a table into a single column.
+
+        Converts the column major table `self` into a row major column.
+
+        Parameters
+        ----------
+        self : input Table containing columns to interleave.
+
+        Examples
+        --------
+        >>> df = DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
+        >>> df
+        0    [A1, A2, A3]
+        1    [B1, B2, B3]
+        >>> df.interleave_columns()
+        0    A1
+        1    B1
+        2    A2
+        3    B2
+        4    A3
+        5    B3
+
+        Returns
+        -------
+        The interleaved columns as a single column
+        """
+        if ("category" == self.dtypes).any():
+            raise ValueError(
+                "interleave_columns does not support 'category' dtype."
+            )
+
+        result = self._constructor_sliced._from_data(
+            {None: libcudf.reshape.interleave_columns([*self._columns])}
+        )
+
+        return result
+
 
 def from_dataframe(df, allow_copy=False):
     return df_protocol.from_dataframe(df, allow_copy=allow_copy)
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 51170a9c2ee..5ddc74308f1 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2286,76 +2286,6 @@ def notnull(self):
     # Alias for notnull
     notna = notnull
 
-    @_cudf_nvtx_annotate
-    def interleave_columns(self):
-        """
-        Interleave Series columns of a table into a single column.
-
-        Converts the column major table `cols` into a row major column.
-
-        Parameters
-        ----------
-        cols : input Table containing columns to interleave.
-
-        Examples
-        --------
-        >>> df = DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
-        >>> df
-        0    [A1, A2, A3]
-        1    [B1, B2, B3]
-        >>> df.interleave_columns()
-        0    A1
-        1    B1
-        2    A2
-        3    B2
-        4    A3
-        5    B3
-
-        Returns
-        -------
-        The interleaved columns as a single column
-        """
-        if ("category" == self.dtypes).any():
-            raise ValueError(
-                "interleave_columns does not support 'category' dtype."
-            )
-
-        result = self._constructor_sliced(
-            libcudf.reshape.interleave_columns(self)
-        )
-
-        return result
-
-    @_cudf_nvtx_annotate
-    def tile(self, count):
-        """
-        Repeats the rows from `self` DataFrame `count` times to form a
-        new DataFrame.
-
-        Parameters
-        ----------
-        self : input Table containing columns to interleave.
-        count : Number of times to tile "rows". Must be non-negative.
-
-        Examples
-        --------
-        >>> df  = Dataframe([[8, 4, 7], [5, 2, 3]])
-        >>> count = 2
-        >>> df.tile(df, count)
-           0  1  2
-        0  8  4  7
-        1  5  2  3
-        0  8  4  7
-        1  5  2  3
-
-        Returns
-        -------
-        The table containing the tiled "rows".
-        """
-        result = self.__class__._from_data(*libcudf.reshape.tile(self, count))
-        result._copy_type_metadata(self)
-        return result
-
     @_cudf_nvtx_annotate
     def searchsorted(
         self, values, side="left", ascending=True, na_position="last"
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index a1a4596ba45..9d53558d73a 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1097,16 +1097,11 @@ def _cov_or_corr(self, func, method_name):
             for i in range(0, len(cols_list), num_cols)
         ]
 
-        def combine_columns(gb_cov_corr, ys):
-            list_of_columns = [gb_cov_corr._data[y] for y in ys]
-            frame = cudf.core.frame.Frame._from_columns(list_of_columns, ys)
-            return interleave_columns(frame)
-
         # interleave: combines the correlation or covariance results for each
         # column-pair into a single column
         res = cudf.DataFrame._from_data(
             {
-                x: combine_columns(gb_cov_corr, ys)
+                x: interleave_columns([gb_cov_corr._data[y] for y in ys])
                 for ys, x in zip(cols_split, column_names)
             }
         )
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index dced49016fd..7d9b354e69c 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -2425,6 +2425,38 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             res.index.names = self._index.names
         return res
 
+    @_cudf_nvtx_annotate
+    def tile(self, count):
+        """Repeats the rows `count` times to form a new Frame.
+
+        Parameters
+        ----------
+        self : input Table whose rows are tiled.
+        count : Number of times to tile "rows". Must be non-negative.
+
+        Examples
+        --------
+        >>> df  = Dataframe([[8, 4, 7], [5, 2, 3]])
+        >>> count = 2
+        >>> df.tile(df, count)
+           0  1  2
+        0  8  4  7
+        1  5  2  3
+        0  8  4  7
+        1  5  2  3
+
+        Returns
+        -------
+        The indexed frame containing the tiled "rows".
+        """
+        return self._from_columns_like_self(
+            libcudf.reshape.tile(
+                [*self._index._columns, *self._columns], count
+            ),
+            column_names=self._column_names,
+            index_names=self._index_names,
+        )
+
 
 def _check_duplicate_level_names(specified, level_names):
     """Raise if any of `specified` has duplicates in `level_names`."""

From f00b9e0594309dceb71c49c7149cdce3d207f334 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 17:05:32 -0700
Subject: [PATCH 08/21] Rolling out to search.pyx

---
 python/cudf/cudf/_lib/search.pyx      | 24 +++++++++++-------------
 python/cudf/cudf/core/frame.py        | 22 +++++++++++++++++-----
 python/cudf/cudf/tests/test_search.py |  8 ++++++++
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx
index f92ef753fc2..d5568f53231 100644
--- a/python/cudf/cudf/_lib/search.pyx
+++ b/python/cudf/cudf/_lib/search.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -10,20 +10,20 @@ from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport table_view_from_table
+from cudf._lib.utils cimport table_view_from_columns
 
 
 def search_sorted(
-    table, values, side, ascending=True, na_position="last"
+    list source, list values, side, ascending=True, na_position="last"
 ):
     """Find indices where elements should be inserted to maintain order
 
     Parameters
     ----------
-    table : Frame
-        Frame to search in
-    values : Frame
-        Frame of values to search for
+    source : list of columns
+        List of columns to search in
+    values : list of columns
+        List of value columns to search for
     side : str {‘left’, ‘right’} optional
         If ‘left’, the index of the first suitable location is given.
         If ‘right’, return the last such index
@@ -33,10 +33,8 @@ def search_sorted(
     cdef vector[libcudf_types.null_order] c_null_precedence
     cdef libcudf_types.order c_order
     cdef libcudf_types.null_order c_null_order
-    cdef table_view c_table_data = table_view_from_table(
-        table, ignore_index=True)
-    cdef table_view c_values_data = table_view_from_table(
-        values, ignore_index=True)
+    cdef table_view c_table_data = table_view_from_columns(source)
+    cdef table_view c_values_data = table_view_from_columns(values)
 
     # Note: We are ignoring index columns here
     c_order = (libcudf_types.order.ASCENDING
@@ -47,9 +45,9 @@ def search_sorted(
         if na_position=="last"
         else libcudf_types.null_order.BEFORE
     )
-    c_column_order = vector[libcudf_types.order](table._num_columns, c_order)
+    c_column_order = vector[libcudf_types.order](len(source), c_order)
     c_null_precedence = vector[libcudf_types.null_order](
-        table._num_columns, c_null_order
+        len(source), c_null_order
     )
 
     if side == 'left':
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5ddc74308f1..af4eee0eb16 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2355,12 +2355,24 @@ def searchsorted(
             scalar_flag = True
 
         if not isinstance(values, Frame):
-            values = as_column(values)
-            if values.dtype != self.dtype:
-                self = self.astype(values.dtype)
-            values = values.as_frame()
+            values = [as_column(values)]
+        else:
+            values = [*values._columns]
+        if len(values) != len(self._data):
+            raise ValueError("Mismatch number of columns to search for.")
+
+        sources = [
+            col
+            if is_dtype_equal(col.dtype, val.dtype)
+            else col.astype(val.dtype)
+            for col, val in zip(self._columns, values)
+        ]
         outcol = libcudf.search.search_sorted(
-            self, values, side, ascending=ascending, na_position=na_position
+            sources,
+            values,
+            side,
+            ascending=ascending,
+            na_position=na_position,
         )
 
         # Retrun result as cupy array if the values is non-scalar
diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py
index cd029d02d79..103e40dbeda 100644
--- a/python/cudf/cudf/tests/test_search.py
+++ b/python/cudf/cudf/tests/test_search.py
@@ -73,6 +73,14 @@ def test_searchsorted_dataframe(side, multiindex):
         assert result == [2, 0, 4, 1]
 
 
+def test_search_sorted_dataframe_unequal_number_of_columns():
+    values = cudf.DataFrame({"a": [1, 0, 5, 1]})
+    base = cudf.DataFrame({"a": [1, 0, 5, 1], "b": ["x", "z", "w", "a"]})
+
+    with pytest.raises(ValueError, match="Mismatch number of columns"):
+        base.searchsorted(values)
+
+
 @pytest.mark.parametrize("side", ["left", "right"])
 def test_searchsorted_categorical(side):
 

From e86363c52d54edb6f509e624dcde22d16ac0d530 Mon Sep 17 00:00:00 2001
From: Michael Wang <michaelwang0905@icloud.com>
Date: Fri, 18 Mar 2022 17:13:20 -0700
Subject: [PATCH 09/21] Fix copyrights

---
 python/cudf/cudf/tests/test_search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py
index 103e40dbeda..d3433a589a7 100644
--- a/python/cudf/cudf/tests/test_search.py
+++ b/python/cudf/cudf/tests/test_search.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 import cupy
 import numpy as np
 import pandas as pd

From 881edbaba94de366c3e510269a260fd4fe99b94d Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 14 Apr 2022 15:30:07 -0700
Subject: [PATCH 10/21] Refactor table_encode in `transform.pyx`

---
 python/cudf/cudf/_lib/transform.pyx | 20 +++++++-------------
 python/cudf/cudf/core/frame.py      |  6 ++----
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index 96d25cb92c9..175150b6865 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import numpy as np
 from numba.np import numpy_support
@@ -25,9 +25,9 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type, type_id
 from cudf._lib.types cimport underlying_type_t_type_id
 from cudf._lib.utils cimport (
+    columns_from_unique_ptr,
     data_from_table_view,
-    data_from_unique_ptr,
-    table_view_from_table,
+    table_view_from_columns,
 )
 
 
@@ -123,21 +123,15 @@ def transform(Column input, op):
     return Column.from_unique_ptr(move(c_output))
 
 
-def table_encode(input):
-    cdef table_view c_input = table_view_from_table(
-        input, ignore_index=True)
+def table_encode(list source_columns):
+    cdef table_view c_input = table_view_from_columns(source_columns)
     cdef pair[unique_ptr[table], unique_ptr[column]] c_result
 
     with nogil:
         c_result = move(libcudf_transform.encode(c_input))
 
-    return (
-        *data_from_unique_ptr(
-            move(c_result.first),
-            column_names=input._column_names,
-        ),
-        Column.from_unique_ptr(move(c_result.second))
-    )
+    return columns_from_unique_ptr(
+        move(c_result.first)), Column.from_unique_ptr(move(c_result.second))
 
 
 def one_hot_encode(Column input_column, Column categories):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index deef069f80e..c6a6d21df0e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2408,10 +2408,8 @@ def _split(self, splits):
 
     @_cudf_nvtx_annotate
     def _encode(self):
-        data, index, indices = libcudf.transform.table_encode(self)
-        for name, col in data.items():
-            data[name] = col._with_type_metadata(self._data[name].dtype)
-        keys = self.__class__._from_data(data, index)
+        columns, indices = libcudf.transform.table_encode([*self._columns])
+        keys = self._from_columns_like_self(columns)
         return keys, indices
 
     @_cudf_nvtx_annotate

From 63ec965c35f8b51fa18b82017a26d5fc3b1fab09 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 14 Apr 2022 15:53:59 -0700
Subject: [PATCH 11/21] Refactor lists.explode_outer

---
 python/cudf/cudf/_lib/lists.pyx        | 19 +++++++++++--------
 python/cudf/cudf/core/indexed_frame.py | 24 +++++++++++-------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 523686fafe6..af938761e71 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -42,7 +42,12 @@ from cudf.core.dtypes import ListDtype
 
 from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of
 from cudf._lib.cpp.lists.extract cimport extract_list_element
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport (
+    columns_from_unique_ptr,
+    data_from_unique_ptr,
+    table_view_from_columns,
+    table_view_from_table,
+)
 
 
 def count_elements(Column col):
@@ -61,8 +66,10 @@ def count_elements(Column col):
     return result
 
 
-def explode_outer(tbl, int explode_column_idx, bool ignore_index=False):
-    cdef table_view c_table_view = table_view_from_table(tbl, ignore_index)
+def explode_outer(
+    list source_columns, int explode_column_idx
+):
+    cdef table_view c_table_view = table_view_from_columns(source_columns)
     cdef size_type c_explode_column_idx = explode_column_idx
 
     cdef unique_ptr[table] c_result
@@ -70,11 +77,7 @@ def explode_outer(tbl, int explode_column_idx, bool ignore_index=False):
     with nogil:
         c_result = move(cpp_explode_outer(c_table_view, c_explode_column_idx))
 
-    return data_from_unique_ptr(
-        move(c_result),
-        column_names=tbl._column_names,
-        index_names=None if ignore_index else tbl._index_names
-    )
+    return columns_from_unique_ptr(move(c_result))
 
 
 def drop_list_duplicates(Column col, bool nulls_equal, bool nans_all_equal):
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 6c29c3bb8d4..ad7b2f8458a 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -2689,21 +2689,19 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         if not ignore_index and self._index is not None:
             explode_column_num += self._index.nlevels
 
-        data, index = libcudf.lists.explode_outer(
-            self, explode_column_num, ignore_index
-        )
-        res = self.__class__._from_data(
-            ColumnAccessor(
-                data,
-                multiindex=self._data.multiindex,
-                level_names=self._data._level_names,
-            ),
-            index=index,
+        exploded = libcudf.lists.explode_outer(
+            [
+                *(self._index._data.columns if not ignore_index else ()),
+                *self._columns,
+            ],
+            explode_column_num,
         )
 
-        if not ignore_index and self._index is not None:
-            res.index.names = self._index.names
-        return res
+        return self._from_columns_like_self(
+            exploded,
+            self._column_names,
+            self._index_names if not ignore_index else None,
+        )
 
     @_cudf_nvtx_annotate
     def tile(self, count):

From 96913a14a59b2840486f212e8fd3081c21ecb53e Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 14 Apr 2022 16:12:25 -0700
Subject: [PATCH 12/21] Refactor lists.pyx frame APIs

---
 python/cudf/cudf/_lib/lists.pyx       | 14 ++++----------
 python/cudf/cudf/core/column/lists.py |  4 +---
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index af938761e71..e5a705ab603 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -42,12 +42,7 @@ from cudf.core.dtypes import ListDtype
 
 from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of
 from cudf._lib.cpp.lists.extract cimport extract_list_element
-from cudf._lib.utils cimport (
-    columns_from_unique_ptr,
-    data_from_unique_ptr,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def count_elements(Column col):
@@ -200,18 +195,17 @@ def index_of(Column col, object py_search_key):
     return Column.from_unique_ptr(move(c_result))
 
 
-def concatenate_rows(tbl):
+def concatenate_rows(list source_columns):
     cdef unique_ptr[column] c_result
 
-    cdef table_view c_table_view = table_view_from_table(tbl)
+    cdef table_view c_table_view = table_view_from_columns(source_columns)
 
     with nogil:
         c_result = move(cpp_concatenate_rows(
             c_table_view,
         ))
 
-    result = Column.from_unique_ptr(move(c_result))
-    return result
+    return Column.from_unique_ptr(move(c_result))
 
 
 def concatenate_list_elements(Column input_column, dropna=False):
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 8578bfe8147..b383f7bc321 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -113,9 +113,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             return NotImplemented
         if isinstance(other.dtype, ListDtype):
             if op == "__add__":
-                return concatenate_rows(
-                    cudf.core.frame.Frame({0: self, 1: other})
-                )
+                return concatenate_rows([self, other])
             else:
                 raise NotImplementedError(
                     "Lists concatenation for this operation is not yet"

From 5f8122c5cdbd64a011d20172c04038032a21b959 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 14 Apr 2022 16:20:31 -0700
Subject: [PATCH 13/21] Refactor `string/combine.concatenate`

---
 python/cudf/cudf/_lib/strings/combine.pyx | 9 ++++-----
 python/cudf/cudf/core/column/string.py    | 6 ++----
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx
index 3b5ef33a668..eeb39f70728 100644
--- a/python/cudf/cudf/_lib/strings/combine.pyx
+++ b/python/cudf/cudf/_lib/strings/combine.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
@@ -18,10 +18,10 @@ from cudf._lib.cpp.strings.combine cimport (
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport table_view_from_table
+from cudf._lib.utils cimport table_view_from_columns
 
 
-def concatenate(source_strings,
+def concatenate(list source_strings,
                 object sep,
                 object na_rep):
     """
@@ -33,8 +33,7 @@ def concatenate(source_strings,
     cdef DeviceScalar narep = na_rep.device_value
 
     cdef unique_ptr[column] c_result
-    cdef table_view source_view = table_view_from_table(
-        source_strings, ignore_index=True)
+    cdef table_view source_view = table_view_from_columns(source_strings)
 
     cdef const string_scalar* scalar_separator = \
         <const string_scalar*>(separator.get_raw_ptr())
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index d5d45c341d5..6f4a6334a1d 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -365,9 +365,7 @@ def cat(self, others=None, sep=None, na_rep=None):
             other_cols = _get_cols_list(self._parent, others)
             all_cols = [self._column] + other_cols
             data = libstrings.concatenate(
-                cudf.DataFrame(
-                    {index: value for index, value in enumerate(all_cols)}
-                ),
+                all_cols,
                 cudf.Scalar(sep),
                 cudf.Scalar(na_rep, "str"),
             )
@@ -5531,7 +5529,7 @@ def _binaryop(
                 return cast(
                     "column.ColumnBase",
                     libstrings.concatenate(
-                        cudf.DataFrame._from_data(data={0: lhs, 1: rhs}),
+                        [lhs, rhs],
                         sep=cudf.Scalar(""),
                         na_rep=cudf.Scalar(None, "str"),
                     ),

From 24f61fe402a570b0924d82dbbc4bfbbfba02dcf5 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 09:22:55 -0700
Subject: [PATCH 14/21] black style fix

---
 python/cudf/cudf/core/column/column.py | 4 +++-
 python/cudf/cudf/core/join/join.py     | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 462516e2da4..5c9d8535798 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -229,7 +229,9 @@ def to_arrow(self) -> pa.Array:
           4
         ]
         """
-        return libcudf.interop.to_arrow([self], [["None"]],)["None"].chunk(0)
+        return libcudf.interop.to_arrow([self], [["None"]],)[
+            "None"
+        ].chunk(0)
 
     @classmethod
     def from_arrow(cls, array: pa.Array) -> ColumnBase:
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index c3ccf91efe7..6a495ef8d9a 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -201,7 +201,9 @@ def perform_merge(self) -> Frame:
             right_key.set(self.rhs, rcol_casted, validate=False)
 
         left_rows, right_rows = self._joiner(
-            left_join_cols, right_join_cols, how=self.how,
+            left_join_cols,
+            right_join_cols,
+            how=self.how,
         )
 
         gather_index = self._using_left_index or self._using_right_index

From 349f45f566c744ca18af2ed5faf81b1d8d860059 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:36:07 -0700
Subject: [PATCH 15/21] Various docstring updates

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/_lib/interop.pyx      | 7 ++++---
 python/cudf/cudf/core/dataframe.py     | 3 ++-
 python/cudf/cudf/core/indexed_frame.py | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx
index 26bb2e868f5..dd9b4a87f5a 100644
--- a/python/cudf/cudf/_lib/interop.pyx
+++ b/python/cudf/cudf/_lib/interop.pyx
@@ -47,7 +47,7 @@ def from_dlpack(dlpack_capsule):
 
 def to_dlpack(list source_columns):
     """
-    Converts a cudf Frame into a DLPack Tensor PyCapsule.
+    Converts a list of columns into a DLPack Tensor PyCapsule.
 
     DLPack Tensor PyCapsule will have the name "dltensor".
     """
@@ -105,7 +105,8 @@ cdef vector[column_metadata] gather_metadata(object metadata) except *:
 
 
 def to_arrow(list source_columns, object metadata):
-    """Convert from cudf Frame to PyArrow Table.
+    """Convert a list of columns from
+    cudf Frame to a PyArrow Table.
 
     Parameters
     ----------
@@ -130,7 +131,7 @@ def to_arrow(list source_columns, object metadata):
 
 
 def from_arrow(object input_table):
-    """Convert from PyArrow Table to cudf Frame.
+    """Convert from PyArrow Table to a list of columns.
 
     Parameters
     ----------
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 9fa9b9231d8..569a148388e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6189,7 +6189,8 @@ def interleave_columns(self):
 
         Examples
         --------
-        >>> df = DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
+        >>> import cudf
+        >>> df = cudf.DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
         >>> df
         0    [A1, A2, A3]
         1    [B1, B2, B3]
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index ad7b2f8458a..68c9a429227 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -2714,7 +2714,8 @@ def tile(self, count):
 
         Examples
         --------
-        >>> df  = Dataframe([[8, 4, 7], [5, 2, 3]])
+        >>> import cudf
+        >>> df  = cudf.DataFrame([[8, 4, 7], [5, 2, 3]])
         >>> count = 2
         >>> df.tile(df, count)
            0  1  2

From 2ba24358fe250cf133be66168f1dd8862ffe079b Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:41:41 -0700
Subject: [PATCH 16/21] More docstring changes

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/_lib/interop.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx
index dd9b4a87f5a..88c8b19ded0 100644
--- a/python/cudf/cudf/_lib/interop.pyx
+++ b/python/cudf/cudf/_lib/interop.pyx
@@ -25,7 +25,7 @@ from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 def from_dlpack(dlpack_capsule):
     """
-    Converts a DLPack Tensor PyCapsule into a cudf Frame object.
+    Converts a DLPack Tensor PyCapsule into a list of columns.
 
     DLPack Tensor PyCapsule is expected to have the name "dltensor".
     """
@@ -139,7 +139,7 @@ def from_arrow(object input_table):
 
     Returns
     -------
-    cudf Frame
+    A list of columns to construct Frame object
     """
     cdef shared_ptr[CTable] cpp_arrow_table = (
         pyarrow_unwrap_table(input_table)

From c0e1a1bc62bad5650ff229ca2e621aa4a0874e77 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:52:10 -0700
Subject: [PATCH 17/21] Use _from_data factory

---
 python/cudf/cudf/core/dataframe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 569a148388e..d02096d3ef5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5688,7 +5688,7 @@ def stack(self, level=-1, dropna=True):
         else:
             index_names = [None] * len(new_index_columns)
         new_index = MultiIndex.from_frame(
-            DataFrame(
+            DataFrame._from_data(
                 dict(zip(range(0, len(new_index_columns)), new_index_columns))
             ),
             names=index_names,
@@ -5696,7 +5696,7 @@ def stack(self, level=-1, dropna=True):
 
         # Collect datatypes and cast columns as that type
         common_type = np.result_type(*self.dtypes)
-        homogenized = DataFrame(
+        homogenized = DataFrame._from_data(
             {
                 c: (
                     self._data[c].astype(common_type)

From 665f79b2f8b9baa99f9575a2a13e3f208e137d45 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 16:53:18 -0700
Subject: [PATCH 18/21] Update python/cudf/cudf/core/dataframe.py

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/core/dataframe.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d02096d3ef5..e69f44042b3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6211,12 +6211,10 @@ def interleave_columns(self):
                 "interleave_columns does not support 'category' dtype."
             )
 
-        result = self._constructor_sliced._from_data(
+        return self._constructor_sliced._from_data(
             {None: libcudf.reshape.interleave_columns([*self._columns])}
         )
 
-        return result
-
 
 def from_dataframe(df, allow_copy=False):
     return df_protocol.from_dataframe(df, allow_copy=allow_copy)

From adbd3477d69b85c23e384a55a0f66cc5cf6801c2 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 18 Apr 2022 23:13:02 -0700
Subject: [PATCH 19/21] Fix interleave_columns docstring

---
 python/cudf/cudf/core/dataframe.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e69f44042b3..17b46c0e34e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6190,10 +6190,12 @@ def interleave_columns(self):
         Examples
         --------
         >>> import cudf
-        >>> df = cudf.DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']])
+        >>> df = cudf.DataFrame({0: ['A1', 'A2', 'A3'], 1: ['B1', 'B2', 'B3']})
         >>> df
-        0    [A1, A2, A3]
-        1    [B1, B2, B3]
+            0   1
+        0  A1  B1
+        1  A2  B2
+        2  A3  B3
         >>> df.interleave_columns()
         0    A1
         1    B1
@@ -6201,6 +6203,7 @@ def interleave_columns(self):
         3    B2
         4    A3
         5    B3
+        dtype: object
 
         Returns
         -------

From ca7f99a104d7e4430d82d826bccd2995353baa2b Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 19 Apr 2022 00:12:56 -0700
Subject: [PATCH 20/21] Fixing all failed tests

---
 python/cudf/cudf/_lib/scalar.pyx | 22 +++++++---------------
 python/cudf/cudf/core/frame.py   |  2 +-
 python/cudf/cudf/core/index.py   |  4 ++--
 3 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 32d6cb2ea6d..8138b6c65d0 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -68,6 +68,7 @@ from cudf._lib.utils cimport (
     data_from_table_view,
     table_view_from_columns,
     table_view_from_table,
+    columns_from_table_view
 )
 
 
@@ -361,8 +362,8 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s,
             names=columns
         )
 
-    data, _ = from_arrow(pyarrow_table, column_names=columns)
-    cdef table_view struct_view = table_view_from_columns(data.values())
+    data = from_arrow(pyarrow_table)
+    cdef table_view struct_view = table_view_from_columns(data)
 
     s.reset(
         new struct_scalar(struct_view, valid)
@@ -373,18 +374,10 @@ cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
         return cudf.NA
 
     cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
-    columns = [str(i) for i in range(struct_table_view.num_columns())]
+    column_names = [str(i) for i in range(struct_table_view.num_columns())]
 
-    data, _ = data_from_table_view(
-        struct_table_view,
-        None,
-        column_names=columns
-    )
-    to_arrow_table = cudf.core.frame.Frame(
-        cudf.core.column_accessor.ColumnAccessor(data)
-    )
-
-    python_dict = to_arrow(to_arrow_table, columns).to_pydict()
+    columns = columns_from_table_view(struct_table_view, None)
+    python_dict = to_arrow(columns, column_names).to_pydict()
 
     return {k: _nested_na_replace(python_dict[k])[0] for k in python_dict}
 
@@ -415,9 +408,8 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):
 
     cdef column_view list_col_view = (<list_scalar*>s.get()).view()
     cdef Column list_col = Column.from_column_view(list_col_view, None)
-    to_arrow_table = cudf.core.frame.Frame({"col": list_col})
 
-    arrow_table = to_arrow(to_arrow_table, [["col", []]])
+    arrow_table = to_arrow([list_col], [["col", []]])
     result = arrow_table['col'].to_pylist()
     return _nested_na_replace(result)
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c6a6d21df0e..d10f7c690bf 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1278,7 +1278,7 @@ def _quantiles(
         ]
 
         return self._from_columns_like_self(
-            *libcudf.quantiles.quantiles(
+            libcudf.quantiles.quantiles(
                 [*self._columns],
                 q,
                 interpolation,
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index aff13025e72..fd918f723fe 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -76,10 +76,10 @@ def _lexsorted_equal_range(
         sort_inds = None
         sort_vals = idx
     lower_bound = search_sorted(
-        sort_vals, key_as_table, side="left"
+        [*sort_vals._data.columns], [*key_as_table._columns], side="left"
     ).element_indexing(0)
     upper_bound = search_sorted(
-        sort_vals, key_as_table, side="right"
+        [*sort_vals._data.columns], [*key_as_table._columns], side="right"
     ).element_indexing(0)
 
     return lower_bound, upper_bound, sort_inds

From 906da0186f97f567163b1f17d66c7444913d2845 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 19 Apr 2022 00:13:07 -0700
Subject: [PATCH 21/21] Style fix: sort cimport list in scalar.pyx

---
 python/cudf/cudf/_lib/scalar.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 8138b6c65d0..a7acfa8f906 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -65,10 +65,10 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
     timestamp_us,
 )
 from cudf._lib.utils cimport (
+    columns_from_table_view,
     data_from_table_view,
     table_view_from_columns,
     table_view_from_table,
-    columns_from_table_view
 )