From 985a7b06b3ec12f06dea8982f3e9ff9476703382 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 15:37:39 -0700 Subject: [PATCH 01/18] Inline _numeric_column_binop. --- python/cudf/cudf/core/column/numerical.py | 40 ++++------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index d710129900a..39bbf10c235 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -11,7 +11,6 @@ import numpy as np import pandas as pd from numba import cuda, njit -from nvtx import annotate from pandas.api.types import is_integer_dtype import cudf @@ -151,7 +150,7 @@ def binary_operator( msg = "{!r} operator not supported between {} and {}" raise TypeError(msg.format(binop, type(self), type(rhs))) if isinstance(rhs, cudf.core.column.DecimalColumn): - lhs = self.as_decimal_column( + lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column( Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0) ) return lhs.binary_operator(binop, rhs) @@ -163,9 +162,11 @@ def binary_operator( or ((isinstance(tmp, NumericalColumn)) and (0.0 in tmp)) ): out_dtype = np.dtype("float64") - return _numeric_column_binop( - lhs=self, rhs=rhs, op=binop, out_dtype=out_dtype, reflect=reflect - ) + + if binop in {"lt", "gt", "le", "ge", "eq", "ne", "NULL_EQUALS"}: + out_dtype = "bool" + lhs, rhs = (self, rhs) if not reflect else (rhs, self) + return libcudf.binaryop.binaryop(lhs, rhs, binop, out_dtype) def _apply_scan_op(self, op: str) -> ColumnBase: return libcudf.reduce.scan(op, self, True) @@ -761,35 +762,6 @@ def to_pandas( return pd_series -@annotate("BINARY_OP", color="orange", domain="cudf_python") -def _numeric_column_binop( - lhs: Union[ColumnBase, ScalarLike], - rhs: Union[ColumnBase, ScalarLike], - op: str, - out_dtype: Dtype, - reflect: bool = False, -) -> ColumnBase: - if reflect: - lhs, rhs = rhs, lhs - - is_op_comparison = op in [ - "lt", - "gt", - "le", - "ge", - "eq", - "ne", - "NULL_EQUALS", - ] - - if is_op_comparison: - out_dtype = "bool" - - out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) - - return out - - def _numeric_column_unaryop(operand: ColumnBase, op: str) -> ColumnBase: if callable(op): return libcudf.transform.transform(operand, op) From 3a416bdb4e7fe0efeff6032871145f43f8559892 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 15:40:06 -0700 Subject: [PATCH 02/18] Inline binop for datetime columns. 
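The inlined code keeps the old `binop` helper's behaviour: pick the output dtype, swap the operands when the operator is reflected, and call `libcudf.binaryop.binaryop` directly. For reference, a small user-level sketch of operations that dispatch through this method (illustrative only; assumes cudf is installed):

    import numpy as np
    import cudf

    s = cudf.Series(np.array(["2021-01-01", "2021-01-02"], dtype="datetime64[s]"))

    # datetime +/- timedelta goes through the add/sub branches of the dtype dispatch
    print(s + np.timedelta64(1, "s"))

    # comparisons take the np.bool_ output dtype branch
    print(s == s)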
--- python/cudf/cudf/core/column/datetime.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 14c82b5ff45..b96a49c2514 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -11,7 +11,6 @@ import numpy as np import pandas as pd -from nvtx import annotate import cudf from cudf import _lib as libcudf @@ -307,7 +306,7 @@ def binary_operator( ) -> ColumnBase: if isinstance(rhs, cudf.DateOffset): return rhs._datetime_binop(self, op, reflect=reflect) - lhs, rhs = self, rhs + lhs: Union[ScalarLike, ColumnBase] = self if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"): out_dtype = np.dtype(np.bool_) # type: Dtype elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): @@ -332,7 +331,10 @@ def binary_operator( f"Series of dtype {self.dtype} cannot perform " f" the operation {op}" ) - return binop(lhs, rhs, op=op, out_dtype=out_dtype, reflect=reflect) + + if reflect: + lhs, rhs = rhs, lhs + return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def fillna( self, fill_value: Any = None, method: str = None, dtype: Dtype = None @@ -422,20 +424,6 @@ def _make_copy_with_na_as_null(self): return out_col -@annotate("BINARY_OP", color="orange", domain="cudf_python") -def binop( - lhs: Union[ColumnBase, ScalarLike], - rhs: Union[ColumnBase, ScalarLike], - op: str, - out_dtype: Dtype, - reflect: bool, -) -> ColumnBase: - if reflect: - lhs, rhs = rhs, lhs - out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) - return out - - def binop_offset(lhs, rhs, op): if rhs._is_no_op: return lhs From e9a80e0ac36ba15272693dc0f9c52f81170d88ba Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 15:57:47 -0700 Subject: [PATCH 03/18] Inline binop for string columns. --- python/cudf/cudf/core/column/string.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index e4d7b6d4188..f969593e6d9 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -12,7 +12,6 @@ import pandas as pd import pyarrow as pa from numba import cuda -from nvtx import annotate import cudf from cudf import _lib as libcudf @@ -5302,7 +5301,9 @@ def binary_operator( if op == "add": return cast("column.ColumnBase", lhs.str().cat(others=rhs)) elif op in ("eq", "ne", "gt", "lt", "ge", "le", "NULL_EQUALS"): - return _string_column_binop(self, rhs, op=op, out_dtype="bool") + return libcudf.binaryop.binaryop( + lhs=self, rhs=rhs, op=op, dtype="bool" + ) raise TypeError( f"{op} operator not supported between {type(self)} and {type(rhs)}" @@ -5335,17 +5336,6 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": return to_view.view(dtype) -@annotate("BINARY_OP", color="orange", domain="cudf_python") -def _string_column_binop( - lhs: "column.ColumnBase", - rhs: "column.ColumnBase", - op: str, - out_dtype: Dtype, -) -> "column.ColumnBase": - out = libcudf.binaryop.binaryop(lhs=lhs, rhs=rhs, op=op, dtype=out_dtype) - return out - - def _get_cols_list(parent_obj, others): parent_index = ( From eef4b18bbd463634a14be508201bb52b63651296 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 15:58:32 -0700 Subject: [PATCH 04/18] Various minor cleanup tasks for string columns. 
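Mostly small cleanups: tightened type annotations and removal of the StringColumn._nbytes override. The serialize header keeps the same contents, only the annotation style changes; for reference (sketch, assumes cudf is installed):

    import cudf

    col = cudf.Series(["a", None, "c"])._column
    header, frames = col.serialize()
    # still carries the null count, pickled column type, and size
    print(header["null_count"], header["size"])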
--- python/cudf/cudf/core/column/string.py | 11 ++--------- python/cudf/cudf/tests/test_string.py | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index f969593e6d9..5cf32cc9561 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5003,13 +5003,6 @@ def __contains__(self, item: ScalarLike) -> bool: def str(self, parent: ParentType = None) -> StringMethods: return StringMethods(self, parent=parent) - @property - def _nbytes(self) -> int: - if self.size == 0: - return 0 - else: - return self.children[1].size - def as_numerical_column( self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": @@ -5126,7 +5119,7 @@ def to_array(self, fillna: bool = None) -> np.ndarray: return self.to_arrow().to_pandas().values def to_pandas( - self, index: ColumnLike = None, nullable: bool = False, **kwargs + self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "pd.Series": if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) @@ -5139,7 +5132,7 @@ def to_pandas( return pd_series def serialize(self) -> Tuple[dict, list]: - header = {"null_count": self.null_count} # type: Dict[Any, Any] + header: Dict[Any, Any] = {"null_count": self.null_count} header["type-serialized"] = pickle.dumps(type(self)) header["size"] = self.size diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 0ff5b81ce81..0f87f007266 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1317,7 +1317,6 @@ def test_string_no_children_properties(): assert empty_col.children == () assert empty_col.size == 0 - assert empty_col._nbytes == 0 assert getsizeof(empty_col) >= 0 # Accounts for Python GC overhead From 7d4ea4e293fa24509d943bd3f7d7e6756068c6fa Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 16:10:06 -0700 Subject: [PATCH 05/18] Inline binop for timedelta columns. --- python/cudf/cudf/core/column/timedelta.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index d8ad11f41b3..b202838662c 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -9,7 +9,6 @@ import numpy as np import pandas as pd import pyarrow as pa -from nvtx import annotate import cudf from cudf import _lib as libcudf @@ -247,7 +246,7 @@ def binary_operator( if reflect: lhs, rhs = rhs, lhs # type: ignore - return binop(lhs, rhs, op=op, out_dtype=out_dtype) + return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def normalize_binop_value(self, other) -> BinaryOperand: if isinstance(other, cudf.Scalar): @@ -575,17 +574,6 @@ def nanoseconds(self) -> "cudf.core.column.NumericalColumn": ) -@annotate("BINARY_OP", color="orange", domain="cudf_python") -def binop( - lhs: "column.ColumnBase", - rhs: "column.ColumnBase", - op: str, - out_dtype: DtypeObj, -) -> "cudf.core.column.ColumnBase": - out = libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) - return out - - def determine_out_dtype(lhs_dtype: Dtype, rhs_dtype: Dtype) -> Dtype: if np.can_cast(np.dtype(lhs_dtype), np.dtype(rhs_dtype)): return rhs_dtype From eb692837633b7dbdc836246fcbb50d6bdd212a65 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 29 Apr 2021 16:27:40 -0700 Subject: [PATCH 06/18] Cleanup of categorical. 
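The monotonicity flags previously cached themselves by hand with hasattr checks; they now use the cached_property helper from cudf.utils.utils. A generic sketch of the same pattern, written here with functools.cached_property for illustration:

    from functools import cached_property

    class Sorted:
        def __init__(self, values):
            self._values = values

        @cached_property
        def is_monotonic_increasing(self):
            # evaluated on first access, then stored on the instance
            return all(a <= b for a, b in zip(self._values, self._values[1:]))

    s = Sorted([1, 2, 2, 5])
    print(s.is_monotonic_increasing)  # computed
    print(s.is_monotonic_increasing)  # served from the instance cache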
--- python/cudf/cudf/core/column/categorical.py | 21 ++++---------- python/cudf/cudf/core/column/column.py | 32 +++++++-------------- 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 3cd1a599ddc..456d2cc2b46 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -35,6 +35,7 @@ min_signed_type, min_unsigned_type, ) +from cudf.utils.utils import cached_property if TYPE_CHECKING: from cudf.core.column import ( @@ -819,7 +820,7 @@ def __contains__(self, item: ScalarLike) -> bool: return self._encode(item) in self.as_numerical def serialize(self) -> Tuple[dict, list]: - header = {} # type: Dict[Any, Any] + header: Dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"], dtype_frames = self.dtype.serialize() @@ -1341,23 +1342,13 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ return self.as_numerical.find_last_value(self._encode(value)) - @property + @cached_property def is_monotonic_increasing(self) -> bool: - if not hasattr(self, "_is_monotonic_increasing"): - self._is_monotonic_increasing = ( - bool(self.ordered) - and self.as_numerical.is_monotonic_increasing - ) - return self._is_monotonic_increasing + return bool(self.ordered) and self.as_numerical.is_monotonic_increasing - @property + @cached_property def is_monotonic_decreasing(self) -> bool: - if not hasattr(self, "_is_monotonic_decreasing"): - self._is_monotonic_decreasing = ( - bool(self.ordered) - and self.as_numerical.is_monotonic_decreasing - ) - return self._is_monotonic_decreasing + return bool(self.ordered) and self.as_numerical.is_monotonic_decreasing def as_categorical_column( self, dtype: Dtype, **kwargs diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 0f039b137bc..62767c6aae3 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -56,7 +56,7 @@ min_unsigned_type, np_to_pa_dtype, ) -from cudf.utils.utils import mask_dtype +from cudf.utils.utils import cached_property, mask_dtype T = TypeVar("T", bound="ColumnBase") @@ -932,27 +932,17 @@ def is_unique(self) -> bool: def is_monotonic(self) -> bool: return self.is_monotonic_increasing - @property + @cached_property def is_monotonic_increasing(self) -> bool: - if not hasattr(self, "_is_monotonic_increasing"): - if self.has_nulls: - self._is_monotonic_increasing = False - else: - self._is_monotonic_increasing = self.as_frame()._is_sorted( - ascending=None, null_position=None - ) - return self._is_monotonic_increasing + return not self.has_nulls and self.as_frame()._is_sorted( + ascending=None, null_position=None + ) - @property + @cached_property def is_monotonic_decreasing(self) -> bool: - if not hasattr(self, "_is_monotonic_decreasing"): - if self.has_nulls: - self._is_monotonic_decreasing = False - else: - self._is_monotonic_decreasing = self.as_frame()._is_sorted( - ascending=[False], null_position=None - ) - return self._is_monotonic_decreasing + return not self.has_nulls and self.as_frame()._is_sorted( + ascending=[False], null_position=None + ) def get_slice_bound( self, label: ScalarLike, side: builtins.str, kind: builtins.str @@ -1211,7 +1201,7 @@ def unique(self) -> ColumnBase: ) def serialize(self) -> Tuple[dict, list]: - header = {} # type: Dict[Any, Any] + header: Dict[Any, Any] = {} frames = [] header["type-serialized"] = 
pickle.dumps(type(self)) header["dtype"] = self.dtype.str @@ -2226,7 +2216,7 @@ def serialize_columns(columns) -> Tuple[List[dict], List]: frames : list list of frames """ - headers = [] # type List[Dict[Any, Any], ...] + headers: List[Dict[Any, Any]] = [] frames = [] if len(columns) > 0: From 13fee0e4f8b14c5df9a66ca852f3336053af7133 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 30 Apr 2021 08:36:35 -0700 Subject: [PATCH 07/18] Move ColumnBase._concat out of the class. --- python/cudf/cudf/core/column/column.py | 201 ++++++++++++------------- python/cudf/cudf/core/index.py | 3 +- python/cudf/cudf/core/series.py | 3 +- 3 files changed, 104 insertions(+), 103 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 62767c6aae3..c2b40c0d70e 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -197,106 +197,6 @@ def cat( def str(self, parent=None) -> "cudf.core.column.string.StringMethods": raise NotImplementedError() - @classmethod - def _concat( - cls, objs: "MutableSequence[ColumnBase]", dtype: Dtype = None - ) -> ColumnBase: - if len(objs) == 0: - dtype = pd.api.types.pandas_dtype(dtype) - if is_categorical_dtype(dtype): - dtype = CategoricalDtype() - return column_empty(0, dtype=dtype, masked=True) - - # If all columns are `NumericalColumn` with different dtypes, - # we cast them to a common dtype. - # Notice, we can always cast pure null columns - not_null_cols = list(filter(lambda o: o.valid_count > 0, objs)) - if len(not_null_cols) > 0 and ( - len( - [ - o - for o in not_null_cols - if not is_numerical_dtype(o.dtype) - or np.issubdtype(o.dtype, np.datetime64) - ] - ) - == 0 - ): - col_dtypes = [o.dtype for o in not_null_cols] - # Use NumPy to find a common dtype - common_dtype = np.find_common_type(col_dtypes, []) - # Cast all columns to the common dtype - for i in range(len(objs)): - objs[i] = objs[i].astype(common_dtype) - - # Find the first non-null column: - head = objs[0] - for i, obj in enumerate(objs): - if obj.valid_count > 0: - head = obj - break - - for i, obj in enumerate(objs): - # Check that all columns are the same type: - if not pd.api.types.is_dtype_equal(obj.dtype, head.dtype): - # if all null, cast to appropriate dtype - if obj.valid_count == 0: - objs[i] = column_empty_like( - head, dtype=head.dtype, masked=True, newsize=len(obj) - ) - else: - raise ValueError("All columns must be the same type") - - cats = None - is_categorical = all(is_categorical_dtype(o.dtype) for o in objs) - - # Combine CategoricalColumn categories - if is_categorical: - # Combine and de-dupe the categories - cats = ( - cudf.concat([o.cat().categories for o in objs]) - .to_series() - .drop_duplicates(ignore_index=True) - ._column - ) - objs = [ - o.cat()._set_categories( - o.cat().categories, cats, is_unique=True - ) - for o in objs - ] - # Map `objs` into a list of the codes until we port Categorical to - # use the libcudf++ Category data type. 
- objs = [o.cat().codes._column for o in objs] - head = head.cat().codes._column - - newsize = sum(map(len, objs)) - if newsize > libcudf.MAX_COLUMN_SIZE: - raise MemoryError( - f"Result of concat cannot have " - f"size > {libcudf.MAX_COLUMN_SIZE_STR}" - ) - - # Filter out inputs that have 0 length - objs = [o for o in objs if len(o) > 0] - - # Perform the actual concatenation - if newsize > 0: - col = libcudf.concat.concat_columns(objs) - else: - col = column_empty(0, head.dtype, masked=True) - - if is_categorical: - col = build_categorical_column( - categories=as_column(cats), - codes=as_column(col.base_data, dtype=col.dtype), - mask=col.base_mask, - size=col.size, - offset=col.offset, - ) - - return col - def dropna(self, drop_nan: bool = False) -> ColumnBase: if drop_nan: col = self.nans_to_nulls() @@ -796,7 +696,7 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return indices[-1] def append(self, other: ColumnBase) -> ColumnBase: - return self.__class__._concat([self, as_column(other)]) + return _concat_columns([self, as_column(other)]) def quantile( self, @@ -2336,3 +2236,102 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: dtype: int8 """ return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) + + +def _concat_columns( + objs: "MutableSequence[ColumnBase]", dtype: Dtype = None +) -> ColumnBase: + """Concatenate a sequence of columns.""" + if len(objs) == 0: + dtype = pd.api.types.pandas_dtype(dtype) + if is_categorical_dtype(dtype): + dtype = CategoricalDtype() + return column_empty(0, dtype=dtype, masked=True) + + # If all columns are `NumericalColumn` with different dtypes, + # we cast them to a common dtype. + # Notice, we can always cast pure null columns + not_null_cols = list(filter(lambda o: o.valid_count > 0, objs)) + if len(not_null_cols) > 0 and ( + len( + [ + o + for o in not_null_cols + if not is_numerical_dtype(o.dtype) + or np.issubdtype(o.dtype, np.datetime64) + ] + ) + == 0 + ): + col_dtypes = [o.dtype for o in not_null_cols] + # Use NumPy to find a common dtype + common_dtype = np.find_common_type(col_dtypes, []) + # Cast all columns to the common dtype + for i in range(len(objs)): + objs[i] = objs[i].astype(common_dtype) + + # Find the first non-null column: + head = objs[0] + for i, obj in enumerate(objs): + if obj.valid_count > 0: + head = obj + break + + for i, obj in enumerate(objs): + # Check that all columns are the same type: + if not pd.api.types.is_dtype_equal(obj.dtype, head.dtype): + # if all null, cast to appropriate dtype + if obj.valid_count == 0: + objs[i] = column_empty_like( + head, dtype=head.dtype, masked=True, newsize=len(obj) + ) + else: + raise ValueError("All columns must be the same type") + + cats = None + is_categorical = all(is_categorical_dtype(o.dtype) for o in objs) + + # Combine CategoricalColumn categories + if is_categorical: + # Combine and de-dupe the categories + cats = ( + cudf.concat([o.cat().categories for o in objs]) + .to_series() + .drop_duplicates(ignore_index=True) + ._column + ) + objs = [ + o.cat()._set_categories(o.cat().categories, cats, is_unique=True) + for o in objs + ] + # Map `objs` into a list of the codes until we port Categorical to + # use the libcudf++ Category data type. 
+ objs = [o.cat().codes._column for o in objs] + head = head.cat().codes._column + + newsize = sum(map(len, objs)) + if newsize > libcudf.MAX_COLUMN_SIZE: + raise MemoryError( + f"Result of concat cannot have " + f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + ) + + # Filter out inputs that have 0 length + objs = [o for o in objs if len(o) > 0] + + # Perform the actual concatenation + if newsize > 0: + col = libcudf.concat.concat_columns(objs) + else: + col = column_empty(0, head.dtype, masked=True) + + if is_categorical: + col = build_categorical_column( + categories=as_column(cats), + codes=as_column(col.base_data, dtype=col.dtype), + mask=col.base_mask, + size=col.size, + offset=col.offset, + ) + + return col diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 0ffe0c11fef..c2528a91927 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -27,6 +27,7 @@ arange, column, ) +from cudf.core.column.column import _concat_columns from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype from cudf.core.frame import Frame @@ -779,7 +780,7 @@ def sum(self): @classmethod def _concat(cls, objs): - data = ColumnBase._concat([o._values for o in objs]) + data = _concat_columns([o._values for o in objs]) names = {obj.name for obj in objs} if len(names) == 1: [name] = names diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4cc5fb56a4c..67bf6805d38 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -34,6 +34,7 @@ from cudf.core.column.categorical import ( CategoricalAccessor as CategoricalAccessor, ) +from cudf.core.column.column import _concat_columns from cudf.core.column.lists import ListMethods from cudf.core.column.string import StringMethods from cudf.core.column.struct import StructMethods @@ -2749,7 +2750,7 @@ def _concat(cls, objs, axis=0, index=True): if dtype_mismatch: objs = numeric_normalize_types(*objs) - col = ColumnBase._concat([o._column for o in objs]) + col = _concat_columns([o._column for o in objs]) return cls(data=col, index=index, name=name) @property From 42a490c025b19f674c3a47f16387302896d5e6fe Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 30 Apr 2021 12:15:07 -0700 Subject: [PATCH 08/18] Various cleanup tasks in _concat_columns. --- python/cudf/cudf/core/column/column.py | 55 +++++++++----------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index c2b40c0d70e..cb1e3a78196 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2238,44 +2238,29 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def _concat_columns( - objs: "MutableSequence[ColumnBase]", dtype: Dtype = None -) -> ColumnBase: +def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" - if len(objs) == 0: - dtype = pd.api.types.pandas_dtype(dtype) - if is_categorical_dtype(dtype): - dtype = CategoricalDtype() - return column_empty(0, dtype=dtype, masked=True) + # TODO: This function currently assumes that len(objs) > 0. Concatenation + # is only accessible via cudf.concat (which calls through to this from + # methods like Index._concat or Series._concat), and that method + # pre-filters on zero objects being provided. 
We may need to add logic for + # handling an empty input sequence as more Frame logic becomes centralized. # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. # Notice, we can always cast pure null columns - not_null_cols = list(filter(lambda o: o.valid_count > 0, objs)) - if len(not_null_cols) > 0 and ( - len( - [ - o - for o in not_null_cols - if not is_numerical_dtype(o.dtype) - or np.issubdtype(o.dtype, np.datetime64) - ] - ) - == 0 + not_null_col_dtypes = [o.dtype for o in objs if o.valid_count > 0] + if len(not_null_col_dtypes) and all( + is_numerical_dtype(dtyp) and np.issubdtype(dtyp, np.datetime64) + for dtyp in not_null_col_dtypes ): - col_dtypes = [o.dtype for o in not_null_cols] # Use NumPy to find a common dtype - common_dtype = np.find_common_type(col_dtypes, []) + common_dtype = np.find_common_type(not_null_col_dtypes, []) # Cast all columns to the common dtype - for i in range(len(objs)): - objs[i] = objs[i].astype(common_dtype) + objs = [obj.astype(common_dtype) for obj in objs] # Find the first non-null column: - head = objs[0] - for i, obj in enumerate(objs): - if obj.valid_count > 0: - head = obj - break + head = next((obj for obj in objs if obj.valid_count), objs[0]) for i, obj in enumerate(objs): # Check that all columns are the same type: @@ -2288,7 +2273,6 @@ def _concat_columns( else: raise ValueError("All columns must be the same type") - cats = None is_categorical = all(is_categorical_dtype(o.dtype) for o in objs) # Combine CategoricalColumn categories @@ -2315,15 +2299,12 @@ def _concat_columns( f"Result of concat cannot have " f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) - - # Filter out inputs that have 0 length - objs = [o for o in objs if len(o) > 0] - - # Perform the actual concatenation - if newsize > 0: - col = libcudf.concat.concat_columns(objs) - else: + elif newsize == 0: col = column_empty(0, head.dtype, masked=True) + else: + # Filter out inputs that have 0 length, then concatenate. + objs = [o for o in objs if len(o) > 0] + col = libcudf.concat.concat_columns(objs) if is_categorical: col = build_categorical_column( From f8c2590815e987684dfc72510990a3a928a3b7ee Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 30 Apr 2021 14:20:05 -0700 Subject: [PATCH 09/18] Move categorical column concatenation into CategoricalColumn. --- python/cudf/cudf/core/column/categorical.py | 47 +++++++++++++++++++++ python/cudf/cudf/core/column/column.py | 40 ++++++------------ 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 456d2cc2b46..d9a02a2c981 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -3,6 +3,7 @@ from __future__ import annotations import pickle +from collections.abc import MutableSequence from typing import ( TYPE_CHECKING, Any, @@ -1463,6 +1464,52 @@ def view(self, dtype: Dtype) -> ColumnBase: "Categorical column views are not currently supported" ) + @staticmethod + def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: + # TODO: This function currently assumes it is being called from + # column._concat_columns, at least to the extent that all the + # preprocessing in that function has already been done. That should be + # improved as the concatenation API is solidified. 
+ # Find the first non-null column: + head = next((obj for obj in objs if obj.valid_count), objs[0]) + + # Combine and de-dupe the categories + cats = ( + cudf.concat([o.cat().categories for o in objs]) + .to_series() + .drop_duplicates(ignore_index=True) + ._column + ) + objs = [ + o.cat()._set_categories(o.cat().categories, cats, is_unique=True) + for o in objs + ] + # Map `objs` into a list of the codes until we port Categorical to + # use the libcudf++ Category data type. + objs = [o.cat().codes._column for o in objs] + head = head.cat().codes._column + + newsize = sum(map(len, objs)) + if newsize > libcudf.MAX_COLUMN_SIZE: + raise MemoryError( + f"Result of concat cannot have " + f"size > {libcudf.MAX_COLUMN_SIZE_STR}" + ) + elif newsize == 0: + col = column.column_empty(0, head.dtype, masked=True) + else: + # Filter out inputs that have 0 length, then concatenate. + objs = [o for o in objs if len(o) > 0] + col = libcudf.concat.concat_columns(objs) + + return column.build_categorical_column( + categories=column.as_column(cats), + codes=column.as_column(col.base_data, dtype=col.dtype), + mask=col.base_mask, + size=col.size, + offset=col.offset, + ) + def _create_empty_categorical_column( categorical_column: CategoricalColumn, dtype: "CategoricalDtype" diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index cb1e3a78196..e6dd8ac822a 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2273,25 +2273,19 @@ def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: else: raise ValueError("All columns must be the same type") - is_categorical = all(is_categorical_dtype(o.dtype) for o in objs) - - # Combine CategoricalColumn categories - if is_categorical: - # Combine and de-dupe the categories - cats = ( - cudf.concat([o.cat().categories for o in objs]) - .to_series() - .drop_duplicates(ignore_index=True) - ._column + # TODO: This logic should be generalized to a dispatch to + # ColumnBase._concat so that all subclasses can override necessary + # behavior. However, at the moment it's not clear what that API should look + # like, so CategoricalColumn simply implements a minimal working API. + if all(is_categorical_dtype(o.dtype) for o in objs): + return cudf.core.column.categorical.CategoricalColumn._concat( + cast( + MutableSequence[ + cudf.core.column.categorical.CategoricalColumn + ], + objs, + ) ) - objs = [ - o.cat()._set_categories(o.cat().categories, cats, is_unique=True) - for o in objs - ] - # Map `objs` into a list of the codes until we port Categorical to - # use the libcudf++ Category data type. - objs = [o.cat().codes._column for o in objs] - head = head.cat().codes._column newsize = sum(map(len, objs)) if newsize > libcudf.MAX_COLUMN_SIZE: @@ -2305,14 +2299,4 @@ def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # Filter out inputs that have 0 length, then concatenate. objs = [o for o in objs if len(o) > 0] col = libcudf.concat.concat_columns(objs) - - if is_categorical: - col = build_categorical_column( - categories=as_column(cats), - codes=as_column(col.base_data, dtype=col.dtype), - mask=col.base_mask, - size=col.size, - offset=col.offset, - ) - return col From 4338d576e923849bcc1205fb8c3f3ecee4f637b0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 30 Apr 2021 14:36:19 -0700 Subject: [PATCH 10/18] Remove unnecessary generic methods. 
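With the ColumnBase stubs gone, CategoricalColumn and StringColumn still define cat()/str() themselves, while other column types simply no longer have the attribute, so the failure mode becomes AttributeError rather than NotImplementedError. A quick sketch (illustrative only; assumes cudf is installed):

    import cudf

    col = cudf.Series([1, 2, 3])._column  # a NumericalColumn
    try:
        col.cat()
    except AttributeError as exc:
        print(exc)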
--- python/cudf/cudf/core/column/column.py | 8 -------- python/cudf/cudf/core/column/string.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index e6dd8ac822a..a73836fb972 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -189,14 +189,6 @@ def __sizeof__(self) -> int: n += bitmask_allocation_size_bytes(self.size) return n - def cat( - self, parent=None - ) -> "cudf.core.column.categorical.CategoricalAccessor": - raise NotImplementedError() - - def str(self, parent=None) -> "cudf.core.column.string.StringMethods": - raise NotImplementedError() - def dropna(self, drop_nan: bool = False) -> ColumnBase: if drop_nan: col = self.nans_to_nulls() diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 5cf32cc9561..4e3b7963f56 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -4976,7 +4976,7 @@ def sum( result_col = self._process_for_reduction( skipna=skipna, min_count=min_count ) - if isinstance(result_col, cudf.core.column.ColumnBase): + if isinstance(result_col, type(self)): return result_col.str().cat() else: return result_col From 45c0c04e96db4033e72451e6a15571c607261cc8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 3 May 2021 10:59:47 -0700 Subject: [PATCH 11/18] Address PR comments. --- python/cudf/cudf/core/column/categorical.py | 6 +++--- python/cudf/cudf/core/column/column.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d9a02a2c981..6a70089e92e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -36,7 +36,6 @@ min_signed_type, min_unsigned_type, ) -from cudf.utils.utils import cached_property if TYPE_CHECKING: from cudf.core.column import ( @@ -1343,11 +1342,11 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: """ return self.as_numerical.find_last_value(self._encode(value)) - @cached_property + @property def is_monotonic_increasing(self) -> bool: return bool(self.ordered) and self.as_numerical.is_monotonic_increasing - @cached_property + @property def is_monotonic_decreasing(self) -> bool: return bool(self.ordered) and self.as_numerical.is_monotonic_decreasing @@ -1470,6 +1469,7 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: # column._concat_columns, at least to the extent that all the # preprocessing in that function has already been done. That should be # improved as the concatenation API is solidified. 
+ # Find the first non-null column: head = next((obj for obj in objs if obj.valid_count), objs[0]) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index a73836fb972..735a7b4645c 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -56,7 +56,7 @@ min_unsigned_type, np_to_pa_dtype, ) -from cudf.utils.utils import cached_property, mask_dtype +from cudf.utils.utils import mask_dtype T = TypeVar("T", bound="ColumnBase") @@ -824,13 +824,13 @@ def is_unique(self) -> bool: def is_monotonic(self) -> bool: return self.is_monotonic_increasing - @cached_property + @property def is_monotonic_increasing(self) -> bool: return not self.has_nulls and self.as_frame()._is_sorted( ascending=None, null_position=None ) - @cached_property + @property def is_monotonic_decreasing(self) -> bool: return not self.has_nulls and self.as_frame()._is_sorted( ascending=[False], null_position=None From 5a69ea483928dd6e3bc8bf4f93eedc4c79b25ec1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 3 May 2021 13:51:58 -0700 Subject: [PATCH 12/18] Switch from collections.abc to typing for MutableSequence. --- python/cudf/cudf/core/column/column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 735a7b4645c..d0ed6c98d32 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -5,13 +5,13 @@ import builtins import pickle import warnings -from collections.abc import MutableSequence from types import SimpleNamespace from typing import ( Any, Callable, Dict, List, + MutableSequence, Optional, Sequence, Tuple, From fb030a9a5197cc0868d22f06d8f0bce80fe4ba2d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 17:08:10 -0700 Subject: [PATCH 13/18] Avoid caching monotonic properties in MultiIndex. --- python/cudf/cudf/core/multiindex.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index ca029198e52..6b551dc72c3 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1386,11 +1386,7 @@ def is_monotonic_increasing(self): Return if the index is monotonic increasing (only equal or increasing) values. """ - if not hasattr(self, "_is_monotonic_increasing"): - self._is_monotonic_increasing = self._is_sorted( - ascending=None, null_position=None - ) - return self._is_monotonic_increasing + return self._is_sorted(ascending=None, null_position=None) @property def is_monotonic_decreasing(self): @@ -1398,11 +1394,9 @@ def is_monotonic_decreasing(self): Return if the index is monotonic decreasing (only equal or decreasing) values. """ - if not hasattr(self, "_is_monotonic_decreasing"): - self._is_monotonic_decreasing = self._is_sorted( - ascending=[False] * len(self.levels), null_position=None - ) - return self._is_monotonic_decreasing + return self._is_sorted( + ascending=[False] * len(self.levels), null_position=None + ) def argsort(self, ascending=True, **kwargs): indices = self._source_data.argsort(ascending=ascending, **kwargs) From 4d1239b54baa1759a874db2b9d7f36a1199214c3 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 18:20:21 -0700 Subject: [PATCH 14/18] Handle empty list of objects to concatenate. 
--- python/cudf/cudf/core/column/column.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index d0ed6c98d32..9e632a57383 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2232,11 +2232,9 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" - # TODO: This function currently assumes that len(objs) > 0. Concatenation - # is only accessible via cudf.concat (which calls through to this from - # methods like Index._concat or Series._concat), and that method - # pre-filters on zero objects being provided. We may need to add logic for - # handling an empty input sequence as more Frame logic becomes centralized. + if len(objs) == 0: + dtype = pd.api.types.pandas_dtype(None) + return column_empty(0, dtype=dtype, masked=True) # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. From 05c33921bbbc6bca34fa903ec261f56fab91ce6a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 11 May 2021 16:34:46 -0700 Subject: [PATCH 15/18] Apply suggestions from code review Co-authored-by: Keith Kraus --- python/cudf/cudf/core/column/categorical.py | 2 +- python/cudf/cudf/core/column/column.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 6a70089e92e..97bc097b7d2 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1499,7 +1499,7 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: col = column.column_empty(0, head.dtype, masked=True) else: # Filter out inputs that have 0 length, then concatenate. - objs = [o for o in objs if len(o) > 0] + objs = [o for o in objs if len(o)] col = libcudf.concat.concat_columns(objs) return column.build_categorical_column( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 9e632a57383..31fbd33c990 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2239,7 +2239,7 @@ def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. # Notice, we can always cast pure null columns - not_null_col_dtypes = [o.dtype for o in objs if o.valid_count > 0] + not_null_col_dtypes = [o.dtype for o in objs if o.valid_count] if len(not_null_col_dtypes) and all( is_numerical_dtype(dtyp) and np.issubdtype(dtyp, np.datetime64) for dtyp in not_null_col_dtypes @@ -2287,6 +2287,6 @@ def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: col = column_empty(0, head.dtype, masked=True) else: # Filter out inputs that have 0 length, then concatenate. - objs = [o for o in objs if len(o) > 0] + objs = [o for o in objs if len(o)] col = libcudf.concat.concat_columns(objs) return col From 6d73c444ab8f6a87e52ead0418a7fe8c0f6108a4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 10:20:06 -0700 Subject: [PATCH 16/18] Use codes directly. 
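CategoricalColumn already exposes its integer codes as a column-level property, so the accessor round-trip (o.cat().codes._column) is unnecessary here. An illustrative sketch of the two paths (assumes cudf is installed):

    import cudf

    s = cudf.Series(["a", "b", "a"], dtype="category")

    # public accessor: materialises a Series of codes
    print(s.cat.codes)

    # column-level property used in _concat: the raw codes column
    print(type(s._column.codes))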
--- python/cudf/cudf/core/column/categorical.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 97bc097b7d2..67f50cedb1e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1484,30 +1484,27 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: o.cat()._set_categories(o.cat().categories, cats, is_unique=True) for o in objs ] - # Map `objs` into a list of the codes until we port Categorical to - # use the libcudf++ Category data type. - objs = [o.cat().codes._column for o in objs] - head = head.cat().codes._column + codes = [o.codes for o in objs] - newsize = sum(map(len, objs)) + newsize = sum(map(len, codes)) if newsize > libcudf.MAX_COLUMN_SIZE: raise MemoryError( f"Result of concat cannot have " f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - col = column.column_empty(0, head.dtype, masked=True) + codes_col = column.column_empty(0, head.codes.dtype, masked=True) else: # Filter out inputs that have 0 length, then concatenate. - objs = [o for o in objs if len(o)] - col = libcudf.concat.concat_columns(objs) + codes = [o for o in codes if len(o)] + codes_col = libcudf.concat.concat_columns(objs) return column.build_categorical_column( categories=column.as_column(cats), - codes=column.as_column(col.base_data, dtype=col.dtype), - mask=col.base_mask, - size=col.size, - offset=col.offset, + codes=column.as_column(codes_col.base_data, dtype=codes_col.dtype), + mask=codes_col.base_mask, + size=codes_col.size, + offset=codes_col.offset, ) From 565c99779b7b892f2b1aa3ca5cfbd2bcdc25b2f4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 11:25:07 -0700 Subject: [PATCH 17/18] Use column.ColumnBase and remove import to try to appease flake8. --- python/cudf/cudf/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index cc594474520..9dec934a0ae 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -22,7 +22,6 @@ from cudf._lib.transform import bools_to_mask from cudf.core.abc import Serializable from cudf.core.column import ( - ColumnBase, DatetimeColumn, TimeDeltaColumn, arange, @@ -2148,7 +2147,7 @@ def _normalize_binop_value(self, other): """Returns a *column* (not a Series) or scalar for performing binary operations with self._column. """ - if isinstance(other, ColumnBase): + if isinstance(other, column.ColumnBase): return other if isinstance(other, Series): return other._column From 49ca36a6cd49c27f0609307c3c3bb99cef72b4cc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 14:10:27 -0700 Subject: [PATCH 18/18] Remove conversion to series. --- python/cudf/cudf/core/column/categorical.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 67f50cedb1e..c199947d261 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1476,8 +1476,7 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: # Combine and de-dupe the categories cats = ( cudf.concat([o.cat().categories for o in objs]) - .to_series() - .drop_duplicates(ignore_index=True) + .drop_duplicates() ._column ) objs = [