From 6d7d144482c22b9b11ecb8228e14f41a0e3383c6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 16:12:39 -0700
Subject: [PATCH 01/12] Proper ducktyping for to_pandas.

---
 python/cudf/cudf/core/column/column.py    | 20 +++++---------------
 python/cudf/cudf/core/column/interval.py  | 10 ++++++++++
 python/cudf/cudf/core/column/numerical.py | 20 ++++++++++++++++++++
 3 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 2d438d37b3e..2b10d3ad630 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -44,9 +44,7 @@
 from cudf.core.dtypes import CategoricalDtype, IntervalDtype
 from cudf.utils import ioutils, utils
 from cudf.utils.dtypes import (
-    NUMERIC_TYPES,
     check_cast_unsupported_dtype,
-    cudf_dtypes_to_pandas_dtypes,
     get_time_unit,
     is_categorical_dtype,
     is_decimal_dtype,
@@ -119,19 +117,11 @@ def __repr__(self):
     def to_pandas(
         self, index: ColumnLike = None, nullable: bool = False, **kwargs
     ) -> "pd.Series":
-        if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes:
-            pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype]
-            arrow_array = self.to_arrow()
-            pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array)
-            pd_series = pd.Series(pandas_array, copy=False)
-        elif str(self.dtype) in NUMERIC_TYPES and self.null_count == 0:
-            pd_series = pd.Series(cupy.asnumpy(self.values), copy=False)
-        elif is_interval_dtype(self.dtype):
-            pd_series = pd.Series(
-                pd.IntervalDtype().__from_arrow__(self.to_arrow())
-            )
-        else:
-            pd_series = self.to_arrow().to_pandas(**kwargs)
+        """Convert object to pandas type.
+
+        The default implementation falls back to PyArrow for the conversion.
+        """
+        pd_series = self.to_arrow().to_pandas(**kwargs)
 
         if index is not None:
             pd_series.index = index
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index d8bea6b1658..7436a69e14a 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -1,6 +1,9 @@
 # Copyright (c) 2018-2021, NVIDIA CORPORATION.
+import pandas as pd
 import pyarrow as pa
+
 import cudf
+from cudf._typing import ColumnLike
 from cudf.core.column import StructColumn
 from cudf.core.dtypes import IntervalDtype
 from cudf.utils.dtypes import is_interval_dtype
@@ -110,3 +113,10 @@ def as_interval_column(self, dtype, **kwargs):
             )
         else:
             raise ValueError("dtype must be IntervalDtype")
+
+    def to_pandas(
+        self, index: ColumnLike = None, nullable: bool = False, **kwargs
+    ) -> "pd.Series":
+        return pd.Series(
+            pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index
+        )
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 70b4569b180..0ae302da9e1 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -5,6 +5,7 @@
 from numbers import Number
 from typing import Any, Callable, Sequence, Tuple, Union, cast
 
+import cupy
 import numpy as np
 import pandas as pd
 from numba import cuda, njit
@@ -27,6 +28,8 @@
 from cudf.core.dtypes import Decimal64Dtype
 from cudf.utils import cudautils, utils
 from cudf.utils.dtypes import (
+    NUMERIC_TYPES,
+    cudf_dtypes_to_pandas_dtypes,
     min_column_type,
     min_signed_type,
     numeric_normalize_types,
@@ -711,6 +714,23 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
 
         return False
 
+    def to_pandas(
+        self, index: ColumnLike = None, nullable: bool = False, **kwargs
+    ) -> "pd.Series":
+        if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes:
+            pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype]
+            arrow_array = self.to_arrow()
+            pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array)
+            pd_series = pd.Series(pandas_array, copy=False)
+        elif str(self.dtype) in NUMERIC_TYPES and self.null_count == 0:
+            pd_series = pd.Series(cupy.asnumpy(self.values), copy=False)
+        else:
+            pd_series = self.to_arrow().to_pandas(**kwargs)
+
+        if index is not None:
+            pd_series.index = index
+        return pd_series
+
 
 @annotate("BINARY_OP", color="orange", domain="cudf_python")
 def _numeric_column_binop(

From 2dc087253f009411b2ae62a9707789d1cf5ade92 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 16:23:28 -0700
Subject: [PATCH 02/12] Some minor improvements.

---
 python/cudf/cudf/core/column/column.py    | 4 +++-
 python/cudf/cudf/core/column/numerical.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 2b10d3ad630..382688580e7 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -128,6 +128,8 @@ def to_pandas(
         return pd_series
 
     def __iter__(self):
+        # TODO: Why don't we just implement this method in terms of one of the
+        # proposed alternatives (to_arrow, to_pandas, or values_host)?
         cudf.utils.utils.raise_iteration_error(obj=self)
 
     @property
@@ -828,7 +830,7 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
         return indices[-1]
 
     def append(self, other: ColumnBase) -> ColumnBase:
-        return ColumnBase._concat([self, as_column(other)])
+        return self.__class__._concat([self, as_column(other)])
 
     def quantile(
         self,
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 0ae302da9e1..0f717159558 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -410,7 +410,7 @@ def round(self, decimals: int = 0) -> NumericalColumn:
     def applymap(
         self, udf: Callable[[ScalarLike], ScalarLike], out_dtype: Dtype = None
     ) -> ColumnBase:
-        """Apply an element-wise function to transform the values in the Column.
+        """Apply an elementwise function to transform the values in the Column.
 
         Parameters
         ----------

From ab555650772f76649a338528406478ccd08f6aee Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 17:08:36 -0700
Subject: [PATCH 03/12] Add to_pandas implementation for str.

---
 python/cudf/cudf/core/column/string.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index de2df9b50d7..f15a3fd105b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -4945,6 +4945,19 @@ def __arrow_array__(self, type=None):
             "consider using .to_arrow()"
         )
 
+    def to_pandas(
+        self, index: ColumnLike = None, nullable: bool = False, **kwargs
+    ) -> "pd.Series":
+        if nullable:
+            pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow())
+            pd_series = pd.Series(pandas_array, copy=False)
+        else:
+            pd_series = self.to_arrow().to_pandas(**kwargs)
+
+        if index is not None:
+            pd_series.index = index
+        return pd_series
+
     def serialize(self) -> Tuple[dict, list]:
         header = {"null_count": self.null_count}  # type: Dict[Any, Any]
         header["type-serialized"] = pickle.dumps(type(self))

From 4538ecb09100d0901513a9f23bbd0f5d705dac1c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 17:16:36 -0700
Subject: [PATCH 04/12] Proper ducktyping for to_arrow.

---
 python/cudf/cudf/core/column/categorical.py | 19 +++++++++++++
 python/cudf/cudf/core/column/column.py      | 31 ---------------------
 python/cudf/cudf/core/column/string.py      | 30 ++++++++++++++++++++
 3 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index bb1bf3c5d5c..d55bd245cb7 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 from numba import cuda
 
 import cudf
@@ -1099,6 +1100,24 @@ def to_pandas(
         )
         return pd.Series(data, index=index)
 
+    def to_arrow(self) -> pa.Array:
+        """Convert to PyArrow Array."""
+        # arrow doesn't support unsigned codes
+        signed_type = (
+            min_signed_type(self.codes.max())
+            if self.codes.size > 0
+            else np.int8
+        )
+        codes = self.codes.astype(signed_type)
+        categories = self.categories
+
+        out_indices = codes.to_arrow()
+        out_dictionary = categories.to_arrow()
+
+        return pa.DictionaryArray.from_arrays(
+            out_indices, out_dictionary, ordered=self.ordered,
+        )
+
     @property
     def values_host(self) -> np.ndarray:
         """
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 382688580e7..d14f621cf76 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -54,7 +54,6 @@
     is_scalar,
     is_string_dtype,
     is_struct_dtype,
-    min_signed_type,
     min_unsigned_type,
     np_to_pa_dtype,
 )
@@ -325,30 +324,6 @@ def to_arrow(self) -> pa.Array:
           4
         ]
         """
-        if isinstance(self, cudf.core.column.CategoricalColumn):
-            # arrow doesn't support unsigned codes
-            signed_type = (
-                min_signed_type(self.codes.max())
-                if self.codes.size > 0
-                else np.int8
-            )
-            codes = self.codes.astype(signed_type)
-            categories = self.categories
-
-            out_indices = codes.to_arrow()
-            out_dictionary = categories.to_arrow()
-
-            return pa.DictionaryArray.from_arrays(
-                out_indices, out_dictionary, ordered=self.ordered,
-            )
-
-        if isinstance(self, cudf.core.column.StringColumn) and (
-            self.null_count == len(self)
-        ):
-            return pa.NullArray.from_buffers(
-                pa.null(), len(self), [pa.py_buffer((b""))]
-            )
-
         result = libcudf.interop.to_arrow(
             libcudf.table.Table(
                 cudf.core.column_accessor.ColumnAccessor({"None": self})
@@ -357,12 +332,6 @@ def to_arrow(self) -> pa.Array:
             keep_index=False,
         )["None"].chunk(0)
 
-        if isinstance(self.dtype, cudf.Decimal64Dtype):
-            result = result.view(
-                pa.decimal128(
-                    scale=result.type.scale, precision=self.dtype.precision
-                )
-            )
         return result
 
     @classmethod
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index f15a3fd105b..af0f8df04bd 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -10,6 +10,7 @@
 import cupy
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 from numba import cuda
 from nvtx import annotate
 
@@ -4766,6 +4767,35 @@ def base_size(self) -> int:
     def data_array_view(self) -> cuda.devicearray.DeviceNDArray:
         raise ValueError("Cannot get an array view of a StringColumn")
 
+    def to_arrow(self) -> pa.Array:
+        """Convert to PyArrow Array
+
+        Examples
+        --------
+        >>> import cudf
+        >>> col = cudf.core.column.as_column([1, 2, 3, 4])
+        >>> col.to_arrow()
+        <pyarrow.lib.Int64Array object at 0x7f886547f830>
+        [
+          1,
+          2,
+          3,
+          4
+        ]
+        """
+        if self.null_count == len(self):
+            return pa.NullArray.from_buffers(
+                pa.null(), len(self), [pa.py_buffer((b""))]
+            )
+        else:
+            return libcudf.interop.to_arrow(
+                libcudf.table.Table(
+                    cudf.core.column_accessor.ColumnAccessor({"None": self})
+                ),
+                [["None"]],
+                keep_index=False,
+            )["None"].chunk(0)
+
     def sum(
         self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0
     ):

From e846a331c832c9d0de621c0ba995034dd7b22fe6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 17:34:46 -0700
Subject: [PATCH 05/12] Minor fix.

---
 python/cudf/cudf/core/column/column.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d14f621cf76..40351b6efd9 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -851,9 +851,6 @@ def isin(self, values: Sequence) -> ColumnBase:
         result: Column
             Column of booleans indicating if each element is in values.
         """
-        lhs = self
-        rhs = None
-
         try:
             lhs, rhs = self._process_values_for_isin(values)
             res = lhs._isin_earlystop(rhs)

From 71ca4f41421889b96be0b80bc59530fae442142a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 26 Apr 2021 20:24:49 -0700
Subject: [PATCH 06/12] Remove redundant functions from string column, moving
 the ones that should be generic into ColumnBase.

---
 python/cudf/cudf/core/column/column.py    | 41 ++++++++---------------
 python/cudf/cudf/core/column/numerical.py | 31 ++++++++++++++++-
 python/cudf/cudf/core/column/string.py    | 30 -----------------
 3 files changed, 44 insertions(+), 58 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 40351b6efd9..901617d6d0e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -12,7 +12,6 @@
     Callable,
     Dict,
     List,
-    Mapping,
     Optional,
     Sequence,
     Tuple,
@@ -1126,31 +1125,11 @@ def argsort(
         return sorted_indices
 
     @property
-    def __cuda_array_interface__(self) -> Mapping[builtins.str, Any]:
-        output = {
-            "shape": (len(self),),
-            "strides": (self.dtype.itemsize,),
-            "typestr": self.dtype.str,
-            "data": (self.data_ptr, False),
-            "version": 1,
-        }
-
-        if self.nullable and self.has_nulls:
-
-            # Create a simple Python object that exposes the
-            # `__cuda_array_interface__` attribute here since we need to modify
-            # some of the attributes from the numba device array
-            mask = SimpleNamespace(
-                __cuda_array_interface__={
-                    "shape": (len(self),),
-                    "typestr": "<t1",
-                    "data": (self.mask_ptr, True),
-                    "version": 1,
-                }
-            )
-            output["mask"] = mask
-
-        return output
+    def __cuda_array_interface__(self):
+        raise NotImplementedError(
+            f"dtype {self.dtype} is not yet supported via "
+            "`__cuda_array_interface__`"
+        )
 
     def __add__(self, other):
         return self.binary_operator("add", other)
@@ -1247,10 +1226,18 @@ def deserialize(cls, header: dict, frames: list) -> ColumnBase:
             mask = Buffer.deserialize(header["mask"], [frames[1]])
         return build_column(data=data, dtype=dtype, mask=mask)
 
+    def unary_operator(self, unaryop: builtins.str):
+        raise TypeError(
+            f"Operation {unaryop} not supported for dtype {self.dtype}."
+        )
+
     def binary_operator(
         self, op: builtins.str, other: BinaryOperand, reflect: bool = False
     ) -> ColumnBase:
-        raise NotImplementedError
+        raise TypeError(
+            f"Operation {op} not supported between dtypes {self.dtype} and "
+            f"{other.dtype}."
+        )
 
     def min(self, skipna: bool = None, dtype: Dtype = None):
         result_col = self._process_for_reduction(skipna=skipna)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 0f717159558..0c815db0b49 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -2,8 +2,10 @@
 
 from __future__ import annotations
 
+import builtins
 from numbers import Number
-from typing import Any, Callable, Sequence, Tuple, Union, cast
+from types import SimpleNamespace
+from typing import Any, Callable, Mapping, Sequence, Tuple, Union, cast
 
 import cupy
 import numpy as np
@@ -89,6 +91,33 @@ def __contains__(self, item: ScalarLike) -> bool:
             self, column.as_column([item], dtype=self.dtype)
         ).any()
 
+    @property
+    def __cuda_array_interface__(self) -> Mapping[builtins.str, Any]:
+        output = {
+            "shape": (len(self),),
+            "strides": (self.dtype.itemsize,),
+            "typestr": self.dtype.str,
+            "data": (self.data_ptr, False),
+            "version": 1,
+        }
+
+        if self.nullable and self.has_nulls:
+
+            # Create a simple Python object that exposes the
+            # `__cuda_array_interface__` attribute here since we need to modify
+            # some of the attributes from the numba device array
+            mask = SimpleNamespace(
+                __cuda_array_interface__={
+                    "shape": (len(self),),
+                    "typestr": "<t1",
+                    "data": (self.mask_ptr, True),
+                    "version": 1,
+                }
+            )
+            output["mask"] = mask
+
+        return output
+
     def unary_operator(self, unaryop: str) -> ColumnBase:
         return _numeric_column_unaryop(self, op=unaryop)
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index af0f8df04bd..60b159c8350 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -4829,15 +4829,6 @@ def __contains__(self, item: ScalarLike) -> bool:
     def str(self, parent: ParentType = None) -> StringMethods:
         return StringMethods(self, parent=parent)
 
-    def unary_operator(self, unaryop: builtins.str):
-        raise TypeError(
-            f"Series of dtype `str` cannot perform the operation: "
-            f"{unaryop}"
-        )
-
-    def __len__(self) -> int:
-        return self.size
-
     @property
     def _nbytes(self) -> int:
         if self.size == 0:
@@ -4960,21 +4951,6 @@ def to_array(self, fillna: bool = None) -> np.ndarray:
 
         return self.to_arrow().to_pandas().values
 
-    def __array__(self, dtype=None):
-        raise TypeError(
-            "Implicit conversion to a host NumPy array via __array__ is not "
-            "allowed, Conversion to GPU array in strings is not yet "
-            "supported.\nTo explicitly construct a host array, "
-            "consider using .to_array()"
-        )
-
-    def __arrow_array__(self, type=None):
-        raise TypeError(
-            "Implicit conversion to a host PyArrow Array via __arrow_array__ "
-            "is not allowed, To explicitly construct a PyArrow Array, "
-            "consider using .to_arrow()"
-        )
-
     def to_pandas(
         self, index: ColumnLike = None, nullable: bool = False, **kwargs
     ) -> "pd.Series":
@@ -5161,12 +5137,6 @@ def binary_operator(
     def is_unique(self) -> bool:
         return len(self.unique()) == len(self)
 
-    @property
-    def __cuda_array_interface__(self):
-        raise NotImplementedError(
-            "Strings are not yet supported via `__cuda_array_interface__`"
-        )
-
     @copy_docstring(column.ColumnBase.view)
     def view(self, dtype) -> "cudf.core.column.ColumnBase":
         if self.null_count > 0:

From 0329ad54cbbabf83cc319f0f0bef79d6b1a5a7ce Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 27 Apr 2021 09:09:17 -0700
Subject: [PATCH 07/12] Remove redundant method.

---
 python/cudf/cudf/core/column/string.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 60b159c8350..78ff8c65272 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5133,10 +5133,6 @@ def binary_operator(
             f"{op} operator not supported between {type(self)} and {type(rhs)}"
         )
 
-    @property
-    def is_unique(self) -> bool:
-        return len(self.unique()) == len(self)
-
     @copy_docstring(column.ColumnBase.view)
     def view(self, dtype) -> "cudf.core.column.ColumnBase":
         if self.null_count > 0:

From 51e6bea5ab6963f3059623c0b45bb0992834857f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 27 Apr 2021 09:35:05 -0700
Subject: [PATCH 08/12] Fixed a few more redundancies.

---
 python/cudf/cudf/core/column/column.py   |  4 +---
 python/cudf/cudf/core/column/datetime.py | 11 ++++-------
 python/cudf/cudf/core/column/string.py   |  8 +-------
 3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 901617d6d0e..5226893d524 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -323,7 +323,7 @@ def to_arrow(self) -> pa.Array:
           4
         ]
         """
-        result = libcudf.interop.to_arrow(
+        return libcudf.interop.to_arrow(
             libcudf.table.Table(
                 cudf.core.column_accessor.ColumnAccessor({"None": self})
             ),
@@ -331,8 +331,6 @@ def to_arrow(self) -> pa.Array:
             keep_index=False,
         )["None"].chunk(0)
 
-        return result
-
     @classmethod
     def from_arrow(cls, array: pa.Array) -> ColumnBase:
         """
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 66141fec610..b9d42c031c7 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -139,15 +139,12 @@ def to_pandas(
         # https://issues.apache.org/jira/browse/ARROW-9772
 
         # Pandas supports only `datetime64[ns]`, hence the cast.
-        pd_series = pd.Series(
-            self.astype("datetime64[ns]").to_array("NAT"), copy=False
+        return pd.Series(
+            self.astype("datetime64[ns]").to_array("NAT"),
+            copy=False,
+            index=index,
         )
 
-        if index is not None:
-            pd_series.index = index
-
-        return pd_series
-
     def get_dt_field(self, field: str) -> ColumnBase:
         return libcudf.datetime.extract_datetime_component(self, field)
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 78ff8c65272..ea919866e34 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -4788,13 +4788,7 @@ def to_arrow(self) -> pa.Array:
                 pa.null(), len(self), [pa.py_buffer((b""))]
             )
         else:
-            return libcudf.interop.to_arrow(
-                libcudf.table.Table(
-                    cudf.core.column_accessor.ColumnAccessor({"None": self})
-                ),
-                [["None"]],
-                keep_index=False,
-            )["None"].chunk(0)
+            return super().to_arrow()
 
     def sum(
         self, skipna: bool = None, dtype: Dtype = None, min_count: int = 0

From 0c2c7a1dc4a556722a3bd28c9188f4fdc84d229f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 27 Apr 2021 12:46:12 -0700
Subject: [PATCH 09/12] Add missing __cuda_array_interface__ for datetime
 objects.

---
 python/cudf/cudf/core/column/datetime.py | 31 +++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index b9d42c031c7..d86a54e6970 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -2,10 +2,12 @@
 
 from __future__ import annotations
 
+import builtins
 import datetime as dt
 import re
 from numbers import Number
-from typing import Any, Sequence, Union, cast
+from types import SimpleNamespace
+from typing import Any, Mapping, Sequence, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -199,6 +201,33 @@ def as_numerical(self) -> "cudf.core.column.NumericalColumn":
             ),
         )
 
+    @property
+    def __cuda_array_interface__(self) -> Mapping[builtins.str, Any]:
+        output = {
+            "shape": (len(self),),
+            "strides": (self.dtype.itemsize,),
+            "typestr": self.dtype.str,
+            "data": (self.data_ptr, False),
+            "version": 1,
+        }
+
+        if self.nullable and self.has_nulls:
+
+            # Create a simple Python object that exposes the
+            # `__cuda_array_interface__` attribute here since we need to modify
+            # some of the attributes from the numba device array
+            mask = SimpleNamespace(
+                __cuda_array_interface__={
+                    "shape": (len(self),),
+                    "typestr": "<t1",
+                    "data": (self.mask_ptr, True),
+                    "version": 1,
+                }
+            )
+            output["mask"] = mask
+
+        return output
+
     def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn:
         dtype = np.dtype(dtype)
         if dtype == self.dtype:

From f575e88356aaaae49a3256c414e8df72175166f2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 27 Apr 2021 16:03:10 -0700
Subject: [PATCH 10/12] Address PR comments.

---
 python/cudf/cudf/core/column/categorical.py |  5 +----
 python/cudf/cudf/core/column/column.py      | 10 +++++-----
 python/cudf/cudf/core/column/datetime.py    |  2 +-
 python/cudf/cudf/core/column/interval.py    | 10 ++++++----
 python/cudf/cudf/core/column/numerical.py   |  4 ++--
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index d55bd245cb7..3cd1a599ddc 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1076,10 +1076,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]:
             " if you need this functionality."
         )
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> pd.Series:
-
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
         if self.categories.dtype.kind == "f":
             new_mask = bools_to_mask(self.notnull())
             col = column.build_categorical_column(
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5226893d524..65fcc6791d8 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -112,13 +112,15 @@ def __repr__(self):
             f"dtype: {self.dtype}"
         )
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> "pd.Series":
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
         """Convert object to pandas type.
 
         The default implementation falls back to PyArrow for the conversion.
         """
+        # This default implementation does not handle nulls in any meaningful
+        # way, but must consume the parameter to avoid passing it to PyArrow
+        # (which does not recognize it).
+        kwargs.pop("nullable", None)
         pd_series = self.to_arrow().to_pandas(**kwargs)
 
         if index is not None:
@@ -126,8 +128,6 @@ def to_pandas(
         return pd_series
 
     def __iter__(self):
-        # TODO: Why don't we just implement this method in terms of one of the
-        # proposed alternatives (to_arrow, to_pandas, or values_host)?
         cudf.utils.utils.raise_iteration_error(obj=self)
 
     @property
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index d86a54e6970..14c82b5ff45 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -135,7 +135,7 @@ def weekday(self) -> ColumnBase:
         return self.get_dt_field("weekday")
 
     def to_pandas(
-        self, index: "cudf.Index" = None, nullable: bool = False, **kwargs
+        self, index: pd.Index = None, nullable: bool = False, **kwargs
     ) -> "cudf.Series":
         # Workaround until following issue is fixed:
         # https://issues.apache.org/jira/browse/ARROW-9772
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index 7436a69e14a..24541c57044 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -3,7 +3,6 @@
 import pyarrow as pa
 
 import cudf
-from cudf._typing import ColumnLike
 from cudf.core.column import StructColumn
 from cudf.core.dtypes import IntervalDtype
 from cudf.utils.dtypes import is_interval_dtype
@@ -114,9 +113,12 @@ def as_interval_column(self, dtype, **kwargs):
         else:
             raise ValueError("dtype must be IntervalDtype")
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> "pd.Series":
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+        # Note: This does not handle null values in the interval column.
+        # However, this exact sequence (calling __from_arrow__ on the output of
+        # self.to_arrow) is currently the best known way to convert interval
+        # types into pandas (trying to convert the underlying numerical columns
+        # directly is problematic), so we're stuck with this for now.
         return pd.Series(
             pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index
         )
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 0c815db0b49..d710129900a 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -744,14 +744,14 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         return False
 
     def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
+        self, index: pd.Index = None, nullable: bool = False, **kwargs
     ) -> "pd.Series":
         if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes:
             pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype]
             arrow_array = self.to_arrow()
             pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array)
             pd_series = pd.Series(pandas_array, copy=False)
-        elif str(self.dtype) in NUMERIC_TYPES and self.null_count == 0:
+        elif str(self.dtype) in NUMERIC_TYPES and not self.has_nulls:
             pd_series = pd.Series(cupy.asnumpy(self.values), copy=False)
         else:
             pd_series = self.to_arrow().to_pandas(**kwargs)

From 6583e59bd69783d3e97f071b50fcc494511fafb3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 27 Apr 2021 17:00:12 -0700
Subject: [PATCH 11/12] Explicitly prohibit conversion of columns to host
 arrays.

---
 python/cudf/cudf/core/column/column.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 65fcc6791d8..a3a6813908a 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1122,6 +1122,13 @@ def argsort(
         )
         return sorted_indices
 
+    def __array__(self, dtype=None):
+        raise TypeError(
+            "Implicit conversion to a host NumPy array via __array__ is not "
+            "allowed. To explicitly construct a host array, consider using "
+            ".to_array()"
+        )
+
     @property
     def __cuda_array_interface__(self):
         raise NotImplementedError(

From 381d15f8a951ab9fbbdb68bd37416cb91de76da4 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 28 Apr 2021 09:20:30 -0700
Subject: [PATCH 12/12] Explicitly prohibit conversion of columns to arrow
 (host) arrays.

---
 python/cudf/cudf/core/column/column.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index a3a6813908a..bd67376642f 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1122,6 +1122,13 @@ def argsort(
         )
         return sorted_indices
 
+    def __arrow_array__(self, type=None):
+        raise TypeError(
+            "Implicit conversion to a host PyArrow Array via __arrow_array__ "
+            "is not allowed, To explicitly construct a PyArrow Array, "
+            "consider using .to_arrow()"
+        )
+
     def __array__(self, dtype=None):
         raise TypeError(
             "Implicit conversion to a host NumPy array via __array__ is not "