From c0a3cd14eabd18ba8cedd3b7dd87cba8b6706719 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 11 Jan 2024 16:13:59 -1000
Subject: [PATCH] Clean up base column methods (#14725)

* Removed the need for a `drop_nan` argument in `Column.dropna`
* Removed the need for `Column.as_frame`
* Removed the need for `Column.force_deep_copy`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Michael Wang (https://github.com/isVoid)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/14725
---
 .../cudf/benchmarks/internal/bench_column.py  |  7 ++---
 python/cudf/cudf/core/column/categorical.py   | 11 ++++----
 python/cudf/cudf/core/column/column.py        | 28 ++++---------------
 python/cudf/cudf/core/column/interval.py      |  5 +---
 python/cudf/cudf/core/column/numerical.py     |  5 ----
 python/cudf/cudf/io/dlpack.py                 |  4 +--
 6 files changed, 18 insertions(+), 42 deletions(-)

diff --git a/python/cudf/benchmarks/internal/bench_column.py b/python/cudf/benchmarks/internal/bench_column.py
index d4969b39f7f..8da769b7858 100644
--- a/python/cudf/benchmarks/internal/bench_column.py
+++ b/python/cudf/benchmarks/internal/bench_column.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 """Benchmarks of Column methods."""
 
@@ -18,9 +18,8 @@ def bench_apply_boolean_mask(benchmark, column):
 
 
 @benchmark_with_object(cls="column", dtype="float")
-@pytest.mark.parametrize("dropnan", [True, False])
-def bench_dropna(benchmark, column, dropnan):
-    benchmark(column.dropna, drop_nan=dropnan)
+def bench_dropna(benchmark, column):
+    benchmark(column.dropna)
 
 
 @benchmark_with_object(cls="column", dtype="float")
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 71143fa7a95..eb4220c5895 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -987,15 +987,16 @@ def to_pandas(
             .fillna(_DEFAULT_CATEGORICAL_VALUE)
             .values_host
         )
-        if isinstance(col.categories.dtype, IntervalDtype):
+        cats = col.categories
+        if cats.dtype.kind in "biuf":
+            cats = cats.nans_to_nulls().dropna()  # type: ignore[attr-defined]
+        elif not isinstance(cats.dtype, IntervalDtype):
             # leaving out dropna because it temporarily changes an interval
             # index into a struct and throws off results.
             # TODO: work on interval index dropna
-            categories = col.categories.to_pandas()
-        else:
-            categories = col.categories.dropna(drop_nan=True).to_pandas()
+            cats = cats.dropna()
         data = pd.Categorical.from_codes(
-            codes, categories=categories, ordered=col.ordered
+            codes, categories=cats.to_pandas(), ordered=col.ordered
         )
         return pd.Series(data, index=index)
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 81579b53bb7..3cf686da7b0 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -109,16 +109,8 @@ class ColumnBase(Column, Serializable, BinaryOperand, Reducible):
         "min",
     }
 
-    def as_frame(self) -> "cudf.core.frame.Frame":
-        """
-        Converts a Column to Frame
-        """
-        return cudf.core.single_column_frame.SingleColumnFrame(
-            {None: self.copy(deep=False)}
-        )
-
     def data_array_view(
-        self, *, mode="write"
+        self, *, mode: Literal["write", "read"] = "write"
     ) -> "cuda.devicearray.DeviceNDArray":
         """
         View the data as a device array object
@@ -155,7 +147,7 @@ def data_array_view(
         return cuda.as_cuda_array(obj).view(self.dtype)
 
     def mask_array_view(
-        self, *, mode="write"
+        self, *, mode: Literal["write", "read"] = "write"
     ) -> "cuda.devicearray.DeviceNDArray":
         """
         View the mask as a device array
@@ -291,8 +283,7 @@ def any(self, skipna: bool = True) -> bool:
 
         return libcudf.reduce.reduce("any", self, dtype=np.bool_)
 
-    def dropna(self, drop_nan: bool = False) -> ColumnBase:
-        # The drop_nan argument is only used for numerical columns.
+    def dropna(self) -> ColumnBase:
         return drop_nulls([self])[0]._with_type_metadata(self.dtype)
 
     def to_arrow(self) -> pa.Array:
@@ -437,14 +428,6 @@ def nullmask(self) -> Buffer:
             raise ValueError("Column has no null mask")
         return self.mask_array_view(mode="read")
 
-    def force_deep_copy(self) -> Self:
-        """
-        A method to create deep copy irrespective of whether
-        `copy-on-write` is enabled.
-        """
-        result = libcudf.copying.copy_column(self)
-        return result._with_type_metadata(self.dtype)
-
     def copy(self, deep: bool = True) -> Self:
         """
         Makes a copy of the Column.
@@ -464,7 +447,8 @@ def copy(self, deep: bool = True) -> Self:
             them.
         """
         if deep:
-            return self.force_deep_copy()
+            result = libcudf.copying.copy_column(self)
+            return result._with_type_metadata(self.dtype)
         else:
             return cast(
                 Self,
@@ -1069,7 +1053,7 @@ def as_categorical_column(self, dtype) -> ColumnBase:
         )
         # columns include null index in factorization; remove:
         if self.has_nulls():
-            cats = cats.dropna(drop_nan=False)
+            cats = cats.dropna()
             min_type = min_unsigned_type(len(cats), 8)
             if cudf.dtype(min_type).itemsize < labels.dtype.itemsize:
                 labels = labels.astype(min_type)
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index 81059717b20..6a7e7729123 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -142,7 +142,4 @@ def element_indexing(self, index: int):
         result = super().element_indexing(index)
         if cudf.get_option("mode.pandas_compatible"):
             return pd.Interval(**result, closed=self._closed)
-        return {
-            field: value
-            for field, value in zip(self.dtype.fields, result.values())
-        }
+        return result
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 5461d1b13b5..0577e0f37ed 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -20,7 +20,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib.stream_compaction import drop_nulls
 from cudf._lib.types import size_type_dtype
 from cudf._typing import (
     ColumnBinaryOperand,
@@ -421,10 +420,6 @@ def nan_count(self) -> int:
             self._nan_count = nan_col.sum()
         return self._nan_count
 
-    def dropna(self, drop_nan: bool = False) -> NumericalColumn:
-        col = self.nans_to_nulls() if drop_nan else self
-        return drop_nulls([col])[0]
-
     def _process_values_for_isin(
         self, values: Sequence
     ) -> Tuple[ColumnBase, ColumnBase]:
diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py
index e1950c9f250..bed376e4a79 100644
--- a/python/cudf/cudf/io/dlpack.py
+++ b/python/cudf/cudf/io/dlpack.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 
 import cudf
@@ -71,7 +71,7 @@ def to_dlpack(cudf_obj):
     if isinstance(cudf_obj, (cudf.DataFrame, cudf.Series, cudf.BaseIndex)):
         gdf = cudf_obj
     elif isinstance(cudf_obj, ColumnBase):
-        gdf = cudf_obj.as_frame()
+        gdf = cudf.Series._from_data({None: cudf_obj})
     else:
         raise TypeError(
             f"Input of type {type(cudf_obj)} cannot be converted "