Address PR comments.

rapidsai · Apr 27, 2021 · f575e88 · f575e88
1 parent 0c2c7a1
commit f575e88
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 16 deletions.
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
@@ -1076,10 +1076,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]:
             " if you need this functionality."
         )
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> pd.Series:
-
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
         if self.categories.dtype.kind == "f":
             new_mask = bools_to_mask(self.notnull())
             col = column.build_categorical_column(

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
@@ -112,22 +112,22 @@ def __repr__(self):
             f"dtype: {self.dtype}"
         )
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> "pd.Series":
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
         """Convert object to pandas type.
 
         The default implementation falls back to PyArrow for the conversion.
         """
+        # This default implementation does not handle nulls in any meaningful
+        # way, but it must still consume the `nullable` keyword so that it is
+        # not forwarded to PyArrow (which does not recognize it).
+        kwargs.pop("nullable", None)
         pd_series = self.to_arrow().to_pandas(**kwargs)
 
         if index is not None:
             pd_series.index = index
         return pd_series
 
     def __iter__(self):
-        # TODO: Why don't we just implement this method in terms of one of the
-        # proposed alternatives (to_arrow, to_pandas, or values_host)?
         cudf.utils.utils.raise_iteration_error(obj=self)
 
     @property

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
@@ -135,7 +135,7 @@ def weekday(self) -> ColumnBase:
         return self.get_dt_field("weekday")
 
     def to_pandas(
-        self, index: "cudf.Index" = None, nullable: bool = False, **kwargs
+        self, index: pd.Index = None, nullable: bool = False, **kwargs
     ) -> "cudf.Series":
         # Workaround until following issue is fixed:
         # https://issues.apache.org/jira/browse/ARROW-9772

diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
@@ -3,7 +3,6 @@
 import pyarrow as pa
 
 import cudf
-from cudf._typing import ColumnLike
 from cudf.core.column import StructColumn
 from cudf.core.dtypes import IntervalDtype
 from cudf.utils.dtypes import is_interval_dtype
@@ -114,9 +113,12 @@ def as_interval_column(self, dtype, **kwargs):
         else:
             raise ValueError("dtype must be IntervalDtype")
 
-    def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
-    ) -> "pd.Series":
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+        # Note: This does not handle null values in the interval column.
+        # However, this exact sequence (calling __from_arrow__ on the output of
+        # self.to_arrow) is currently the best known way to convert interval
+        # types into pandas (trying to convert the underlying numerical columns
+        # directly is problematic), so we're stuck with this for now.
         return pd.Series(
             pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index
         )
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
@@ -744,14 +744,14 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         return False
 
     def to_pandas(
-        self, index: ColumnLike = None, nullable: bool = False, **kwargs
+        self, index: pd.Index = None, nullable: bool = False, **kwargs
     ) -> "pd.Series":
         if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes:
             pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype]
             arrow_array = self.to_arrow()
             pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array)
             pd_series = pd.Series(pandas_array, copy=False)
-        elif str(self.dtype) in NUMERIC_TYPES and self.null_count == 0:
+        elif str(self.dtype) in NUMERIC_TYPES and not self.has_nulls:
             pd_series = pd.Series(cupy.asnumpy(self.values), copy=False)
         else:
             pd_series = self.to_arrow().to_pandas(**kwargs)