diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index d55bd245cb7..3cd1a599ddc 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1076,10 +1076,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: " if you need this functionality." ) - def to_pandas( - self, index: ColumnLike = None, nullable: bool = False, **kwargs - ) -> pd.Series: - + def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: if self.categories.dtype.kind == "f": new_mask = bools_to_mask(self.notnull()) col = column.build_categorical_column( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5226893d524..65fcc6791d8 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -112,13 +112,15 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas( - self, index: ColumnLike = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. """ + # This default implementation does not handle nulls in any meaningful + # way, but must consume the parameter to avoid passing it to PyArrow + # (which does not recognize it). + kwargs.pop("nullable", None) pd_series = self.to_arrow().to_pandas(**kwargs) if index is not None: @@ -126,8 +128,6 @@ def to_pandas( return pd_series def __iter__(self): - # TODO: Why don't we just implement this method in terms of one of the - # proposed alternatives (to_arrow, to_pandas, or values_host)? cudf.utils.utils.raise_iteration_error(obj=self) @property diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index d86a54e6970..14c82b5ff45 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -135,7 +135,7 @@ def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") def to_pandas( - self, index: "cudf.Index" = None, nullable: bool = False, **kwargs + self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "cudf.Series": # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 7436a69e14a..24541c57044 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -3,7 +3,6 @@ import pyarrow as pa import cudf -from cudf._typing import ColumnLike from cudf.core.column import StructColumn from cudf.core.dtypes import IntervalDtype from cudf.utils.dtypes import is_interval_dtype @@ -114,9 +113,12 @@ def as_interval_column(self, dtype, **kwargs): else: raise ValueError("dtype must be IntervalDtype") - def to_pandas( - self, index: ColumnLike = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + # Note: This does not handle null values in the interval column. + # However, this exact sequence (calling __from_arrow__ on the output of + # self.to_arrow) is currently the best known way to convert interval + # types into pandas (trying to convert the underlying numerical columns + # directly is problematic), so we're stuck with this for now. return pd.Series( pd.IntervalDtype().__from_arrow__(self.to_arrow()), index=index ) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 0c815db0b49..d710129900a 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -744,14 +744,14 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool: return False def to_pandas( - self, index: ColumnLike = None, nullable: bool = False, **kwargs + self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "pd.Series": if nullable and self.dtype in cudf_dtypes_to_pandas_dtypes: pandas_nullable_dtype = cudf_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array) pd_series = pd.Series(pandas_array, copy=False) - elif str(self.dtype) in NUMERIC_TYPES and self.null_count == 0: + elif str(self.dtype) in NUMERIC_TYPES and not self.has_nulls: pd_series = pd.Series(cupy.asnumpy(self.values), copy=False) else: pd_series = self.to_arrow().to_pandas(**kwargs)