From 570df6c5fbb0a2120b539aba0a65702c2190527f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Jun 2024 15:24:40 -1000 Subject: [PATCH] Add typing to single_column_frame (#15965) Also removes an extra copy from `.flatten()` when calling `.values` or `.values_host` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Thomas Li (https://github.com/lithomas1) URL: https://github.com/rapidsai/cudf/pull/15965 --- python/cudf/cudf/api/types.py | 7 ++- python/cudf/cudf/core/column/column.py | 4 +- python/cudf/cudf/core/single_column_frame.py | 58 ++++++++------------ 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 417d8b0922a..42b1524bd76 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -8,7 +8,7 @@ from collections import abc from functools import wraps from inspect import isclass -from typing import List, Union +from typing import List, Union, cast import cupy as cp import numpy as np @@ -238,7 +238,10 @@ def _union_categoricals( raise TypeError("ignore_order is not yet implemented") result_col = cudf.core.column.CategoricalColumn._concat( - [obj._column for obj in to_union] + [ + cast(cudf.core.column.CategoricalColumn, obj._column) + for obj in to_union + ] ) if sort_categories: sorted_categories = result_col.categories.sort_values(ascending=True) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f87797a1fa3..7abdbc85720 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -212,7 +212,7 @@ def to_pandas( return pd.Index(pa_array.to_pandas()) @property - def values_host(self) -> "np.ndarray": + def values_host(self) -> np.ndarray: """ Return a numpy representation of the Column. """ @@ -226,7 +226,7 @@ def values_host(self) -> "np.ndarray": return self.data_array_view(mode="read").copy_to_host() @property - def values(self) -> "cupy.ndarray": + def values(self) -> cupy.ndarray: """ Return a CuPy representation of the Column. """ diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index d864b563208..acc74129a29 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -7,9 +7,11 @@ import cupy import numpy +import pyarrow as pa +from typing_extensions import Self import cudf -from cudf._typing import Dtype, NotImplementedType, ScalarLike +from cudf._typing import NotImplementedType, ScalarLike from cudf.api.extensions import no_default from cudf.api.types import ( _is_scalar_or_zero_d_array, @@ -27,8 +29,8 @@ class SingleColumnFrame(Frame, NotIterable): """A one-dimensional frame. - Frames with only a single column share certain logic that is encoded in - this class. + Frames with only a single column (Index or Series) + share certain logic that is encoded in this class. """ _SUPPORT_AXIS_LOOKUP = { @@ -47,7 +49,7 @@ def _reduce( if axis not in (None, 0, no_default): raise NotImplementedError("axis parameter is not implemented yet") - if numeric_only and not is_numeric_dtype(self._column): + if numeric_only and not is_numeric_dtype(self.dtype): raise TypeError( f"Series.{op} does not allow numeric_only={numeric_only} " "with non-numeric dtypes." @@ -68,7 +70,7 @@ def _scan(self, op, axis=None, *args, **kwargs): @_cudf_nvtx_annotate def name(self): """Get the name of this object.""" - return next(iter(self._data.names)) + return next(iter(self._column_names)) @name.setter # type: ignore @_cudf_nvtx_annotate @@ -83,7 +85,7 @@ def ndim(self) -> int: # noqa: D401 @property # type: ignore @_cudf_nvtx_annotate - def shape(self): + def shape(self) -> tuple[int]: """Get a tuple representing the dimensionality of the Index.""" return (len(self),) @@ -95,45 +97,27 @@ def __bool__(self): @property # type: ignore @_cudf_nvtx_annotate - def _num_columns(self): + def _num_columns(self) -> int: return 1 @property # type: ignore @_cudf_nvtx_annotate - def _column(self): - return self._data[self.name] + def _column(self) -> ColumnBase: + return next(iter(self._columns)) @property # type: ignore @_cudf_nvtx_annotate - def values(self): # noqa: D102 + def values(self) -> cupy.ndarray: # noqa: D102 return self._column.values @property # type: ignore @_cudf_nvtx_annotate - def values_host(self): # noqa: D102 + def values_host(self) -> numpy.ndarray: # noqa: D102 return self._column.values_host - @_cudf_nvtx_annotate - def to_cupy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, - ) -> cupy.ndarray: # noqa: D102 - return super().to_cupy(dtype, copy, na_value).flatten() - - @_cudf_nvtx_annotate - def to_numpy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, - ) -> numpy.ndarray: # noqa: D102 - return super().to_numpy(dtype, copy, na_value).flatten() - @classmethod @_cudf_nvtx_annotate - def from_arrow(cls, array): + def from_arrow(cls, array) -> Self: """Create from PyArrow Array/ChunkedArray. Parameters @@ -164,7 +148,7 @@ def from_arrow(cls, array): return cls(ColumnBase.from_arrow(array)) @_cudf_nvtx_annotate - def to_arrow(self): + def to_arrow(self) -> pa.Array: """ Convert to a PyArrow Array. @@ -196,7 +180,7 @@ def to_arrow(self): @property # type: ignore @_cudf_nvtx_annotate - def is_unique(self): + def is_unique(self) -> bool: """Return boolean if values in the object are unique. Returns @@ -207,7 +191,7 @@ def is_unique(self): @property # type: ignore @_cudf_nvtx_annotate - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: """Return boolean if values in the object are monotonically increasing. Returns @@ -218,7 +202,7 @@ def is_monotonic_increasing(self): @property # type: ignore @_cudf_nvtx_annotate - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: """Return boolean if values in the object are monotonically decreasing. Returns @@ -243,7 +227,9 @@ def __cuda_array_interface__(self): ) @_cudf_nvtx_annotate - def factorize(self, sort=False, use_na_sentinel=True): + def factorize( + self, sort: bool = False, use_na_sentinel: bool = True + ) -> tuple[cupy.ndarray, cudf.Index]: """Encode the input values as integer labels. Parameters @@ -335,7 +321,7 @@ def _make_operands_for_binop( return {result_name: (self._column, other, reflect, fill_value)} @_cudf_nvtx_annotate - def nunique(self, dropna: bool = True): + def nunique(self, dropna: bool = True) -> int: """ Return count of unique values for the column.