diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 6d5387591cb..050a71d83f0 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -200,7 +200,7 @@ def wrapped_func(obj): def _union_categoricals( - to_union: List[Union[cudf.Series, cudf.CategoricalIndex]], + to_union: list[cudf.Series | cudf.CategoricalIndex], sort_categories: bool = False, ignore_order: bool = False, ): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 5c5ccaf94c0..a70681a06d3 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -39,7 +39,7 @@ class BaseIndex(Serializable): """Base class for all cudf Index types.""" dtype: DtypeObj - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _data: ColumnAccessor def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index 0658927975f..c5b4198581f 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -38,7 +38,7 @@ class Buffer(Serializable): _owner: Any def __init__( - self, data: Any = None, size: Optional[int] = None, owner: Any = None + self, data: Any = None, size: int | None = None, owner: Any = None ): if isinstance(data, Buffer): @@ -117,7 +117,7 @@ def _init_from_array_like(self, data, owner): f"Cannot construct Buffer from {data.__class__.__name__}" ) - def serialize(self) -> Tuple[dict, list]: + def serialize(self) -> tuple[dict, list]: header = {} # type: Dict[Any, Any] header["type-serialized"] = pickle.dumps(type(self)) header["constructor-kwargs"] = {} diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index de06e62cbb1..bafcc19ab1d 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -110,14 +110,14 @@ def __init__(self, parent: SeriesOrSingleColumnIndex): super().__init__(parent=parent) @property - def categories(self) -> "cudf.core.index.BaseIndex": + def categories(self) -> cudf.core.index.BaseIndex: """ The categories of this categorical. """ return cudf.core.index.as_index(self._column.categories) @property - def codes(self) -> "cudf.Series": + def codes(self) -> cudf.Series: """ Return Series of codes as well as the index. """ @@ -129,13 +129,13 @@ def codes(self) -> "cudf.Series": return cudf.Series(self._column.codes, index=index) @property - def ordered(self) -> Optional[bool]: + def ordered(self) -> bool | None: """ Whether the categories have an ordered relationship. """ return self._column.ordered - def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: + def as_ordered(self, inplace: bool = False) -> SeriesOrIndex | None: """ Set the Categorical to be ordered. @@ -192,7 +192,7 @@ def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: self._column.as_ordered(), inplace=inplace ) - def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: + def as_unordered(self, inplace: bool = False) -> SeriesOrIndex | None: """ Set the Categorical to be unordered. @@ -262,7 +262,7 @@ def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: def add_categories( self, new_categories: Any, inplace: bool = False - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Add new categories. @@ -347,7 +347,7 @@ def add_categories( def remove_categories( self, removals: Any, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Remove the specified categories. @@ -441,7 +441,7 @@ def set_categories( ordered: bool = False, rename: bool = False, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Set the categories to the specified new_categories. @@ -535,7 +535,7 @@ def reorder_categories( new_categories: Any, ordered: bool = False, inplace: bool = False, - ) -> Optional[SeriesOrIndex]: + ) -> SeriesOrIndex | None: """ Reorder categories as specified in new_categories. @@ -624,8 +624,8 @@ class CategoricalColumn(column.ColumnBase): """ dtype: cudf.core.dtypes.CategoricalDtype - _codes: Optional[NumericalColumn] - _children: Tuple[NumericalColumn] + _codes: NumericalColumn | None + _children: tuple[NumericalColumn] def __init__( self, @@ -634,7 +634,7 @@ def __init__( size: int = None, offset: int = 0, null_count: int = None, - children: Tuple["column.ColumnBase", ...] = (), + children: tuple[column.ColumnBase, ...] = (), ): if size is None: @@ -671,8 +671,8 @@ def __contains__(self, item: ScalarLike) -> bool: return False return self._encode(item) in self.as_numerical - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"], dtype_frames = self.dtype.serialize() @@ -729,23 +729,23 @@ def set_base_data(self, value): def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: lhs = self # We need to convert values to same type as self, # hence passing dtype=self.dtype rhs = cudf.core.column.as_column(values, dtype=self.dtype) return lhs, rhs - def set_base_mask(self, value: Optional[Buffer]): + def set_base_mask(self, value: Buffer | None): super().set_base_mask(value) self._codes = None - def set_base_children(self, value: Tuple[ColumnBase, ...]): + def set_base_children(self, value: tuple[ColumnBase, ...]): super().set_base_children(value) self._codes = None @property - def children(self) -> Tuple[NumericalColumn]: + def children(self) -> tuple[NumericalColumn]: if self._children is None: codes_column = self.base_children[0] @@ -788,7 +788,7 @@ def codes(self) -> NumericalColumn: return cast(cudf.core.column.NumericalColumn, self._codes) @property - def ordered(self) -> Optional[bool]: + def ordered(self) -> bool | None: return self.dtype.ordered @ordered.setter @@ -842,7 +842,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> "column.ColumnBase": + ) -> column.ColumnBase: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -858,7 +858,7 @@ def _fill( def slice( self, start: int, stop: int, stride: int = None - ) -> "column.ColumnBase": + ) -> column.ColumnBase: codes = self.codes.slice(start, stop, stride) return cudf.core.column.build_categorical_column( categories=self.categories, @@ -909,7 +909,7 @@ def normalize_binop_value(self, other: ScalarLike) -> CategoricalColumn: def sort_by_values( self, ascending: bool = True, na_position="last" - ) -> Tuple[CategoricalColumn, NumericalColumn]: + ) -> tuple[CategoricalColumn, NumericalColumn]: codes, inds = self.as_numerical.sort_by_values(ascending, na_position) col = column.build_categorical_column( categories=self.dtype.categories._values, @@ -991,7 +991,7 @@ def values(self): """ raise NotImplementedError("cudf.Categorical is not yet implemented") - def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase": + def clip(self, lo: ScalarLike, hi: ScalarLike) -> column.ColumnBase: return ( self.astype(self.categories.dtype).clip(lo, hi).astype(self.dtype) ) @@ -1329,7 +1329,7 @@ def memory_usage(self) -> int: def _mimic_inplace( self, other_col: ColumnBase, inplace: bool = False - ) -> Optional[ColumnBase]: + ) -> ColumnBase | None: out = super()._mimic_inplace(other_col, inplace=inplace) if inplace and isinstance(other_col, CategoricalColumn): self._codes = other_col._codes @@ -1572,7 +1572,7 @@ def as_unordered(self): def _create_empty_categorical_column( - categorical_column: CategoricalColumn, dtype: "CategoricalDtype" + categorical_column: CategoricalColumn, dtype: CategoricalDtype ) -> CategoricalColumn: return column.build_categorical_column( categories=column.as_column(dtype.categories), diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 7999fa9039b..ed19264b228 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -83,7 +83,7 @@ class ColumnBase(Column, Serializable): - def as_frame(self) -> "cudf.core.frame.Frame": + def as_frame(self) -> cudf.core.frame.Frame: """ Converts a Column to Frame """ @@ -92,14 +92,14 @@ def as_frame(self) -> "cudf.core.frame.Frame": ) @property - def data_array_view(self) -> "cuda.devicearray.DeviceNDArray": + def data_array_view(self) -> cuda.devicearray.DeviceNDArray: """ View the data as a device array object """ return cuda.as_cuda_array(self.data).view(self.dtype) @property - def mask_array_view(self) -> "cuda.devicearray.DeviceNDArray": + def mask_array_view(self) -> cuda.devicearray.DeviceNDArray: """ View the mask as a device array """ @@ -115,7 +115,7 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -134,7 +134,7 @@ def __iter__(self): cudf.utils.utils.raise_iteration_error(obj=self) @property - def values_host(self) -> "np.ndarray": + def values_host(self) -> np.ndarray: """ Return a numpy representation of the Column. """ @@ -147,7 +147,7 @@ def values_host(self) -> "np.ndarray": return self.data_array_view.copy_to_host() @property - def values(self) -> "cupy.ndarray": + def values(self) -> cupy.ndarray: """ Return a CuPy representation of the Column. """ @@ -319,7 +319,7 @@ def _default_na_value(self) -> Any: # TODO: This method is deprecated and can be removed when the associated # Frame methods are removed. - def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": + def to_gpu_array(self, fillna=None) -> cuda.devicearray.DeviceNDArray: """Get a dense numba device array for the data. Parameters @@ -365,7 +365,7 @@ def _fill( begin: int, end: int, inplace: bool = False, - ) -> Optional[ColumnBase]: + ) -> ColumnBase | None: if end <= begin or begin >= self.size: return self if inplace else self.copy() @@ -517,7 +517,7 @@ def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: ) return self.take(gather_map) - def __getitem__(self, arg) -> Union[ScalarLike, ColumnBase]: + def __getitem__(self, arg) -> ScalarLike | ColumnBase: if _is_scalar_or_zero_d_array(arg): return self.element_indexing(int(arg)) elif isinstance(arg, slice): @@ -677,7 +677,7 @@ def append(self, other: ColumnBase) -> ColumnBase: def quantile( self, - q: Union[float, Sequence[float]], + q: float | Sequence[float], interpolation: builtins.str, exact: bool, ) -> ColumnBase: @@ -740,7 +740,7 @@ def isin(self, values: Sequence) -> ColumnBase: def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: """ Helper function for `isin` which pre-process `values` based on `self`. """ @@ -752,7 +752,7 @@ def _process_values_for_isin( rhs = rhs.astype(lhs.dtype) return lhs, rhs - def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]: + def _isin_earlystop(self, rhs: ColumnBase) -> ColumnBase | None: """ Helper function for `isin` which determines possibility of early-stopping or not. @@ -847,7 +847,7 @@ def sort_by_values( self: ColumnBase, ascending: bool = True, na_position: builtins.str = "last", - ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]: + ) -> tuple[ColumnBase, cudf.core.column.NumericalColumn]: col_inds = self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position ) @@ -960,47 +960,47 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: raise NotImplementedError def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: raise NotImplementedError def as_interval_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.IntervalColumn": + ) -> cudf.core.column.IntervalColumn: raise NotImplementedError def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: raise NotImplementedError def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: raise NotImplementedError def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> Union["cudf.core.column.decimal.DecimalBaseColumn"]: + ) -> cudf.core.column.decimal.DecimalBaseColumn: raise NotImplementedError def as_decimal128_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal128Column": + ) -> cudf.core.column.Decimal128Column: raise NotImplementedError def as_decimal64_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal64Column": + ) -> cudf.core.column.Decimal64Column: raise NotImplementedError def as_decimal32_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.Decimal32Column": + ) -> cudf.core.column.Decimal32Column: raise NotImplementedError def apply_boolean_mask(self, mask) -> ColumnBase: @@ -1110,8 +1110,8 @@ def unique(self) -> ColumnBase: return drop_duplicates([self], keep="first")[0] - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {} frames = [] header["type-serialized"] = pickle.dumps(type(self)) header["dtype"] = self.dtype.str @@ -1155,7 +1155,7 @@ def binary_operator( def normalize_binop_value( self, other: ScalarLike - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: raise NotImplementedError def _minmax(self, skipna: bool = None): @@ -1217,7 +1217,7 @@ def nans_to_nulls(self: T) -> T: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: skipna = True if skipna is None else skipna if skipna: @@ -1347,14 +1347,14 @@ def column_empty( def build_column( - data: Union[Buffer, None], + data: Buffer | None, dtype: Dtype, *, size: int = None, mask: Buffer = None, offset: int = 0, null_count: int = None, - children: Tuple[ColumnBase, ...] = (), + children: tuple[ColumnBase, ...] = (), ) -> ColumnBase: """ Build a Column of the appropriate type from the given parameters @@ -1516,7 +1516,7 @@ def build_categorical_column( offset: int = 0, null_count: int = None, ordered: bool = None, -) -> "cudf.core.column.CategoricalColumn": +) -> cudf.core.column.CategoricalColumn: """ Build a CategoricalColumn @@ -1606,7 +1606,7 @@ def build_list_column( size: int = None, offset: int = 0, null_count: int = None, -) -> "cudf.core.column.ListColumn": +) -> cudf.core.column.ListColumn: """ Build a ListColumn @@ -1638,13 +1638,13 @@ def build_list_column( def build_struct_column( names: Sequence[str], - children: Tuple[ColumnBase, ...], - dtype: Optional[Dtype] = None, + children: tuple[ColumnBase, ...], + dtype: Dtype | None = None, mask: Buffer = None, size: int = None, offset: int = 0, null_count: int = None, -) -> "cudf.core.column.StructColumn": +) -> cudf.core.column.StructColumn: """ Build a StructColumn @@ -2177,8 +2177,8 @@ def as_column( def _construct_array( - arbitrary: Any, dtype: Optional[Dtype] -) -> Union[np.ndarray, cupy.ndarray]: + arbitrary: Any, dtype: Dtype | None +) -> np.ndarray | cupy.ndarray: """ Construct a CuPy or NumPy array from `arbitrary` """ @@ -2212,7 +2212,7 @@ def _data_from_cuda_array_interface_desc(obj) -> Buffer: return data -def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: +def _mask_from_cuda_array_interface_desc(obj) -> Buffer | None: desc = obj.__cuda_array_interface__ mask = desc.get("mask", None) @@ -2235,7 +2235,7 @@ def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]: return mask -def serialize_columns(columns) -> Tuple[List[dict], List]: +def serialize_columns(columns) -> tuple[list[dict], list]: """ Return the headers and frames resulting from serializing a list of Column @@ -2250,7 +2250,7 @@ def serialize_columns(columns) -> Tuple[List[dict], List]: frames : list list of frames """ - headers: List[Dict[Any, Any]] = [] + headers: list[dict[Any, Any]] = [] frames = [] if len(columns) > 0: @@ -2262,7 +2262,7 @@ def serialize_columns(columns) -> Tuple[List[dict], List]: return headers, frames -def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: +def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]: """ Construct a list of Columns from a list of headers and frames. @@ -2281,9 +2281,9 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: def arange( - start: Union[int, float], - stop: Union[int, float] = None, - step: Union[int, float] = 1, + start: int | float, + stop: int | float = None, + step: int | float = 1, dtype=None, ) -> ColumnBase: """ @@ -2372,7 +2372,7 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: +def concat_columns(objs: MutableSequence[ColumnBase]) -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b763790986a..75b50f7e12e 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -193,7 +193,7 @@ def day_of_year(self) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "cudf.Series": + ) -> cudf.Series: # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -264,7 +264,7 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": + def as_numerical(self) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -311,21 +311,21 @@ def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn: def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: raise TypeError( f"cannot astype a datetimelike from {self.dtype} to {dtype}" ) def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%Y-%m-%d %H:%M:%S" @@ -370,7 +370,7 @@ def median(self, skipna: bool = None) -> pd.Timestamp: ) def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact @@ -380,14 +380,11 @@ def quantile( return result.astype(self.dtype) def binary_operator( - self, - op: str, - rhs: Union[ColumnBase, "cudf.Scalar"], - reflect: bool = False, + self, op: str, rhs: ColumnBase | cudf.Scalar, reflect: bool = False, ) -> ColumnBase: if isinstance(rhs, cudf.DateOffset): return rhs._datetime_binop(self, op, reflect=reflect) - lhs: Union[ScalarLike, ColumnBase] = self + lhs: ScalarLike | ColumnBase = self if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"): out_dtype = cudf.dtype(np.bool_) # type: Dtype elif op == "add" and pd.api.types.is_timedelta64_dtype(rhs.dtype): diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 9bea94cfecb..6f0ffae3343 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -51,7 +51,7 @@ def _return_or_inplace( inplace: bool = False, expand: bool = False, retain_index: bool = True, - ) -> Optional[ParentType]: + ) -> ParentType | None: ... def _return_or_inplace( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a7481ce62a3..730e223f111 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -61,7 +61,7 @@ class NumericalColumn(NumericalBaseColumn): mask : Buffer, optional """ - _nan_count: Optional[int] + _nan_count: int | None def __init__( self, @@ -142,7 +142,7 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: return output - def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase: + def unary_operator(self, unaryop: str | Callable) -> ColumnBase: if callable(unaryop): return libcudf.transform.transform(self, unaryop) @@ -179,7 +179,7 @@ def binary_operator( msg = "{!r} operator not supported between {} and {}" raise TypeError(msg.format(binop, type(self), type(rhs))) if isinstance(rhs, cudf.core.column.Decimal128Column): - lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column( + lhs: ScalarLike | ColumnBase = self.as_decimal_column( Decimal128Dtype(Decimal128Dtype.MAX_PRECISION, 0) ) return lhs.binary_operator(binop, rhs) @@ -226,7 +226,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: def normalize_binop_value( self, other: ScalarLike - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: if other is None: return other if isinstance(other, cudf.Scalar): @@ -259,7 +259,7 @@ def normalize_binop_value( else: raise TypeError(f"cannot broadcast {type(other)}") - def int2ip(self) -> "cudf.core.column.StringColumn": + def int2ip(self) -> cudf.core.column.StringColumn: if self.dtype != cudf.dtype("int64"): raise TypeError("Only int64 type can be converted to ip") @@ -267,7 +267,7 @@ def int2ip(self) -> "cudf.core.column.StringColumn": def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if len(self) > 0: return string._numeric_to_str_typecast_functions[ cudf.dtype(self.dtype) @@ -279,7 +279,7 @@ def as_string_column( def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: return cast( "cudf.core.column.DatetimeColumn", build_column( @@ -293,7 +293,7 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: return cast( "cudf.core.column.TimeDeltaColumn", build_column( @@ -307,7 +307,7 @@ def as_timedelta_column( def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DecimalBaseColumn": + ) -> cudf.core.column.DecimalBaseColumn: return libcudf.unary.cast(self, dtype) def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn: @@ -327,7 +327,7 @@ def nan_count(self) -> int: def _process_values_for_isin( self, values: Sequence - ) -> Tuple[ColumnBase, ColumnBase]: + ) -> tuple[ColumnBase, ColumnBase]: lhs = cast("cudf.core.column.ColumnBase", self) rhs = as_column(values, nan_as_null=False) @@ -346,7 +346,7 @@ def _can_return_nan(self, skipna: bool = None) -> bool: def _process_for_reduction( self, skipna: bool = None, min_count: int = 0 - ) -> Union[ColumnBase, ScalarLike]: + ) -> ColumnBase | ScalarLike: skipna = True if skipna is None else skipna if self._can_return_nan(skipna=skipna): @@ -642,7 +642,7 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + ) -> pd.Series: if nullable and self.dtype in np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] arrow_array = self.to_arrow() @@ -670,7 +670,7 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: def _normalize_find_and_replace_input( - input_column_dtype: DtypeObj, col_to_normalize: Union[ColumnBase, list] + input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list ) -> ColumnBase: normalized_column = column.as_column( col_to_normalize, diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 1f84cb88e37..e5918578fbf 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -129,7 +129,7 @@ def skew(self, skipna: bool = None) -> ScalarLike: return skew def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> NumericalBaseColumn: if isinstance(q, Number) or cudf.api.types.is_list_like(q): np_array_q = np.asarray(q) @@ -158,7 +158,7 @@ def median(self, skipna: bool = None) -> NumericalBaseColumn: return self.quantile(0.5, interpolation="linear", exact=True) def _numeric_quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool + self, q: float | Sequence[float], interpolation: str, exact: bool ) -> NumericalBaseColumn: quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q # get sorted indices and exclude nulls diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 7f62d8fe03f..7a00c7401d2 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -257,7 +257,7 @@ def cat(self, sep: str = None, na_rep: str = None) -> str: @overload def cat( self, others, sep: str = None, na_rep: str = None - ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: + ) -> SeriesOrIndex | cudf.core.column.string.StringColumn: ... def cat(self, others=None, sep=None, na_rep=None): @@ -630,7 +630,7 @@ def extract( def contains( self, - pat: Union[str, Sequence], + pat: str | Sequence, case: bool = True, flags: int = 0, na=np.nan, @@ -771,7 +771,7 @@ def contains( ) return self._return_or_inplace(result_col) - def repeat(self, repeats: Union[int, Sequence],) -> SeriesOrIndex: + def repeat(self, repeats: int | Sequence,) -> SeriesOrIndex: """ Duplicate each string in the Series or Index. Equivalent to `str.repeat() @@ -826,8 +826,8 @@ def repeat(self, repeats: Union[int, Sequence],) -> SeriesOrIndex: def replace( self, - pat: Union[str, Sequence], - repl: Union[str, Sequence], + pat: str | Sequence, + repl: str | Sequence, n: int = -1, case=None, flags: int = 0, @@ -1988,7 +1988,7 @@ def filter_alphanum( ) def slice_from( - self, starts: "cudf.Series", stops: "cudf.Series" + self, starts: cudf.Series, stops: cudf.Series ) -> SeriesOrIndex: """ Return substring of each string using positions for each string. @@ -3608,7 +3608,7 @@ def endswith(self, pat: str) -> SeriesOrIndex: return self._return_or_inplace(result_col) - def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex: + def startswith(self, pat: str | Sequence) -> SeriesOrIndex: """ Test if the start of each string element matches a pattern. @@ -4285,7 +4285,7 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: ) def detokenize( - self, indices: "cudf.Series", separator: str = " " + self, indices: cudf.Series, separator: str = " " ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order @@ -4958,9 +4958,9 @@ class StringColumn(column.ColumnBase): respectively """ - _start_offset: Optional[int] - _end_offset: Optional[int] - _cached_sizeof: Optional[int] + _start_offset: int | None + _end_offset: int | None + _cached_sizeof: int | None def __init__( self, @@ -4968,7 +4968,7 @@ def __init__( size: int = None, # TODO: make non-optional offset: int = 0, null_count: int = None, - children: Tuple["column.ColumnBase", ...] = (), + children: tuple[column.ColumnBase, ...] = (), ): dtype = cudf.dtype("object") @@ -5123,7 +5123,7 @@ def __contains__(self, item: ScalarLike) -> bool: def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: out_dtype = cudf.dtype(dtype) string_col = self if out_dtype.kind in {"i", "u"}: @@ -5165,7 +5165,7 @@ def _as_datetime_or_timedelta_column(self, dtype, format): def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: out_dtype = cudf.dtype(dtype) # infer on host from the first not na element @@ -5189,14 +5189,14 @@ def as_datetime_column( def as_timedelta_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.TimeDeltaColumn": + ) -> cudf.core.column.TimeDeltaColumn: out_dtype = cudf.dtype(dtype) format = "%D days %H:%M:%S" return self._as_datetime_or_timedelta_column(out_dtype, format) def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DecimalBaseColumn": + ) -> cudf.core.column.DecimalBaseColumn: return libstrings.to_decimal(self, dtype) def as_string_column( @@ -5240,7 +5240,7 @@ def to_array(self, fillna: bool = None) -> np.ndarray: def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs - ) -> "pd.Series": + ) -> pd.Series: if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) pd_series = pd.Series(pandas_array, copy=False) @@ -5251,8 +5251,8 @@ def to_pandas( pd_series.index = index return pd_series - def serialize(self) -> Tuple[dict, list]: - header: Dict[Any, Any] = {"null_count": self.null_count} + def serialize(self) -> tuple[dict, list]: + header: dict[Any, Any] = {"null_count": self.null_count} header["type-serialized"] = pickle.dumps(type(self)) header["size"] = self.size @@ -5366,7 +5366,7 @@ def fillna( else: return super().fillna(method=method) - def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: + def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]: found_indices = libcudf.search.contains( self, column.as_column([value], dtype=self.dtype) ) @@ -5383,7 +5383,7 @@ def find_first_value( def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return self._find_first_and_last(value)[1] - def normalize_binop_value(self, other) -> "column.ColumnBase": + def normalize_binop_value(self, other) -> column.ColumnBase: # fastpath: gpu scalar if isinstance(other, cudf.Scalar) and other.dtype == "object": return column.as_column(other, length=len(self)) @@ -5407,7 +5407,7 @@ def _default_na_value(self) -> ScalarLike: def binary_operator( self, op: builtins.str, rhs, reflect: bool = False - ) -> "column.ColumnBase": + ) -> column.ColumnBase: lhs = self if reflect: lhs, rhs = rhs, lhs @@ -5431,7 +5431,7 @@ def binary_operator( ) @copy_docstring(column.ColumnBase.view) - def view(self, dtype) -> "cudf.core.column.ColumnBase": + def view(self, dtype) -> cudf.core.column.ColumnBase: if self.null_count > 0: raise ValueError( "Can not produce a view of a string column with nulls" diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index f0d02a706e2..c344ab3739d 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -81,7 +81,7 @@ def to_arrow(self): pa_type, len(self), buffers, children=children ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 4b7a3bcc197..da485a144bc 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -133,7 +133,7 @@ def to_pandas( def _binary_op_floordiv( self, rhs: BinaryOperand - ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -203,7 +203,7 @@ def _binary_op_lt_gt_le_ge(self, rhs: BinaryOperand) -> DtypeObj: def _binary_op_truediv( self, rhs: BinaryOperand - ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]: + ) -> tuple[column.ColumnBase, BinaryOperand, DtypeObj]: lhs = self # type: column.ColumnBase if pd.api.types.is_timedelta64_dtype(rhs.dtype): common_dtype = determine_out_dtype(self.dtype, rhs.dtype) @@ -229,7 +229,7 @@ def _binary_op_truediv( def binary_operator( self, op: str, rhs: BinaryOperand, reflect: bool = False - ) -> "column.ColumnBase": + ) -> column.ColumnBase: lhs, rhs = self, rhs if op in ("eq", "ne"): @@ -292,7 +292,7 @@ def normalize_binop_value(self, other) -> BinaryOperand: raise TypeError(f"cannot normalize {type(other)}") @property - def as_numerical(self) -> "cudf.core.column.NumericalColumn": + def as_numerical(self) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", column.build_column( @@ -334,21 +334,21 @@ def fillna( def as_numerical_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.NumericalColumn": + ) -> cudf.core.column.NumericalColumn: return cast( "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype) ) def as_datetime_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DatetimeColumn": + ) -> cudf.core.column.DatetimeColumn: raise TypeError( f"cannot astype a timedelta from {self.dtype} to {dtype}" ) def as_string_column( self, dtype: Dtype, format=None, **kwargs - ) -> "cudf.core.column.StringColumn": + ) -> cudf.core.column.StringColumn: if format is None: format = _dtype_to_format_conversion.get( self.dtype.name, "%D days %H:%M:%S" @@ -384,8 +384,8 @@ def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) def quantile( - self, q: Union[float, Sequence[float]], interpolation: str, exact: bool - ) -> "column.ColumnBase": + self, q: float | Sequence[float], interpolation: str, exact: bool + ) -> column.ColumnBase: result = self.as_numerical.quantile( q=q, interpolation=interpolation, exact=exact ) @@ -411,7 +411,7 @@ def std( unit=self.time_unit, ) - def components(self, index=None) -> "cudf.DataFrame": + def components(self, index=None) -> cudf.DataFrame: """ Return a Dataframe of the components of the Timedeltas. @@ -505,7 +505,7 @@ def components(self, index=None) -> "cudf.DataFrame": ) @property - def days(self) -> "cudf.core.column.NumericalColumn": + def days(self) -> cudf.core.column.NumericalColumn: """ Number of days for each element. @@ -518,7 +518,7 @@ def days(self) -> "cudf.core.column.NumericalColumn": ) @property - def seconds(self) -> "cudf.core.column.NumericalColumn": + def seconds(self) -> cudf.core.column.NumericalColumn: """ Number of seconds (>= 0 and less than 1 day). @@ -541,7 +541,7 @@ def seconds(self) -> "cudf.core.column.NumericalColumn": ) @property - def microseconds(self) -> "cudf.core.column.NumericalColumn": + def microseconds(self) -> cudf.core.column.NumericalColumn: """ Number of microseconds (>= 0 and less than 1 second). @@ -561,7 +561,7 @@ def microseconds(self) -> "cudf.core.column.NumericalColumn": ) @property - def nanoseconds(self) -> "cudf.core.column.NumericalColumn": + def nanoseconds(self) -> cudf.core.column.NumericalColumn: """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index c2ea9d756f7..d836dc5b2db 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -94,13 +94,13 @@ class ColumnAccessor(MutableMapping): may be passe. """ - _data: "Dict[Any, ColumnBase]" + _data: dict[Any, ColumnBase] multiindex: bool - _level_names: Tuple[Any, ...] + _level_names: tuple[Any, ...] def __init__( self, - data: Union[MutableMapping, ColumnAccessor] = None, + data: MutableMapping | ColumnAccessor = None, multiindex: bool = False, level_names=None, ): @@ -137,7 +137,7 @@ def __init__( @classmethod def _create_unsafe( cls, - data: Dict[Any, ColumnBase], + data: dict[Any, ColumnBase], multiindex: bool = False, level_names=None, ) -> ColumnAccessor: @@ -177,7 +177,7 @@ def __repr__(self) -> str: return f"{type_info}\n{column_info}" @property - def level_names(self) -> Tuple[Any, ...]: + def level_names(self) -> tuple[Any, ...]: if self._level_names is None or len(self._level_names) == 0: return tuple((None,) * max(1, self.nlevels)) else: @@ -206,11 +206,11 @@ def nrows(self) -> int: return len(next(iter(self.values()))) @cached_property - def names(self) -> Tuple[Any, ...]: + def names(self) -> tuple[Any, ...]: return tuple(self.keys()) @cached_property - def columns(self) -> Tuple[ColumnBase, ...]: + def columns(self) -> tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property @@ -463,7 +463,7 @@ def _pad_key(self, key: Any, pad_value="") -> Any: return key + (pad_value,) * (self.nlevels - len(key)) def rename_levels( - self, mapper: Union[Mapping[Any, Any], Callable], level: Optional[int] + self, mapper: Mapping[Any, Any] | Callable, level: int | None ) -> ColumnAccessor: """ Rename the specified levels of the given ColumnAccessor diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bd08ac385c7..336b659d115 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -503,7 +503,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): """ _PROTECTED_KEYS = frozenset(("_data", "_index")) - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _loc_indexer_type = _DataFrameLocIndexer _iloc_indexer_type = _DataFrameIlocIndexer @@ -822,7 +822,7 @@ def _init_from_dict_like( def _from_data( cls, data: MutableMapping, - index: Optional[BaseIndex] = None, + index: BaseIndex | None = None, columns: Any = None, ) -> DataFrame: out = super()._from_data(data, index) @@ -6433,7 +6433,7 @@ def _setitem_with_dataframe( input_df: DataFrame, replace_df: DataFrame, input_cols: Any = None, - mask: Optional[cudf.core.column.ColumnBase] = None, + mask: cudf.core.column.ColumnBase | None = None, ignore_index: bool = False, ): """ diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 69dc5389e7a..9d86aa30b16 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -65,11 +65,11 @@ class Frame: A Frame representing the (optional) index columns. """ - _data: "ColumnAccessor" + _data: ColumnAccessor # TODO: Once all dependence on Frame having an index is removed, this # attribute should be moved to IndexedFrame. - _index: Optional[cudf.core.index.BaseIndex] - _names: Optional[List] + _index: cudf.core.index.BaseIndex | None + _names: list | None def __init__(self, data=None, index=None): if data is None: @@ -90,11 +90,11 @@ def _num_rows(self) -> int: return len(self._data.columns[0]) @property - def _column_names(self) -> List[Any]: # TODO: List[str]? + def _column_names(self) -> list[Any]: # TODO: List[str]? return self._data.names @property - def _index_names(self) -> List[Any]: # TODO: List[str]? + def _index_names(self) -> list[Any]: # TODO: List[str]? # TODO: Temporarily suppressing mypy warnings to avoid introducing bugs # by returning an empty list where one is not expected. return ( @@ -104,7 +104,7 @@ def _index_names(self) -> List[Any]: # TODO: List[str]? ) @property - def _columns(self) -> List[Any]: # TODO: List[Column]? + def _columns(self) -> list[Any]: # TODO: List[Column]? return self._data.columns def serialize(self): @@ -126,7 +126,7 @@ def deserialize(cls, header, frames): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, ): obj = cls.__new__(cls) Frame.__init__(obj, data, index) @@ -135,9 +135,9 @@ def _from_data( @classmethod def _from_columns( cls, - columns: List[ColumnBase], - column_names: List[str], - index_names: Optional[List[str]] = None, + columns: list[ColumnBase], + column_names: list[str], + index_names: list[str] | None = None, ): """Construct a `Frame` object from a list of columns. @@ -165,9 +165,9 @@ def _from_columns( def _from_columns_like_self( self, - columns: List[ColumnBase], - column_names: List[str], - index_names: Optional[List[str]] = None, + columns: list[ColumnBase], + column_names: list[str], + index_names: list[str] | None = None, ): """Construct a `Frame` from a list of columns with metadata from self. @@ -181,7 +181,7 @@ def _from_columns_like_self( def _mimic_inplace( self: T, result: Frame, inplace: bool = False - ) -> Optional[Frame]: + ) -> Frame | None: if inplace: for col in self._data: if col in result._data: @@ -616,9 +616,9 @@ def _to_array( self, get_column_values: Callable, make_empty_matrix: Callable, - dtype: Union[Dtype, None] = None, + dtype: Dtype | None = None, na_value=None, - ) -> Union[cupy.ndarray, np.ndarray]: + ) -> cupy.ndarray | np.ndarray: # Internal function to implement to_cupy and to_numpy, which are nearly # identical except for the attribute they access to generate values. @@ -650,10 +650,7 @@ def get_column_values_na(col): return matrix def to_cupy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = False, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = False, na_value=None, ) -> cupy.ndarray: """Convert the Frame to a CuPy array. @@ -684,10 +681,7 @@ def to_cupy( ) def to_numpy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> np.ndarray: """Convert the Frame to a NumPy array. @@ -3440,7 +3434,7 @@ def _binaryop( @classmethod def _colwise_binop( cls, - operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]], fn: str, ): """Implement binary ops between two frame-like objects. @@ -6438,8 +6432,8 @@ def ge(self, other, axis="columns", level=None, fill_value=None): def _get_replacement_values_for_columns( - to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any] -) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]: + to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any] +) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]: """ Returns a per column mapping for the values to be replaced, new values to be replaced with and if all the values are empty. @@ -6464,9 +6458,9 @@ def _get_replacement_values_for_columns( A dict mapping of all columns and the corresponding values to be replaced with. """ - to_replace_columns: Dict[Any, Any] = {} - values_columns: Dict[Any, Any] = {} - all_na_columns: Dict[Any, Any] = {} + to_replace_columns: dict[Any, Any] = {} + values_columns: dict[Any, Any] = {} + all_na_columns: dict[Any, Any] = {} if is_scalar(to_replace) and is_scalar(value): to_replace_columns = {col: [to_replace] for col in columns_dtype_map} @@ -6603,8 +6597,8 @@ def _is_series(obj): def _drop_rows_by_labels( obj: DataFrameOrSeries, - labels: Union[ColumnLike, abc.Iterable, str], - level: Union[int, str], + labels: ColumnLike | abc.Iterable | str, + level: int | str, errors: str, ) -> DataFrameOrSeries: """Remove rows specified by `labels`. If `errors=True`, an error is raised diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88371666ce6..983525e4ea2 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -60,10 +60,8 @@ def _lexsorted_equal_range( - idx: Union[GenericIndex, cudf.MultiIndex], - key_as_table: Frame, - is_sorted: bool, -) -> Tuple[int, int, Optional[ColumnBase]]: + idx: GenericIndex | cudf.MultiIndex, key_as_table: Frame, is_sorted: bool, +) -> tuple[int, int, ColumnBase | None]: """Get equal range for key in lexicographically sorted index. If index is not sorted when called, a sort will take place and `sort_inds` is returned. Otherwise `None` is returned in that position. @@ -94,8 +92,8 @@ def _index_from_data(data: MutableMapping, name: Any = None): if isinstance(values, NumericalColumn): try: - index_class_type: Type[ - Union[GenericIndex, cudf.MultiIndex] + index_class_type: type[ + GenericIndex | cudf.MultiIndex ] = _dtype_to_index[values.dtype.type] except KeyError: index_class_type = GenericIndex @@ -115,7 +113,7 @@ def _index_from_data(data: MutableMapping, name: Any = None): def _index_from_columns( - columns: List[cudf.core.column.ColumnBase], name: Any = None + columns: list[cudf.core.column.ColumnBase], name: Any = None ): """Construct an index from ``columns``, with levels named 0, 1, 2...""" return _index_from_data(dict(zip(range(len(columns)), columns)), name=name) @@ -1213,7 +1211,7 @@ class NumericIndex(GenericIndex): """ # Subclasses must define the dtype they are associated with. - _dtype: Union[None, Type[np.number]] = None + _dtype: None | type[np.number] = None def __init__(self, data=None, dtype=None, copy=False, name=None): @@ -2246,7 +2244,7 @@ def is_categorical(self): def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right", -) -> "IntervalIndex": +) -> IntervalIndex: """ Returns a fixed frequency IntervalIndex. @@ -2593,7 +2591,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex: ) -_dtype_to_index: Dict[Any, Type[NumericIndex]] = { +_dtype_to_index: dict[Any, type[NumericIndex]] = { np.int8: Int8Index, np.int16: Int16Index, np.int32: Int32Index, @@ -2704,7 +2702,7 @@ def from_arrow(cls, obj): return cudf.MultiIndex.from_arrow(obj) -def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: +def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex: """ An internal Utility function to concat RangeIndex objects. """ @@ -2744,7 +2742,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex: return RangeIndex(start, stop, step) -def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]: +def _extended_gcd(a: int, b: int) -> tuple[int, int, int]: """ Extended Euclidean algorithms to solve Bezout's identity: a*x + b*y = gcd(x, y) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 72878078593..784be09494c 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -135,8 +135,8 @@ class IndexedFrame(Frame): """ # mypy can't handle bound type variables as class members - _loc_indexer_type: Type[_LocIndexerClass] # type: ignore - _iloc_indexer_type: Type[_IlocIndexerClass] # type: ignore + _loc_indexer_type: type[_LocIndexerClass] # type: ignore + _iloc_indexer_type: type[_IlocIndexerClass] # type: ignore _index: cudf.core.index.BaseIndex def __init__(self, data=None, index=None): @@ -1486,7 +1486,7 @@ def _reset_index(self, level, drop, col_level=0, col_fill=""): def _first_or_last( self, offset, idx: int, op: Callable, side: str, slice_func: Callable - ) -> "IndexedFrame": + ) -> IndexedFrame: """Shared code path for ``first`` and ``last``.""" if not isinstance(self._index, cudf.core.index.DatetimeIndex): raise TypeError("'first' only supports a DatetimeIndex index.") diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index ead0cd566d9..7589ed27484 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -57,7 +57,7 @@ def set(self, obj: Frame, value: ColumnBase, validate=False): def _match_join_keys( lcol: ColumnBase, rcol: ColumnBase, how: str -) -> Tuple[ColumnBase, ColumnBase]: +) -> tuple[ColumnBase, ColumnBase]: # Casts lcol and rcol to a common dtype for use as join keys. If no casting # is necessary, they are returned as is. @@ -118,7 +118,7 @@ def _match_join_keys( def _match_categorical_dtypes_both( lcol: CategoricalColumn, rcol: CategoricalColumn, how: str -) -> Tuple[ColumnBase, ColumnBase]: +) -> tuple[ColumnBase, ColumnBase]: ltype, rtype = lcol.dtype, rcol.dtype # when both are ordered and both have the same categories, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 3796b596ad6..0c573d4efed 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -255,7 +255,7 @@ def set_names(self, names, level=None, inplace=False): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, name: Any = None, ) -> MultiIndex: assert index is None @@ -781,9 +781,9 @@ def _index_and_downcast(self, result, index, index_key): def _get_row_major( self, df: DataFrameOrSeries, - row_tuple: Union[ - numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] - ], + row_tuple: ( + numbers.Number | slice | tuple[Any, ...] | list[tuple[Any, ...]] + ), ) -> DataFrameOrSeries: if pd.api.types.is_bool_dtype( list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple @@ -805,9 +805,9 @@ def _get_row_major( def _validate_indexer( self, - indexer: Union[ - numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]] - ], + indexer: ( + numbers.Number | slice | tuple[Any, ...] | list[tuple[Any, ...]] + ), ): if isinstance(indexer, numbers.Number): return diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 7f00162099a..e2d350f0f5b 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -156,7 +156,7 @@ class _SeriesLocIndexer(_FrameIndexer): Label-based selection """ - def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: + def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries: if isinstance(arg, pd.MultiIndex): arg = cudf.from_pandas(arg) @@ -281,7 +281,7 @@ class Series(SingleColumnFrame, IndexedFrame, Serializable): If ``False``, leaves ``np.nan`` values as is. """ - _accessors: Set[Any] = set() + _accessors: set[Any] = set() _loc_indexer_type = _SeriesLocIndexer _iloc_indexer_type = _SeriesIlocIndexer @@ -450,7 +450,7 @@ def __init__( def _from_data( cls, data: MutableMapping, - index: Optional[BaseIndex] = None, + index: BaseIndex | None = None, name: Any = None, ) -> Series: """ @@ -1049,7 +1049,7 @@ def __array_function__(self, func, types, args, kwargs): kwargs, ) - def map(self, arg, na_action=None) -> "Series": + def map(self, arg, na_action=None) -> Series: """ Map values of Series according to input correspondence. diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 7793a2fdf29..2d98c66e726 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -57,7 +57,7 @@ def _scan(self, op, axis=None, *args, **kwargs): def _from_data( cls, data: MutableMapping, - index: Optional[cudf.core.index.BaseIndex] = None, + index: cudf.core.index.BaseIndex | None = None, name: Any = None, ): @@ -118,18 +118,12 @@ def values_host(self): # noqa: D102 return self._column.values_host def to_cupy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> cupy.ndarray: # noqa: D102 return super().to_cupy(dtype, copy, na_value).flatten() def to_numpy( - self, - dtype: Union[Dtype, None] = None, - copy: bool = True, - na_value=None, + self, dtype: Dtype | None = None, copy: bool = True, na_value=None, ) -> np.ndarray: # noqa: D102 return super().to_numpy(dtype, copy, na_value).flatten() @@ -294,7 +288,7 @@ def _make_operands_for_binop( reflect: bool = False, *args, **kwargs, - ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]: + ) -> dict[str | None, tuple[ColumnBase, Any, bool, Any]]: """Generate the dictionary of operands used for a binary operation. Parameters diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 782b74ef4a6..cb72faac004 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -61,7 +61,7 @@ def __call__( max_num_rows: int, add_special_tokens: bool = True, padding: str = "max_length", - truncation: Union[bool, str] = False, + truncation: bool | str = False, stride: int = 0, return_tensors: str = "cp", return_token_type_ids: bool = False, diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 59c291eea0b..79d08cca2a2 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -239,7 +239,7 @@ def assert_index_equal( right, exact="equiv", check_names: bool = True, - check_less_precise: Union[bool, int] = False, + check_less_precise: bool | int = False, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True,