From 040f5773c37d89cdbc0baf57fa70a4b3dcab4fc9 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Thu, 11 May 2023 18:16:37 +0100 Subject: [PATCH] Run no_implicit_optional to rewrite types PEP484 prohibits implicit Optional types, so `def bad(x: int = None): pass` is invalid. Mypy has prohibited this usage by default since version 0.990. So rewrite all of the typing (except `x: Any = None`) using https://github.com/hauntsaninja/no_implicit_optional. --- python/cudf/cudf/_lib/column.pyi | 4 +- python/cudf/cudf/core/buffer/buffer.py | 4 +- python/cudf/cudf/core/buffer/spill_manager.py | 6 +- .../cudf/cudf/core/buffer/spillable_buffer.py | 8 +- python/cudf/cudf/core/buffer/utils.py | 4 +- python/cudf/cudf/core/column/categorical.py | 21 +++-- python/cudf/cudf/core/column/column.py | 69 +++++++++-------- python/cudf/cudf/core/column/datetime.py | 34 +++++---- python/cudf/cudf/core/column/decimal.py | 7 +- python/cudf/cudf/core/column/interval.py | 8 +- python/cudf/cudf/core/column/numerical.py | 20 ++--- .../cudf/cudf/core/column/numerical_base.py | 31 +++++--- python/cudf/cudf/core/column/string.py | 76 +++++++++++-------- python/cudf/cudf/core/column/struct.py | 5 +- python/cudf/cudf/core/column/timedelta.py | 21 ++--- python/cudf/cudf/core/column_accessor.py | 2 +- python/cudf/cudf/core/dataframe.py | 4 +- python/cudf/cudf/core/index.py | 1 - python/cudf/cudf/core/multiindex.py | 3 - python/cudf/cudf/core/single_column_frame.py | 1 - 20 files changed, 196 insertions(+), 133 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi index 013cba3ae03..bd53801a972 100644 --- a/python/cudf/cudf/_lib/column.pyi +++ b/python/cudf/cudf/_lib/column.pyi @@ -29,8 +29,8 @@ class Column: size: int, dtype: Dtype, mask: Optional[Buffer] = None, - offset: int = None, - null_count: int = None, + offset: Optional[int] = None, + null_count: Optional[int] = None, children: Tuple[ColumnBase, ...] = (), ) -> None: ... 
@property diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 2262730d8a1..abf1ec47e3d 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -5,7 +5,7 @@ import math import pickle from types import SimpleNamespace -from typing import Any, Dict, Mapping, Sequence, Tuple, Type, TypeVar +from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, TypeVar import numpy @@ -42,7 +42,7 @@ def host_memory_allocation(nbytes: int) -> memoryview: def cuda_array_interface_wrapper( ptr: int, size: int, - owner: object = None, + owner: Optional[object] = None, readonly=False, typestr="|u1", version=0, diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py index d2a87af3869..7f8399ba522 100644 --- a/python/cudf/cudf/core/buffer/spill_manager.py +++ b/python/cudf/cudf/core/buffer/spill_manager.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. from __future__ import annotations @@ -225,7 +225,7 @@ def __init__( self, *, spill_on_demand: bool = False, - device_memory_limit: int = None, + device_memory_limit: Optional[int] = None, statistic_level: int = 0, ) -> None: self._lock = threading.Lock() @@ -358,7 +358,7 @@ def spill_device_memory(self, nbytes: int) -> int: buf.lock.release() return spilled - def spill_to_device_limit(self, device_limit: int = None) -> int: + def spill_to_device_limit(self, device_limit: Optional[int] = None) -> int: """Try to spill device memory until device limit Notice, by default this is a no-op. 
diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index c71841a5a26..169b52b828e 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -448,7 +448,9 @@ def __cuda_array_interface__(self) -> dict: "version": 0, } - def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview: + def memoryview( + self, *, offset: int = 0, size: Optional[int] = None + ) -> memoryview: size = self._size if size is None else size with self.lock: if self.spillable: @@ -573,7 +575,9 @@ def deserialize(cls, header: dict, frames: list): # copied. return SpillableBuffer.deserialize(header, frames) - def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview: + def memoryview( + self, *, offset: int = 0, size: Optional[int] = None + ) -> memoryview: size = self._size if size is None else size return self._base.memoryview(offset=self._offset + offset, size=size) diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py index 2fe332a12fe..85e4762641e 100644 --- a/python/cudf/cudf/core/buffer/utils.py +++ b/python/cudf/cudf/core/buffer/utils.py @@ -16,8 +16,8 @@ def as_buffer( data: Union[int, Any], *, - size: int = None, - owner: object = None, + size: Optional[int] = None, + owner: Optional[object] = None, exposed: bool = False, ) -> Buffer: """Factory function to wrap `data` in a Buffer object. 
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 52f7c0b957f..c6d7f779884 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -710,10 +710,10 @@ class CategoricalColumn(column.ColumnBase): def __init__( self, dtype: CategoricalDtype, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple["column.ColumnBase", ...] = (), ): @@ -889,7 +889,7 @@ def _fill( return result def slice( - self, start: int, stop: int, stride: int = None + self, start: int, stop: int, stride: Optional[int] = None ) -> "column.ColumnBase": codes = self.codes.slice(start, stop, stride) return cudf.core.column.build_categorical_column( @@ -962,7 +962,9 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: " if you need this functionality." ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> pd.Series: if self.categories.dtype.kind == "f": new_mask = bools_to_mask(self.notnull()) col = column.build_categorical_column( @@ -1219,7 +1221,10 @@ def notnull(self) -> ColumnBase: return result def fillna( - self, fill_value: Any = None, method: Any = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Any = None, + dtype: Optional[Dtype] = None, ) -> CategoricalColumn: """ Fill null values with *fill_value* @@ -1237,7 +1242,7 @@ def fillna( try: fill_value = self._encode(fill_value) fill_value = self.codes.dtype.type(fill_value) - except (ValueError) as err: + except ValueError as err: err_msg = "fill value must be in categories" raise ValueError(err_msg) from err else: @@ -1641,7 +1646,7 @@ def _create_empty_categorical_column( def pandas_categorical_as_column( - categorical: ColumnLike, codes: ColumnLike = None + categorical: 
ColumnLike, codes: Optional[ColumnLike] = None ) -> CategoricalColumn: """Creates a CategoricalColumn from a pandas.Categorical diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 042a1060fae..6557001f884 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -201,7 +201,9 @@ def __repr__(self): f"dtype: {self.dtype}" ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -548,7 +550,9 @@ def element_indexing(self, index: int): return libcudf.copying.get_element(self, idx).value - def slice(self, start: int, stop: int, stride: int = None) -> ColumnBase: + def slice( + self, start: int, stop: int, stride: Optional[int] = None + ) -> ColumnBase: stride = 1 if stride is None else stride if start < 0: start = start + len(self) @@ -699,8 +703,8 @@ def _check_scatter_key_length( def fillna( self: T, value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> T: """Fill null values with ``value``. 
@@ -1097,7 +1101,6 @@ def apply_boolean_mask(self, mask) -> ColumnBase: def argsort( self, ascending: bool = True, na_position: str = "last" ) -> "cudf.core.column.NumericalColumn": - return self.as_frame()._get_sorted_inds( ascending=ascending, na_position=na_position ) @@ -1244,14 +1247,19 @@ def normalize_binop_value( ) -> Union[ColumnBase, ScalarLike]: raise NotImplementedError - def _minmax(self, skipna: bool = None): + def _minmax(self, skipna: Optional[bool] = None): result_col = self._process_for_reduction(skipna=skipna) if isinstance(result_col, ColumnBase): return libcudf.reduce.minmax(result_col) return result_col def _reduce( - self, op: str, skipna: bool = None, min_count: int = 0, *args, **kwargs + self, + op: str, + skipna: Optional[bool] = None, + min_count: int = 0, + *args, + **kwargs, ) -> ScalarLike: """Compute {op} of column values. @@ -1273,7 +1281,7 @@ def contains_na_entries(self) -> bool: return self.null_count != 0 def _process_for_reduction( - self, skipna: bool = None, min_count: int = 0 + self, skipna: Optional[bool] = None, min_count: int = 0 ) -> Union[ColumnBase, ScalarLike]: skipna = True if skipna is None else skipna @@ -1314,8 +1322,8 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def _label_encoding( self, cats: ColumnBase, - dtype: Dtype = None, - na_sentinel: ScalarLike = None, + dtype: Optional[Dtype] = None, + na_sentinel: Optional[ScalarLike] = None, ): """ Convert each value in `self` into an integer code, with `cats` @@ -1389,9 +1397,9 @@ def _return_sentinel_column(): def column_empty_like( column: ColumnBase, - dtype: Dtype = None, + dtype: Optional[Dtype] = None, masked: bool = False, - newsize: int = None, + newsize: Optional[int] = None, ) -> ColumnBase: """Allocate a new column like the given *column*""" if dtype is None: @@ -1494,10 +1502,10 @@ def build_column( data: Union[Buffer, None], dtype: Dtype, *, - size: int = None, - mask: Buffer = None, + size: Optional[int] = None, + mask: 
Optional[Buffer] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple[ColumnBase, ...] = (), ) -> ColumnBase: """ @@ -1666,10 +1674,10 @@ def build_column( def build_categorical_column( categories: ColumnBase, codes: ColumnBase, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ordered: bool = False, ) -> "cudf.core.column.CategoricalColumn": """ @@ -1757,10 +1765,10 @@ def build_interval_column( def build_list_column( indices: ColumnBase, elements: ColumnBase, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ) -> "cudf.core.column.ListColumn": """ Build a ListColumn @@ -1803,10 +1811,10 @@ def build_struct_column( names: Sequence[str], children: Tuple[ColumnBase, ...], dtype: Optional[Dtype] = None, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ) -> "cudf.core.column.StructColumn": """ Build a StructColumn @@ -1863,9 +1871,9 @@ def _make_copy_replacing_NaT_with_null(column): def as_column( arbitrary: Any, - nan_as_null: bool = None, - dtype: Dtype = None, - length: int = None, + nan_as_null: Optional[bool] = None, + dtype: Optional[Dtype] = None, + length: Optional[int] = None, ): """Create a Column from an arbitrary object @@ -2106,7 +2114,6 @@ def as_column( data = build_column(data=buffer, mask=mask, dtype=arbitrary.dtype) elif arb_dtype.kind == "m": - time_unit = get_time_unit(arbitrary) cast_dtype = time_unit in ("D", "W", "M", "Y") @@ -2466,7 +2473,7 @@ def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]: def arange( start: Union[int, float], - stop: Union[int, float] = None, + 
stop: Optional[Union[int, float]] = None, step: Union[int, float] = 1, dtype=None, ) -> cudf.core.column.NumericalColumn: @@ -2524,7 +2531,9 @@ def arange( ) -def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: +def full( + size: int, fill_value: ScalarLike, dtype: Optional[Dtype] = None +) -> ColumnBase: """ Returns a column of given size and dtype, filled with a given value. diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 4c65a631adc..c0a2a6ac546 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -6,7 +6,7 @@ import locale import re from locale import nl_langinfo -from typing import Any, Mapping, Sequence, cast +from typing import Any, Mapping, Optional, Sequence, cast import numpy as np import pandas as pd @@ -125,10 +125,10 @@ def __init__( self, data: Buffer, dtype: DtypeObj, - mask: Buffer = None, - size: int = None, # TODO: make non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -202,7 +202,10 @@ def day_of_year(self) -> ColumnBase: return self.get_dt_field("day_of_year") def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "cudf.Series": # Workaround until following issue is fixed: # https://issues.apache.org/jira/browse/ARROW-9772 @@ -363,7 +366,7 @@ def mean( def std( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, dtype: Dtype = np.float64, ddof: int = 1, @@ -375,7 +378,7 @@ def std( * _unit_to_nanoseconds_conversion[self.time_unit], ) - def median(self, skipna: bool = None) -> pd.Timestamp: + def median(self, skipna: Optional[bool] = None) -> pd.Timestamp: return pd.Timestamp( 
self.as_numerical.median(skipna=skipna), unit=self.time_unit ) @@ -451,7 +454,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype) def fillna( - self, fill_value: Any = None, method: str = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> DatetimeColumn: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): @@ -495,7 +501,6 @@ def isin(self, values: Sequence) -> ColumnBase: def can_cast_safely(self, to_dtype: Dtype) -> bool: if np.issubdtype(to_dtype, np.datetime64): - to_res, _ = np.datetime_data(to_dtype) self_res, _ = np.datetime_data(self.dtype) @@ -542,10 +547,10 @@ def __init__( self, data: Buffer, dtype: pd.DatetimeTZDtype, - mask: Buffer = None, - size: int = None, + mask: Optional[Buffer] = None, + size: Optional[int] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): super().__init__( data=data, @@ -558,7 +563,10 @@ def __init__( self._dtype = dtype def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "cudf.Series": return self._local_time.to_pandas().dt.tz_localize( self.dtype.tz, ambiguous="NaT", nonexistent="NaT" diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 9fc7663ffca..420637c1924 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -2,7 +2,7 @@ import warnings from decimal import Decimal -from typing import Any, Sequence, Union, cast +from typing import Any, Optional, Sequence, Union, cast import cupy as cp import numpy as np @@ -103,7 +103,10 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): return result def fillna( - self, value: Any = None, method: str = None, dtype: Dtype = None + self, + value: Any = None, + 
method: Optional[str] = None, + dtype: Optional[Dtype] = None, ): """Fill null values with ``value``. diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 657403a6082..1b9caa42ecf 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,4 +1,6 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. +from typing import Optional + import pandas as pd import pyarrow as pa @@ -124,7 +126,9 @@ def as_interval_column(self, dtype, **kwargs): else: raise ValueError("dtype must be IntervalDtype") - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": # Note: This does not handle null values in the interval column. # However, this exact sequence (calling __from_arrow__ on the output of # self.to_arrow) is currently the best known way to convert interval diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 87e73d212ef..840858c4bdb 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -81,10 +81,10 @@ def __init__( self, data: Buffer, dtype: DtypeObj, - mask: Buffer = None, - size: int = None, # TODO: make this non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make this non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -428,11 +428,11 @@ def _process_values_for_isin( return lhs, rhs - def _can_return_nan(self, skipna: bool = None) -> bool: + def _can_return_nan(self, skipna: Optional[bool] = None) -> bool: return not skipna and self.has_nulls(include_nan=True) def _process_for_reduction( - self, skipna: bool = None, min_count: int = 0 + self, skipna: Optional[bool] = None, min_count: int = 0 ) -> Union[NumericalColumn, 
ScalarLike]: skipna = True if skipna is None else skipna @@ -516,8 +516,8 @@ def find_and_replace( def fillna( self, fill_value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, fill_nan: bool = True, ) -> NumericalColumn: """ @@ -684,7 +684,6 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool: ): return True else: - filled = self.fillna(0) return ( cudf.Series(filled).astype(to_dtype).astype(filled.dtype) @@ -720,7 +719,10 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: return self def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "pd.Series": if nullable and self.dtype in np_dtypes_to_pandas_dtypes: pandas_nullable_dtype = np_dtypes_to_pandas_dtypes[self.dtype] diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index bb7711a3ead..08c2f7cc7b1 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -1,9 +1,9 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
"""Define an interface for columns that can perform numerical operations.""" from __future__ import annotations -from typing import cast +from typing import Optional, cast import numpy as np @@ -40,10 +40,10 @@ class NumericalBaseColumn(ColumnBase, Scannable): "cummax", } - def _can_return_nan(self, skipna: bool = None) -> bool: + def _can_return_nan(self, skipna: Optional[bool] = None) -> bool: return not skipna and self.has_nulls() - def kurtosis(self, skipna: bool = None) -> float: + def kurtosis(self, skipna: Optional[bool] = None) -> float: skipna = True if skipna is None else skipna if len(self) == 0 or self._can_return_nan(skipna=skipna): @@ -68,7 +68,7 @@ def kurtosis(self, skipna: bool = None) -> float: kurt = term_one_section_one * term_one_section_two - 3 * term_two return kurt - def skew(self, skipna: bool = None) -> ScalarLike: + def skew(self, skipna: Optional[bool] = None) -> ScalarLike: skipna = True if skipna is None else skipna if len(self) == 0 or self._can_return_nan(skipna=skipna): @@ -122,26 +122,39 @@ def quantile( ) return result - def mean(self, skipna: bool = None, min_count: int = 0, dtype=np.float64): + def mean( + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ): return self._reduce( "mean", skipna=skipna, min_count=min_count, dtype=dtype ) def var( - self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1 + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ddof=1, ): return self._reduce( "var", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof ) def std( - self, skipna: bool = None, min_count: int = 0, dtype=np.float64, ddof=1 + self, + skipna: Optional[bool] = None, + min_count: int = 0, + dtype=np.float64, + ddof=1, ): return self._reduce( "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof ) - def median(self, skipna: bool = None) -> NumericalBaseColumn: + def median(self, skipna: Optional[bool] = None) -> NumericalBaseColumn: 
skipna = True if skipna is None else skipna if self._can_return_nan(skipna=skipna): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 1a09fc0b985..8e83d0c72b6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -259,12 +259,14 @@ def byte_count(self) -> SeriesOrIndex: ) @overload - def cat(self, sep: str = None, na_rep: str = None) -> str: + def cat( + self, sep: Optional[str] = None, na_rep: Optional[str] = None + ) -> str: ... @overload def cat( - self, others, sep: str = None, na_rep: str = None + self, others, sep: Optional[str] = None, na_rep: Optional[str] = None ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: ... @@ -792,7 +794,7 @@ def contains( result_col = libstrings.contains_multiple(input_column, pat) return self._return_or_inplace(result_col) - def like(self, pat: str, esc: str = None) -> SeriesOrIndex: + def like(self, pat: str, esc: Optional[str] = None) -> SeriesOrIndex: """ Test if a like pattern matches a string of a Series or Index. @@ -1072,7 +1074,10 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: ) def slice( - self, start: int = None, stop: int = None, step: int = None + self, + start: Optional[int] = None, + stop: Optional[int] = None, + step: Optional[int] = None, ) -> SeriesOrIndex: """ Slice substrings from each element in the Series or Index. @@ -2047,7 +2052,7 @@ def istitle(self) -> SeriesOrIndex: return self._return_or_inplace(libstrings.is_title(self._column)) def filter_alphanum( - self, repl: str = None, keep: bool = True + self, repl: Optional[str] = None, keep: bool = True ) -> SeriesOrIndex: """ Remove non-alphanumeric characters from strings in this column. 
@@ -2133,7 +2138,10 @@ def slice_from( ) def slice_replace( - self, start: int = None, stop: int = None, repl: str = None + self, + start: Optional[int] = None, + stop: Optional[int] = None, + repl: Optional[str] = None, ) -> SeriesOrIndex: """ Replace the specified section of each string with a new string. @@ -2221,7 +2229,9 @@ def slice_replace( ), ) - def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex: + def insert( + self, start: int = 0, repl: Optional[str] = None + ) -> SeriesOrIndex: """ Insert the specified string into each string in the specified position. @@ -2401,10 +2411,10 @@ def get_json_object( def split( self, - pat: str = None, + pat: Optional[str] = None, n: int = -1, expand: bool = False, - regex: bool = None, + regex: Optional[bool] = None, ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. @@ -2569,10 +2579,10 @@ def split( def rsplit( self, - pat: str = None, + pat: Optional[str] = None, n: int = -1, expand: bool = False, - regex: bool = None, + regex: Optional[bool] = None, ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. @@ -3221,7 +3231,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: libstrings.rjust(self._column, width, fillchar) ) - def strip(self, to_strip: str = None) -> SeriesOrIndex: + def strip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. @@ -3280,7 +3290,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: libstrings.strip(self._column, cudf.Scalar(to_strip, "str")) ) - def lstrip(self, to_strip: str = None) -> SeriesOrIndex: + def lstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. 
@@ -3327,7 +3337,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str")) ) - def rstrip(self, to_strip: str = None) -> SeriesOrIndex: + def rstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex: r""" Remove leading and trailing characters. @@ -3980,7 +3990,9 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex: ) return self._return_or_inplace(result) - def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex: + def find( + self, sub: str, start: int = 0, end: Optional[int] = None + ) -> SeriesOrIndex: """ Return lowest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. @@ -4036,7 +4048,7 @@ def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex: return self._return_or_inplace(result_col) def rfind( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return highest indexes in each strings in the Series/Index @@ -4097,7 +4109,7 @@ def rfind( return self._return_or_inplace(result_col) def index( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return lowest indexes in each strings where the substring @@ -4159,7 +4171,7 @@ def index( return result def rindex( - self, sub: str, start: int = 0, end: int = None + self, sub: str, start: int = 0, end: Optional[int] = None ) -> SeriesOrIndex: """ Return highest indexes in each strings where the substring @@ -4426,7 +4438,7 @@ def translate(self, table: dict) -> SeriesOrIndex: ) def filter_characters( - self, table: dict, keep: bool = True, repl: str = None + self, table: dict, keep: bool = True, repl: Optional[str] = None ) -> SeriesOrIndex: """ Remove characters from each string using the character ranges @@ -4877,7 +4889,7 @@ def ngrams_tokenize( ) def replace_tokens( - self, targets, 
replacements, delimiter: str = None + self, targets, replacements, delimiter: Optional[str] = None ) -> SeriesOrIndex: """ The targets tokens are searched for within each string in the series @@ -4962,8 +4974,8 @@ def replace_tokens( def filter_tokens( self, min_token_length: int, - replacement: str = None, - delimiter: str = None, + replacement: Optional[str] = None, + delimiter: Optional[str] = None, ) -> SeriesOrIndex: """ Remove tokens from within each string in the series that are @@ -5351,10 +5363,10 @@ class StringColumn(column.ColumnBase): def __init__( self, - mask: Buffer = None, - size: int = None, # TODO: make non-optional + mask: Optional[Buffer] = None, + size: Optional[int] = None, # TODO: make non-optional offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, children: Tuple["column.ColumnBase", ...] = (), ): dtype = cudf.api.types.dtype("object") @@ -5484,8 +5496,8 @@ def to_arrow(self) -> pa.Array: def sum( self, - skipna: bool = None, - dtype: Dtype = None, + skipna: Optional[bool] = None, + dtype: Optional[Dtype] = None, min_count: int = 0, ): result_col = self._process_for_reduction( @@ -5616,7 +5628,10 @@ def values(self) -> cupy.ndarray: raise TypeError("String Arrays is not yet implemented in cudf") def to_pandas( - self, index: pd.Index = None, nullable: bool = False, **kwargs + self, + index: Optional[pd.Index] = None, + nullable: bool = False, + **kwargs, ) -> "pd.Series": if nullable: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) @@ -5686,8 +5701,8 @@ def find_and_replace( def fillna( self, fill_value: Any = None, - method: str = None, - dtype: Dtype = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> StringColumn: if fill_value is not None: if not is_scalar(fill_value): @@ -5835,7 +5850,6 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": def _get_cols_list(parent_obj, others): - parent_index = ( parent_obj.index if isinstance(parent_obj, cudf.Series) else 
parent_obj ) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 6838d711641..6306bd1f32d 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -2,6 +2,7 @@ from __future__ import annotations from functools import cached_property +from typing import Optional import pandas as pd import pyarrow as pa @@ -57,7 +58,9 @@ def to_arrow(self): pa_type, len(self), buffers, children=children ) - def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series": + def to_pandas( + self, index: Optional[pd.Index] = None, **kwargs + ) -> "pd.Series": # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index e7979fa4d27..e1d913742ec 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -3,7 +3,7 @@ from __future__ import annotations import datetime -from typing import Any, Sequence, cast +from typing import Any, Optional, Sequence, cast import numpy as np import pandas as pd @@ -80,10 +80,10 @@ def __init__( self, data: Buffer, dtype: Dtype, - size: int = None, # TODO: make non-optional - mask: Buffer = None, + size: Optional[int] = None, # TODO: make non-optional + mask: Optional[Buffer] = None, offset: int = 0, - null_count: int = None, + null_count: Optional[int] = None, ): dtype = cudf.dtype(dtype) @@ -251,7 +251,10 @@ def time_unit(self) -> str: return self._time_unit def fillna( - self, fill_value: Any = None, method: str = None, dtype: Dtype = None + self, + fill_value: Any = None, + method: Optional[str] = None, + dtype: Optional[Dtype] = None, ) -> TimeDeltaColumn: if fill_value is not None: if cudf.utils.utils._isnat(fill_value): @@ -313,7 +316,7 @@ def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta: unit=self.time_unit, ) - def 
median(self, skipna: bool = None) -> pd.Timedelta: + def median(self, skipna: Optional[bool] = None) -> pd.Timedelta: return pd.Timedelta( self.as_numerical.median(skipna=skipna), unit=self.time_unit ) @@ -340,9 +343,9 @@ def quantile( def sum( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, - dtype: Dtype = None, + dtype: Optional[Dtype] = None, ) -> pd.Timedelta: return pd.Timedelta( # Since sum isn't overridden in Numerical[Base]Column, mypy only @@ -356,7 +359,7 @@ def sum( def std( self, - skipna: bool = None, + skipna: Optional[bool] = None, min_count: int = 0, dtype: Dtype = np.float64, ddof: int = 1, diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 707eda3f5e6..832d5acf2de 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -101,7 +101,7 @@ class ColumnAccessor(abc.MutableMapping): def __init__( self, - data: Union[abc.MutableMapping, ColumnAccessor] = None, + data: Union[abc.MutableMapping, ColumnAccessor, None] = None, multiindex: bool = False, level_names=None, ): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8c8f0119b3f..79e97fa6455 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1988,8 +1988,8 @@ def from_dict( cls, data: dict, orient: str = "columns", - dtype: Dtype = None, - columns: list = None, + dtype: Optional[Dtype] = None, + columns: Optional[list] = None, ) -> DataFrame: """ Construct DataFrame from dict of array-like or dicts. 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 281290e1788..0ec06f8d81f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2601,7 +2601,6 @@ def __init__( copy=False, name=None, ): - if freq is not None: raise NotImplementedError("freq is not yet supported") diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 4a9bc89fa34..edabdb34435 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -101,7 +101,6 @@ def __init__( name=None, **kwargs, ): - if sortorder is not None: raise NotImplementedError("sortorder is not yet supported") if name is not None: @@ -811,7 +810,6 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length): @_cudf_nvtx_annotate def _index_and_downcast(self, result, index, index_key): - if isinstance(index_key, (numbers.Number, slice)): index_key = [index_key] if ( @@ -1069,7 +1067,6 @@ def _is_interval(self): @classmethod @_cudf_nvtx_annotate def _concat(cls, objs): - source_data = [o.to_frame(index=False) for o in objs] # TODO: Verify if this is really necessary or if we can rely on diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index c4128621148..037ac9c378e 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -140,7 +140,6 @@ def to_numpy( return super().to_numpy(dtype, copy, na_value).flatten() def tolist(self): # noqa: D102 - raise TypeError( "cuDF does not support conversion to host memory " "via the `tolist()` method. Consider using "