Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added pyarrow/numpy dtype literals and allowed str | DtypeObj as input for Series.astype #756

Merged
merged 14 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 222 additions & 32 deletions pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,17 @@ NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | ob
Dtype: TypeAlias = ExtensionDtype | NpDtype
DtypeArg: TypeAlias = Dtype | Mapping[Any, Dtype]
DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"]

# NOTE: we want to catch all the possible dtypes from np.sctypeDict
# timedelta64
# M
# m8
# M8
# object_
# object0
# m
# datetime64

BooleanDtypeArg: TypeAlias = (
# Builtin bool type and its string alias
type[bool] # noqa: Y030
Expand All @@ -86,7 +97,11 @@ BooleanDtypeArg: TypeAlias = (
| pd.BooleanDtype
| Literal["boolean"]
# Numpy bool type
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_
| type[np.bool_]
| Literal["?", "b1", "bool8", "bool_"]
# PyArrow boolean type and its string alias
| Literal["bool[pyarrow]", "boolean[pyarrow]"]
)
IntDtypeArg: TypeAlias = (
# Builtin integer type and its string alias
Expand All @@ -99,31 +114,56 @@ IntDtypeArg: TypeAlias = (
| pd.Int64Dtype
| Literal["Int8", "Int16", "Int32", "Int64"]
# Numpy signed integer types and their string aliases
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte
| type[np.byte]
| type[np.int8]
| type[np.int16]
| type[np.int32]
| type[np.int64]
| type[np.intp]
| Literal["byte", "int8", "int16", "int32", "int64", "intp"]
| Literal["b", "i1", "int8", "byte"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short
| type[np.short]
| Literal["h", "i2", "int16", "short"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc
| type[np.intc]
| Literal["i", "i4", "int32", "intc"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_
| type[np.int_]
| Literal["l", "i8", "int64", "int0", "int_", "long"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong
| type[np.longlong]
| Literal["q", "longlong"] # NOTE: int128 not assigned
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp
| type[np.intp] # signed pointer (=`intptr_t`, platform dependent)
| Literal["p", "intp"]
# PyArrow integer types and their string aliases
| Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"]
)
UIntDtypeArg: TypeAlias = (
# Pandas nullable unsigned integer types and their string aliases
pd.UInt8Dtype # noqa: Y030
| pd.UInt16Dtype
| pd.UInt32Dtype
| pd.UInt64Dtype
| Literal["UInt8", "UInt16", "UInt32", "UInt64"]
# Numpy unsigned integer types and their string aliases
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte
| type[np.ubyte]
| type[np.uint8]
| type[np.uint16]
| type[np.uint32]
| type[np.uint64]
| type[np.uintp]
| Literal["ubyte", "uint8", "uint16", "uint32", "uint64", "uintp"]
| Literal["B", "u1", "uint8", "ubyte"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort
| type[np.ushort]
| Literal["H", "u2", "uint16", "ushort"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintc
| type[np.uintc]
| Literal["I", "u4", "uint32", "uintc"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint
| type[np.uint]
| Literal["L", "u8", "uint", "ulong", "uint64", "uint0"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong
| type[np.ulonglong]
| Literal["Q", "ulonglong"] # NOTE: uint128 not assigned
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp
| type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent)
| Literal["P", "uintp"]
# PyArrow unsigned integer types and their string aliases
| Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"]
)
StrDtypeArg: TypeAlias = (
# Builtin str type and its string alias
type[str] # noqa: Y030
| Literal["str"]
# Pandas nullable string type and its string alias
| pd.StringDtype
| Literal["string"]
)
BytesDtypeArg: TypeAlias = type[bytes]
FloatDtypeArg: TypeAlias = (
# Builtin float type and its string alias
type[float] # noqa: Y030
Expand All @@ -133,19 +173,51 @@ FloatDtypeArg: TypeAlias = (
| pd.Float64Dtype
| Literal["Float32", "Float64"]
# Numpy float types and their string aliases
| type[np.float16]
| type[np.float32]
| type[np.float64]
| Literal["float16", "float32", "float64"]
# NOTE: Alias np.float16 only on Linux x86_64, use np.half instead
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half
| type[np.half]
| Literal["e", "f2", "<f2", "float16", "half"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.single
| type[np.single]
| Literal["f", "f4", "float32", "single"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.double
| type[np.double]
| Literal["d", "f8", "float64", "double", "float_"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longdouble
| type[np.longdouble]
| Literal["g", "f16", "float96", "float128", "longdouble", "longfloat"]
# PyArrow floating point types and their string aliases
| Literal[
"float[pyarrow]",
"double[pyarrow]",
"float16[pyarrow]",
"float32[pyarrow]",
"float64[pyarrow]",
]
)
ComplexDtypeArg: TypeAlias = (
# Builtin complex type and its string alias
type[complex] # noqa: Y030
| Literal["complex"]
# Numpy complex types and their aliases
| type[np.complex64]
| type[np.complex128]
| Literal["complex64", "complex128"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.csingle
| type[np.csingle]
| Literal["F", "c8", "complex64", "csingle", "singlecomplex"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.cdouble
| type[np.cdouble]
| Literal["D", "c16", "complex128", "cdouble", "cfloat", "complex_"]
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.clongdouble
# NOTE: Alias np.complex256 only on Linux x86_64, use np.clongdouble instead
| type[np.clongdouble]
| Literal[
"G",
"c32",
"complex192",
"complex256",
"clongdouble",
"clongfloat",
"longcomplex",
]
)
# Refer to https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units
TimedeltaDtypeArg: TypeAlias = Literal[
Expand All @@ -163,6 +235,41 @@ TimedeltaDtypeArg: TypeAlias = Literal[
"timedelta64[ps]",
"timedelta64[fs]",
"timedelta64[as]",
# numpy type codes
"m8[Y]",
"m8[M]",
"m8[W]",
"m8[D]",
"m8[h]",
"m8[m]",
"m8[s]",
"m8[ms]",
"m8[us]",
"m8[μs]",
"m8[ns]",
"m8[ps]",
"m8[fs]",
"m8[as]",
# little endian
"<m8[Y]",
"<m8[M]",
"<m8[W]",
"<m8[D]",
"<m8[h]",
"<m8[m]",
"<m8[s]",
"<m8[ms]",
"<m8[us]",
"<m8[μs]",
"<m8[ns]",
"<m8[ps]",
"<m8[fs]",
"<m8[as]",
# PyArrow duration type and its string alias
"duration[s][pyarrow]",
"duration[ms][pyarrow]",
"duration[us][pyarrow]",
"duration[ns][pyarrow]",
]
TimestampDtypeArg: TypeAlias = Literal[
"datetime64[Y]",
Expand All @@ -179,24 +286,107 @@ TimestampDtypeArg: TypeAlias = Literal[
"datetime64[ps]",
"datetime64[fs]",
"datetime64[as]",
# numpy type codes
"M8[Y]",
"M8[M]",
"M8[W]",
"M8[D]",
"M8[h]",
"M8[m]",
"M8[s]",
"M8[ms]",
"M8[us]",
"M8[μs]",
"M8[ns]",
"M8[ps]",
"M8[fs]",
"M8[as]",
# little endian
"<M8[Y]",
"<M8[M]",
"<M8[W]",
"<M8[D]",
"<M8[h]",
"<M8[m]",
"<M8[s]",
"<M8[ms]",
"<M8[us]",
"<M8[μs]",
"<M8[ns]",
"<M8[ps]",
"<M8[fs]",
"<M8[as]",
# PyArrow timestamp type and its string alias
"date32[pyarrow]",
"date64[pyarrow]",
"timestamp[s][pyarrow]",
"timestamp[ms][pyarrow]",
"timestamp[us][pyarrow]",
"timestamp[ns][pyarrow]",
]

# Everything accepted as a "string" dtype argument: the builtin ``str`` class,
# pandas' ``StringDtype``, numpy's ``str_`` scalar type, and the string
# spellings listed next to each of them.
StrDtypeArg: TypeAlias = (
# Builtin str type and its string alias
type[str] # noqa: Y030
| Literal["str"]
# Pandas nullable string type and its string alias
| pd.StringDtype
| Literal["string"]
# Numpy string type, its one-character type code ("U") and its name aliases
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.str_
| type[np.str_]
| Literal["U", "str_", "str0", "unicode", "unicode_"]
# PyArrow string type (only its string alias is listed here)
| Literal["string[pyarrow]"]
)
# Everything accepted as a "bytes" dtype argument: the builtin ``bytes``
# class, numpy's ``bytes_`` scalar type, and the string spellings listed
# next to each of them.
BytesDtypeArg: TypeAlias = (
# Builtin bytes type and its string alias
type[bytes] # noqa: Y030
| Literal["bytes"]
# Numpy bytes type, its one-character type codes ("S", "a") and its name aliases
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_
| type[np.bytes_]
| Literal["S", "a", "bytes_", "bytes0", "string_"]
# PyArrow binary type (only its string alias is listed here)
| Literal["binary[pyarrow]"]
)
# A categorical dtype argument: either a ``CategoricalDtype`` instance or the
# string alias "category".
CategoryDtypeArg: TypeAlias = CategoricalDtype | Literal["category"]

# Everything accepted as an "object" dtype argument: the builtin ``object``
# class, numpy's ``object_`` scalar type, and the string spellings listed
# next to each of them.
ObjectDtypeArg: TypeAlias = (
# Builtin object type and its string alias
type[object] # noqa: Y030
| Literal["object"]
# Numpy object type and its one-character type code
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.object_
| type[np.object_]
| Literal["O"] # NOTE: "object_" not assigned
)

# Everything accepted as a "void" dtype argument: numpy's ``void`` scalar type
# and its string spellings. No builtin, pandas, or PyArrow member is listed.
VoidDtypeArg: TypeAlias = (
# Numpy void type, its one-character type code ("V") and its name aliases
# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.void
type[np.void]
| Literal["V", "void", "void0"]
)

# DtypeArg specifies all allowable dtypes in a function's dtype argument
DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype

AstypeArg: TypeAlias = (
BooleanDtypeArg
| IntDtypeArg
| UIntDtypeArg
| StrDtypeArg
| BytesDtypeArg
| FloatDtypeArg
| ComplexDtypeArg
| TimedeltaDtypeArg
| TimestampDtypeArg
| CategoryDtypeArg
| ExtensionDtype
| type[object]
| ObjectDtypeArg
| VoidDtypeArg
| DtypeObj
)
# DtypeArg specifies all allowable dtypes in a function's dtype argument
DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype

# filenames and file-like-objects
AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True)
Expand Down
9 changes: 6 additions & 3 deletions pandas-stubs/core/series.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ from pandas._typing import (
ListLikeU,
MaskType,
NaPosition,
ObjectDtypeArg,
QuantileInterpolation,
RandomState,
Renamer,
Expand All @@ -138,6 +139,8 @@ from pandas._typing import (
TimedeltaDtypeArg,
TimestampConvention,
TimestampDtypeArg,
UIntDtypeArg,
VoidDtypeArg,
WriteBuffer,
np_ndarray_anyint,
np_ndarray_bool,
Expand Down Expand Up @@ -329,7 +332,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
is_copy: _bool | None = ...,
**kwargs,
) -> Series[S1]: ...
def __getattr__(self, name: str) -> S1: ...
def __getattr__(self, name: _str) -> S1: ...
@overload
def __getitem__(
self,
Expand Down Expand Up @@ -1152,7 +1155,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
@overload
def astype(
self,
dtype: IntDtypeArg,
dtype: IntDtypeArg | UIntDtypeArg,
copy: _bool = ...,
errors: IgnoreRaise = ...,
) -> Series[int]: ...
Expand Down Expand Up @@ -1208,7 +1211,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
@overload
def astype(
self,
dtype: type[object] | ExtensionDtype,
dtype: ObjectDtypeArg | VoidDtypeArg | ExtensionDtype | DtypeObj,
copy: _bool = ...,
errors: IgnoreRaise = ...,
) -> Series: ...
Expand Down
Loading