From b4efffd2879986b1a1e8a0fc76d74b505711a283 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Fri, 21 Jul 2023 19:41:38 +0200 Subject: [PATCH 01/14] added pyarrow/numpy dtype literals & allowed str as astype input --- pandas-stubs/_typing.pyi | 117 ++++++++++++++++++++++++++++------- pandas-stubs/core/series.pyi | 4 +- tests/test_series.py | 60 +++++++++++++++++- 3 files changed, 156 insertions(+), 25 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2ac17830..7ba15eca 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -86,7 +86,11 @@ BooleanDtypeArg: TypeAlias = ( | pd.BooleanDtype | Literal["boolean"] # Numpy bool type + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_ | type[np.bool_] + | Literal["?", "bool8", "bool_"] + # PyArrow boolean type and its string alias + | Literal["bool[pyarrow]", "boolean[pyarrow]"] ) IntDtypeArg: TypeAlias = ( # Builtin integer type and its string alias @@ -98,22 +102,48 @@ IntDtypeArg: TypeAlias = ( | pd.Int32Dtype | pd.Int64Dtype | Literal["Int8", "Int16", "Int32", "Int64"] + # Pandas nullable unsigned integer types and their string aliases + | pd.UInt8Dtype + | pd.UInt16Dtype + | pd.UInt32Dtype + | pd.UInt64Dtype + | Literal["UInt8", "UInt16", "UInt32", "UInt64"] # Numpy signed integer types and their string aliases + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte | type[np.byte] - | type[np.int8] + | Literal["b", "int8", "byte"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short | type[np.int16] - | type[np.int32] - | type[np.int64] - | type[np.intp] - | Literal["byte", "int8", "int16", "int32", "int64", "intp"] + | Literal["h", "int16", "short"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc + | type[np.intc] + | Literal["i", "int32", "intc"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_ + | type[np.int_] + | Literal["l", "int64", "int_", "intp", "long"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong + | type[np.longlong] + | Literal["q", "longlong"] # NOTE: int128 not assigned # Numpy unsigned integer types and their string aliases + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte | type[np.ubyte] - | type[np.uint8] - | type[np.uint16] - | type[np.uint32] - | type[np.uint64] - | type[np.uintp] - | Literal["ubyte", "uint8", "uint16", "uint32", "uint64", "uintp"] + | Literal["B", "uint8", "ubyte"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort + | type[np.ushort] + | Literal["H", "uint16", "ushort"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintc + | type[np.uintc] + | Literal["I", "uint32", "uintc"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint + | type[np.uint] + | Literal["L", "uint64", "uint", "uintp"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong + | type[np.ulonglong] + | Literal["Q", "ulonglong"] # NOTE: uint128 not assigned + # PyArrow integer types and their string aliases + | Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"] + # PyArrow unsigned integer types and their string aliases + | Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"] ) StrDtypeArg: TypeAlias = ( # Builtin str type and its string alias @@ -122,30 +152,59 @@ StrDtypeArg: TypeAlias = ( # Pandas nullable string type and its string alias | pd.StringDtype | Literal["string"] + # PyArrow string type and its string alias + | Literal["string[pyarrow]"] ) BytesDtypeArg: TypeAlias = type[bytes] FloatDtypeArg: TypeAlias = ( # Builtin float type and its string alias type[float] # noqa: Y030 - | Literal["float"] + | Literal[ + "float", + "float", + ] # Pandas nullable float types and their string aliases | pd.Float32Dtype | pd.Float64Dtype | Literal["Float32", "Float64"] # Numpy float types and their string aliases - | type[np.float16] - | type[np.float32] - | type[np.float64] - | Literal["float16", "float32", "float64"] + # NOTE: Alias np.float16 only on Linux x86_64, use np.half instead + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half + | type[np.half] + | Literal["e", "float16", "half"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.single + | type[np.single] + | Literal["f", "float32", "single"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.double + | type[np.double] + | Literal["d", "float64", "double", "float_"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longdouble + | type[np.longdouble] + | Literal["g", "float128", "longdouble", "longfloat"] + # PyArrow floating point types and their string aliases + | Literal[ + "float[pyarrow]", + "double[pyarrow]", + "float16[pyarrow]", + "float32[pyarrow]", + "float64[pyarrow]", + ] ) ComplexDtypeArg: TypeAlias = ( # Builtin complex type and its string alias type[complex] # noqa: Y030 | Literal["complex"] # Numpy complex types and their aliases - | type[np.complex64] - | type[np.complex128] - | Literal["complex64", "complex128"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.csingle + | type[np.csingle] + | Literal["F", "complex64", "singlecomplex"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.cdouble + | type[np.cdouble] + | Literal["D", "complex128", "cdouble", "cfloat", "complex_"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.clongdouble + # NOTE: Alias np.complex256 only on Linux x86_64, use np.clongdouble instead + | type[np.clongdouble] + | Literal["G", "complex256", "clongdouble", "clongfloat", "longcomplex"] ) # Refer to https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units TimedeltaDtypeArg: TypeAlias = Literal[ @@ -163,6 +222,11 @@ TimedeltaDtypeArg: TypeAlias = Literal[ "timedelta64[ps]", "timedelta64[fs]", "timedelta64[as]", + # PyArrow duration type and its string alias + "duration[s][pyarrow]", + "duration[ms][pyarrow]", + "duration[us][pyarrow]", + "duration[ns][pyarrow]", ] TimestampDtypeArg: TypeAlias = Literal[ "datetime64[Y]", @@ -179,9 +243,19 @@ TimestampDtypeArg: TypeAlias = Literal[ "datetime64[ps]", "datetime64[fs]", "datetime64[as]", + # PyArrow timestamp type and its string alias + "date32[pyarrow]", + "date64[pyarrow]", + "timestamp[s][pyarrow]", + "timestamp[ms][pyarrow]", + "timestamp[us][pyarrow]", + "timestamp[ns][pyarrow]", ] CategoryDtypeArg: TypeAlias = CategoricalDtype | Literal["category"] +# DtypeArg specifies all allowable dtypes in a functions its dtype argument +DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype + AstypeArg: TypeAlias = ( BooleanDtypeArg | IntDtypeArg @@ -192,11 +266,10 @@ AstypeArg: TypeAlias = ( | TimedeltaDtypeArg | TimestampDtypeArg | CategoryDtypeArg - | ExtensionDtype + | DtypeObj | type[object] + | str ) -# DtypeArg specifies all allowable dtypes in a functions its dtype argument -DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype # filenames and file-like-objects AnyStr_cov = TypeVar("AnyStr_cov", str, bytes, covariant=True) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index cf9d32ef..5b9ad63b 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -329,7 +329,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): is_copy: _bool | None = ..., **kwargs, ) -> Series[S1]: ... - def __getattr__(self, name: str) -> S1: ... + def __getattr__(self, name: _str) -> S1: ... @overload def __getitem__( self, @@ -1208,7 +1208,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def astype( self, - dtype: type[object] | ExtensionDtype, + dtype: type[object] | ExtensionDtype | DtypeObj | _str, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series: ... diff --git a/tests/test_series.py b/tests/test_series.py index 8d01db9d..457fedea 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1697,8 +1697,12 @@ def test_updated_astype() -> None: s = pd.Series([3, 4, 5]) s1 = pd.Series(True) - # Boolean types + # dynamically typed + string: str = "int" # not Literal! + check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) + check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) + # Boolean types # Builtin bool types check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_) check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_) @@ -1791,6 +1795,20 @@ def test_updated_astype() -> None: check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.float16) check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.float32) check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) + check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) + check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) + + # pyarrow + check( + assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]"), + pd.Series, + int, + ) + check( + assert_type(s.astype("float[pyarrow]"), "pd.Series[float]"), + pd.Series, + float, + ) # Complex types @@ -1889,6 +1907,26 @@ def test_updated_astype() -> None: pd.Series, Timedelta, ) + check( + assert_type(s.astype("duration[s][pyarrow]"), TimedeltaSeries), + pd.Series, + datetime.timedelta, + ) + check( + assert_type(s.astype("duration[ms][pyarrow]"), TimedeltaSeries), + pd.Series, + datetime.timedelta, + ) + check( + assert_type(s.astype("duration[us][pyarrow]"), TimedeltaSeries), + pd.Series, + datetime.timedelta, + ) + check( + assert_type(s.astype("duration[ns][pyarrow]"), TimedeltaSeries), + pd.Series, + datetime.timedelta, + ) check( assert_type(s.astype("datetime64[Y]"), TimestampSeries), @@ -1960,6 +1998,26 @@ def test_updated_astype() -> None: pd.Series, Timestamp, ) + check( + assert_type(s.astype("timestamp[s][pyarrow]"), TimestampSeries), + pd.Series, + datetime.datetime, + ) + check( + assert_type(s.astype("timestamp[ms][pyarrow]"), TimestampSeries), + pd.Series, + datetime.datetime, + ) + check( + assert_type(s.astype("timestamp[us][pyarrow]"), TimestampSeries), + pd.Series, + datetime.datetime, + ) + check( + assert_type(s.astype("timestamp[ns][pyarrow]"), TimestampSeries), + pd.Series, + datetime.datetime, + ) orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) newtype = DecimalDtype() From 6e422e17153e9c2e4e87c52233917d4a032029d7 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Fri, 21 Jul 2023 19:45:29 +0200 Subject: [PATCH 02/14] removed accidental double float --- pandas-stubs/_typing.pyi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 7ba15eca..3bcddce2 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -159,10 +159,7 @@ BytesDtypeArg: TypeAlias = type[bytes] FloatDtypeArg: TypeAlias = ( # Builtin float type and its string alias type[float] # noqa: Y030 - | Literal[ - "float", - "float", - ] + | Literal["float"] # Pandas nullable float types and their string aliases | pd.Float32Dtype | pd.Float64Dtype From f92534bc4318817dbb6887f96e14fb5a79491cc3 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Fri, 21 Jul 2023 21:04:14 +0200 Subject: [PATCH 03/14] added ObjectDtypeArg and lots of unit tests for literals --- pandas-stubs/_typing.pyi | 31 +++- pandas-stubs/core/series.pyi | 3 +- tests/test_series.py | 281 +++++++++++++++++++++++++++++------ 3 files changed, 264 insertions(+), 51 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 3bcddce2..b4b59175 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -113,7 +113,7 @@ IntDtypeArg: TypeAlias = ( | type[np.byte] | Literal["b", "int8", "byte"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short - | type[np.int16] + | type[np.short] | Literal["h", "int16", "short"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc | type[np.intc] @@ -152,10 +152,24 @@ StrDtypeArg: TypeAlias = ( # Pandas nullable string type and its string alias | pd.StringDtype | Literal["string"] + # Numpy string type and its string alias + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.str_ + | type[np.str_] + | Literal["U", "str_", "unicode"] # PyArrow string type and its string alias | Literal["string[pyarrow]"] ) -BytesDtypeArg: TypeAlias = type[bytes] +BytesDtypeArg: TypeAlias = ( + # Builtin bytes type and its string alias + type[bytes] # noqa: Y030 + | Literal["bytes"] + # Numpy bytes type and its string alias + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_ + | type[np.bytes_] + | Literal["S", "bytes_", "string_"] + # PyArrow binary type and its string alias + | Literal["binary[pyarrow]"] +) FloatDtypeArg: TypeAlias = ( # Builtin float type and its string alias type[float] # noqa: Y030 @@ -194,7 +208,7 @@ ComplexDtypeArg: TypeAlias = ( # Numpy complex types and their aliases # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.csingle | type[np.csingle] - | Literal["F", "complex64", "singlecomplex"] + | Literal["F", "complex64", "csingle", "singlecomplex"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.cdouble | type[np.cdouble] | Literal["D", "complex128", "cdouble", "cfloat", "complex_"] @@ -250,6 +264,15 @@ TimestampDtypeArg: TypeAlias = Literal[ ] CategoryDtypeArg: TypeAlias = CategoricalDtype | Literal["category"] +ObjectDtypeArg: TypeAlias = ( + # Builtin object type and its string alias + type[object] # noqa: Y030 + | Literal["object"] + # Numpy object type and its string alias + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.object_ + | type[np.object_] + | Literal["O"] # NOTE: "object_" not assigned +) # DtypeArg specifies all allowable dtypes in a functions its dtype argument DtypeObj: TypeAlias = np.dtype[np.generic] | ExtensionDtype @@ -263,8 +286,8 @@ AstypeArg: TypeAlias = ( | TimedeltaDtypeArg | TimestampDtypeArg | CategoryDtypeArg + | ObjectDtypeArg | DtypeObj - | type[object] | str ) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 5b9ad63b..79d64871 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -127,6 +127,7 @@ from pandas._typing import ( ListLikeU, MaskType, NaPosition, + ObjectDtypeArg, QuantileInterpolation, RandomState, Renamer, @@ -1208,7 +1209,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def astype( self, - dtype: type[object] | ExtensionDtype | DtypeObj | _str, + dtype: ObjectDtypeArg | ExtensionDtype | DtypeObj | _str, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series: ... diff --git a/tests/test_series.py b/tests/test_series.py index 457fedea..89d60176 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1715,9 +1715,14 @@ def test_updated_astype() -> None: check(assert_type(s1.astype("boolean"), "pd.Series[bool]"), pd.Series, np.bool_) # Numpy bool type check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_) + check(assert_type(s.astype("bool_"), "pd.Series[bool]"), pd.Series, np.bool_) + check(assert_type(s.astype("bool8"), "pd.Series[bool]"), pd.Series, np.bool_) + check(assert_type(s.astype("?"), "pd.Series[bool]"), pd.Series, np.bool_) + # pyarrow bool type + check(assert_type(s.astype("bool[pyarrow]"), "pd.Series[bool]"), pd.Series, bool) + check(assert_type(s.astype("boolean[pyarrow]"), "pd.Series[bool]"), pd.Series, bool) # Integer types - # Builtin integer types check(assert_type(s.astype(int), "pd.Series[int]"), pd.Series, np.integer) check(assert_type(s.astype("int"), "pd.Series[int]"), pd.Series, np.integer) @@ -1730,48 +1735,114 @@ def test_updated_astype() -> None: check(assert_type(s.astype("Int16"), "pd.Series[int]"), pd.Series, np.int16) check(assert_type(s.astype("Int32"), "pd.Series[int]"), pd.Series, np.int32) check(assert_type(s.astype("Int64"), "pd.Series[int]"), pd.Series, np.int64) + # Pandas nullable unsigned integer types + check(assert_type(s.astype(pd.UInt8Dtype()), "pd.Series[int]"), pd.Series, np.uint8) + check( + assert_type(s.astype(pd.UInt16Dtype()), "pd.Series[int]"), pd.Series, np.uint16 + ) + check( + assert_type(s.astype(pd.UInt32Dtype()), "pd.Series[int]"), pd.Series, np.uint32 + ) + check( + assert_type(s.astype(pd.UInt64Dtype()), "pd.Series[int]"), pd.Series, np.uint64 + ) + check(assert_type(s.astype("UInt8"), "pd.Series[int]"), pd.Series, np.uint8) + check(assert_type(s.astype("UInt16"), "pd.Series[int]"), pd.Series, np.uint16) + check(assert_type(s.astype("UInt32"), "pd.Series[int]"), pd.Series, np.uint32) + check(assert_type(s.astype("UInt64"), "pd.Series[int]"), pd.Series, np.uint64) + # Numpy signed integer types + # int8 check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte) - check(assert_type(s.astype(np.int8), "pd.Series[int]"), pd.Series, np.int8) - check(assert_type(s.astype(np.int16), "pd.Series[int]"), pd.Series, np.int16) - check(assert_type(s.astype(np.int32), "pd.Series[int]"), pd.Series, np.int32) - check(assert_type(s.astype(np.int64), "pd.Series[int]"), pd.Series, np.int64) - check(assert_type(s.astype(np.intp), "pd.Series[int]"), pd.Series, np.intp) check(assert_type(s.astype("byte"), "pd.Series[int]"), pd.Series, np.byte) - check(assert_type(s.astype("int8"), "pd.Series[int]"), pd.Series, np.int8) - check(assert_type(s.astype("int16"), "pd.Series[int]"), pd.Series, np.int16) - check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.int32) - check(assert_type(s.astype("int64"), "pd.Series[int]"), pd.Series, np.int64) - check(assert_type(s.astype("intp"), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.astype("int8"), "pd.Series[int]"), pd.Series, np.byte) + check(assert_type(s.astype("b"), "pd.Series[int]"), pd.Series, np.byte) + # int16 + check(assert_type(s.astype(np.short), "pd.Series[int]"), pd.Series, np.short) + check(assert_type(s.astype("short"), "pd.Series[int]"), pd.Series, np.short) + check(assert_type(s.astype("int16"), "pd.Series[int]"), pd.Series, np.short) + check(assert_type(s.astype("h"), "pd.Series[int]"), pd.Series, np.short) + # int32 + check(assert_type(s.astype(np.intc), "pd.Series[int]"), pd.Series, np.intc) + check(assert_type(s.astype("intc"), "pd.Series[int]"), pd.Series, np.intc) + check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.intc) + check(assert_type(s.astype("i"), "pd.Series[int]"), pd.Series, np.intc) + # int64 + check(assert_type(s.astype(np.int_), "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.astype("int_"), "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.astype("int64"), "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.astype("intp"), "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.astype("long"), "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.astype("l"), "pd.Series[int]"), pd.Series, np.int_) + # int128 + # NOTE: currently not supported by pandas + # check(assert_type(s.astype(np.longlong), "pd.Series[int]"), pd.Series, np.longlong) + # check(assert_type(s.astype("longlong"), "pd.Series[int]"), pd.Series, np.longlong) + # check(assert_type(s.astype("q"), "pd.Series[int]"), pd.Series, np.longlong) + # Numpy unsigned integer types + # uint8 check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte) - check(assert_type(s.astype(np.uint8), "pd.Series[int]"), pd.Series, np.uint8) - check(assert_type(s.astype(np.uint16), "pd.Series[int]"), pd.Series, np.uint16) - check(assert_type(s.astype(np.uint32), "pd.Series[int]"), pd.Series, np.uint32) - check(assert_type(s.astype(np.uint64), "pd.Series[int]"), pd.Series, np.uint64) - check(assert_type(s.astype(np.uintp), "pd.Series[int]"), pd.Series, np.uintp) check(assert_type(s.astype("ubyte"), "pd.Series[int]"), pd.Series, np.ubyte) - check(assert_type(s.astype("uint8"), "pd.Series[int]"), pd.Series, np.uint8) - check(assert_type(s.astype("uint16"), "pd.Series[int]"), pd.Series, np.uint16) - check(assert_type(s.astype("uint32"), "pd.Series[int]"), pd.Series, np.uint32) - check(assert_type(s.astype("uint64"), "pd.Series[int]"), pd.Series, np.uint64) - check(assert_type(s.astype("uintp"), "pd.Series[int]"), pd.Series, np.uintp) + check(assert_type(s.astype("uint8"), "pd.Series[int]"), pd.Series, np.ubyte) + check(assert_type(s.astype("B"), "pd.Series[int]"), pd.Series, np.ubyte) + # uint16 + check(assert_type(s.astype(np.ushort), "pd.Series[int]"), pd.Series, np.ushort) + check(assert_type(s.astype("ushort"), "pd.Series[int]"), pd.Series, np.ushort) + check(assert_type(s.astype("uint16"), "pd.Series[int]"), pd.Series, np.ushort) + check(assert_type(s.astype("H"), "pd.Series[int]"), pd.Series, np.ushort) + # uint32 + check(assert_type(s.astype(np.uintc), "pd.Series[int]"), pd.Series, np.uintc) + check(assert_type(s.astype("uintc"), "pd.Series[int]"), pd.Series, np.uintc) + check(assert_type(s.astype("uint32"), "pd.Series[int]"), pd.Series, np.uintc) + check(assert_type(s.astype("I"), "pd.Series[int]"), pd.Series, np.uintc) + # uint64 + check(assert_type(s.astype(np.uint), "pd.Series[int]"), pd.Series, np.uint) + check(assert_type(s.astype("uint"), "pd.Series[int]"), pd.Series, np.uint) + check(assert_type(s.astype("uint64"), "pd.Series[int]"), pd.Series, np.uint) + check(assert_type(s.astype("uintp"), "pd.Series[int]"), pd.Series, np.uint) + check(assert_type(s.astype("L"), "pd.Series[int]"), pd.Series, np.uint) + + # pyarrow integer types + check(assert_type(s.astype("int8[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("int16[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("int32[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]"), pd.Series, int) + # pyarrow unsigned integer types + check(assert_type(s.astype("uint8[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("uint16[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("uint32[pyarrow]"), "pd.Series[int]"), pd.Series, int) + check(assert_type(s.astype("uint64[pyarrow]"), "pd.Series[int]"), pd.Series, int) # String types - # Builtin str types check(assert_type(s.astype(str), "pd.Series[str]"), pd.Series, str) check(assert_type(s.astype("str"), "pd.Series[str]"), pd.Series, str) # Pandas nullable string types check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str) check(assert_type(s.astype("string"), "pd.Series[str]"), pd.Series, str) + # Numpy string types + check(assert_type(s.astype(np.str_), "pd.Series[str]"), pd.Series, str) + check(assert_type(s.astype("str_"), "pd.Series[str]"), pd.Series, str) + check(assert_type(s.astype("unicode"), "pd.Series[str]"), pd.Series, str) + check(assert_type(s.astype("U"), "pd.Series[str]"), pd.Series, str) + # pyarrow string types + check(assert_type(s.astype("string[pyarrow]"), "pd.Series[str]"), pd.Series, str) # Bytes types - check(assert_type(s.astype(bytes), "pd.Series[bytes]"), pd.Series, bytes) + check(assert_type(s.astype("bytes"), "pd.Series[bytes]"), pd.Series, bytes) + # NumPy bytes types + check(assert_type(s.astype(np.bytes_), "pd.Series[bytes]"), pd.Series, bytes) + check(assert_type(s.astype("bytes_"), "pd.Series[bytes]"), pd.Series, bytes) + check(assert_type(s.astype("string_"), "pd.Series[bytes]"), pd.Series, bytes) + check(assert_type(s.astype("S"), "pd.Series[bytes]"), pd.Series, bytes) + # pyarrow bytes types + check( + assert_type(s.astype("binary[pyarrow]"), "pd.Series[bytes]"), pd.Series, bytes + ) # Float types - # Builtin float types check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float) check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float) @@ -1789,54 +1860,149 @@ def test_updated_astype() -> None: check(assert_type(s.astype("Float32"), "pd.Series[float]"), pd.Series, np.float32) check(assert_type(s.astype("Float64"), "pd.Series[float]"), pd.Series, np.float64) # Numpy float types - check(assert_type(s.astype(np.float16), "pd.Series[float]"), pd.Series, np.float16) - check(assert_type(s.astype(np.float32), "pd.Series[float]"), pd.Series, np.float32) - check(assert_type(s.astype(np.float64), "pd.Series[float]"), pd.Series, np.float64) - check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.float16) - check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.float32) - check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) - check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) - check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.float64) + # float16 + check(assert_type(s.astype(np.half), "pd.Series[float]"), pd.Series, np.half) + check(assert_type(s.astype("half"), "pd.Series[float]"), pd.Series, np.half) + check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.half) + check(assert_type(s.astype("e"), "pd.Series[float]"), pd.Series, np.half) + # float32 + check(assert_type(s.astype(np.single), "pd.Series[float]"), pd.Series, np.single) + check(assert_type(s.astype("single"), "pd.Series[float]"), pd.Series, np.single) + check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.single) + check(assert_type(s.astype("f"), "pd.Series[float]"), pd.Series, np.single) + # float64 + check(assert_type(s.astype(np.double), "pd.Series[float]"), pd.Series, np.double) + check(assert_type(s.astype("double"), "pd.Series[float]"), pd.Series, np.double) + check(assert_type(s.astype("float_"), "pd.Series[float]"), pd.Series, np.double) + check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.double) + check(assert_type(s.astype("d"), "pd.Series[float]"), pd.Series, np.double) + # float128 + check( + assert_type(s.astype(np.longdouble), "pd.Series[float]"), + pd.Series, + np.longdouble, + ) + check( + assert_type(s.astype("longdouble"), "pd.Series[float]"), + pd.Series, + np.longdouble, + ) + check( + assert_type(s.astype("longfloat"), "pd.Series[float]"), pd.Series, np.longdouble + ) + check( + assert_type(s.astype("float128"), "pd.Series[float]"), pd.Series, np.longdouble + ) + check(assert_type(s.astype("g"), "pd.Series[float]"), pd.Series, np.longdouble) # pyarrow + check(assert_type(s.astype("float[pyarrow]"), "pd.Series[float]"), pd.Series, float) check( - assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]"), - pd.Series, - int, + assert_type(s.astype("double[pyarrow]"), "pd.Series[float]"), pd.Series, float ) + # check(assert_type(s.astype("float16[pyarrow]"), "pd.Series[float]"), pd.Series, float) check( - assert_type(s.astype("float[pyarrow]"), "pd.Series[float]"), - pd.Series, - float, + assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]"), pd.Series, float + ) + check( + assert_type(s.astype("float64[pyarrow]"), "pd.Series[float]"), pd.Series, float ) # Complex types - # Builtin complex types check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex) check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex) # Numpy complex types + # complex64 check( - assert_type(s.astype(np.complex64), "pd.Series[complex]"), + assert_type(s.astype(np.csingle), "pd.Series[complex]"), pd.Series, - np.complex64, + np.csingle, ) check( - assert_type(s.astype(np.complex128), "pd.Series[complex]"), + assert_type(s.astype("complex64"), "pd.Series[complex]"), pd.Series, - np.complex128, + np.csingle, ) check( - assert_type(s.astype("complex64"), "pd.Series[complex]"), + assert_type(s.astype("F"), "pd.Series[complex]"), + pd.Series, + np.csingle, + ) + check( + assert_type(s.astype("csingle"), "pd.Series[complex]"), + pd.Series, + np.csingle, + ) + check( + assert_type(s.astype("singlecomplex"), "pd.Series[complex]"), pd.Series, - np.complex64, + np.csingle, + ) + # complex128 + check( + assert_type(s.astype(np.cdouble), "pd.Series[complex]"), + pd.Series, + np.cdouble, ) check( assert_type(s.astype("complex128"), "pd.Series[complex]"), pd.Series, - np.complex128, + np.cdouble, + ) + check( + assert_type(s.astype("D"), "pd.Series[complex]"), + pd.Series, + np.cdouble, + ) + check( + assert_type(s.astype("cdouble"), "pd.Series[complex]"), + pd.Series, + np.cdouble, + ) + check( + assert_type(s.astype("cfloat"), "pd.Series[complex]"), + pd.Series, + np.cdouble, + ) + check( + assert_type(s.astype("complex_"), "pd.Series[complex]"), + pd.Series, + np.cdouble, + ) + # complex 256 + check( + assert_type(s.astype(np.clongdouble), "pd.Series[complex]"), + pd.Series, + np.clongdouble, + ) + check( + assert_type(s.astype("complex256"), "pd.Series[complex]"), + pd.Series, + np.clongdouble, + ) + check( + assert_type(s.astype("G"), "pd.Series[complex]"), + pd.Series, + np.clongdouble, + ) + check( + assert_type(s.astype("clongdouble"), "pd.Series[complex]"), + pd.Series, + np.clongdouble, + ) + check( + assert_type(s.astype("clongfloat"), "pd.Series[complex]"), + pd.Series, + np.clongdouble, + ) + check( + assert_type(s.astype("longcomplex"), "pd.Series[complex]"), + pd.Series, + np.clongdouble, ) + # Timedelta Types check( assert_type(s.astype("timedelta64[Y]"), TimedeltaSeries), pd.Series, @@ -2019,6 +2185,29 @@ def test_updated_astype() -> None: datetime.datetime, ) + # Object types + check( + assert_type(s.astype(object), "pd.Series[Any]"), + pd.Series, + object, + ) + check( + assert_type(s.astype("object"), "pd.Series[Any]"), + pd.Series, + object, + ) + # Numpy object types + check( + assert_type(s.astype(np.object_), "pd.Series[Any]"), + pd.Series, + object, + ) + check( + assert_type(s.astype("O"), "pd.Series[Any]"), + pd.Series, + object, + ) + orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) newtype = DecimalDtype() decseries = orseries.astype(newtype) From a6de69667f256e885bc9bab53c72bf1395b4e131 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Mon, 24 Jul 2023 14:59:21 +0200 Subject: [PATCH 04/14] removed str overload --- pandas-stubs/core/series.pyi | 2 +- tests/test_series.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 79d64871..4707cd47 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -1209,7 +1209,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def astype( self, - dtype: ObjectDtypeArg | ExtensionDtype | DtypeObj | _str, + dtype: ObjectDtypeArg | ExtensionDtype | DtypeObj, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series: ... diff --git a/tests/test_series.py b/tests/test_series.py index 89d60176..931b76b3 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1697,10 +1697,18 @@ def test_updated_astype() -> None: s = pd.Series([3, 4, 5]) s1 = pd.Series(True) + # Test incorrect Literal + if TYPE_CHECKING_INVALID_USAGE: + s.astype("foobar") # type: ignore[call-overload] # pyright: ignore[reportGeneralTypeIssues] + # dynamically typed - string: str = "int" # not Literal! - check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) - check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) + # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 + # enable in the future if Intersection and Not supported + # string: str = "int" # not Literal! + # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) + # check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) + # check bad literal + # s.astype("some nonsense") # Boolean types # Builtin bool types From b56ea5d82a9c372f0072d2365b8a469ff8b9a484 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Mon, 24 Jul 2023 15:12:02 +0200 Subject: [PATCH 05/14] re-enabled s.astype(s.dtype) test --- tests/test_series.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_series.py b/tests/test_series.py index 931b76b3..1f40480d 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1703,10 +1703,13 @@ def test_updated_astype() -> None: # dynamically typed # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 + check( + assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer + ) # #747 + # enable in the future if Intersection and Not supported # string: str = "int" # not Literal! # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) - # check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) # check bad literal # s.astype("some nonsense") From e0154f568a213dc936dce85b1d05062a4dd460ed Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Mon, 24 Jul 2023 20:22:21 +0200 Subject: [PATCH 06/14] refactored astype-tests to use pytest.mark.parametrize --- pandas-stubs/_typing.pyi | 31 +- tests/test_series.py | 1221 +++++++++++++++++++++----------------- 2 files changed, 704 insertions(+), 548 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index b4b59175..675338f6 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -233,6 +233,21 @@ TimedeltaDtypeArg: TypeAlias = Literal[ "timedelta64[ps]", "timedelta64[fs]", "timedelta64[as]", + # numpy type codes + " None: pd.Series(1) pd.Series((1, 2, 3)) pd.Series(np.array([1, 2, 3])) - pd.Series(data=[1, 2, 3, 4], name="series") + pd.Series(data=[1, 2, 3, 4], name="pd.Series") pd.Series(data=[1, 2, 3, 4], dtype=np.int8) pd.Series(data={"row1": [1, 2], "row2": [3, 4]}) pd.Series(data=[1, 2, 3, 4], index=[4, 3, 2, 1], copy=True) @@ -122,7 +132,7 @@ def test_types_select() -> None: s = pd.Series(data={"row1": 1, "row2": 2}) with pytest_warns_bounded( FutureWarning, - "Series.__getitem__ treating keys as positions is deprecated", + "pd.Series.__getitem__ treating keys as positions is deprecated", lower="2.0.99", ): s[0] @@ -255,7 +265,7 @@ def test_types_fillna() -> None: check(assert_type(s.fillna(0, axis="index"), pd.Series), pd.Series) with pytest_warns_bounded( FutureWarning, - "Series.fillna with 'method' is deprecated", + "pd.Series.fillna with 'method' is deprecated", lower="2.0.99", ): check(assert_type(s.fillna(method="backfill", axis=0), pd.Series), pd.Series) @@ -1693,532 +1703,671 @@ def test_change_to_dict_return_type() -> None: check(assert_type(fd, Dict[Any, Any]), dict) -def test_updated_astype() -> None: - s = pd.Series([3, 4, 5]) - s1 = pd.Series(True) +ASTYPE_BOOL: list[tuple[BooleanDtypeArg, type]] = [ + # python boolean + (bool, np.bool_), + ("bool", np.bool_), + # pandas boolean + (pd.BooleanDtype(), np.bool_), + ("boolean", np.bool_), + # numpy boolean type + (np.bool_, np.bool_), + ("bool_", np.bool_), + ("bool8", np.bool_), + ("?", np.bool_), + # pyarrow boolean type + ("bool[pyarrow]", bool), + ("boolean[pyarrow]", bool), +] + +ASTYPE_INT: list[tuple[IntDtypeArg, type]] = [ + # python int + (int, np.integer), + ("int", np.integer), + # pandas Int8 + (pd.Int8Dtype(), np.int8), + ("Int8", np.int8), + # pandas Int16 + (pd.Int16Dtype(), np.int16), + ("Int16", np.int16), + # pandas Int32 + (pd.Int32Dtype(), np.int32), + ("Int32", np.int32), + # pandas Int64 + (pd.Int64Dtype(), np.int64), + ("Int64", np.int64), + # numpy int8 + (np.byte, np.byte), + ("byte", np.byte), + ("int8", np.byte), + ("b", np.byte), + # numpy int16 + (np.short, np.short), + ("short", np.short), + ("int16", np.short), + ("h", np.short), + # numpy int32 + (np.intc, np.intc), + ("intc", np.intc), + ("int32", np.intc), + ("i", np.intc), + # numpy int64 + (np.int_, np.int_), + ("int_", np.int_), + ("int64", np.int_), + ("intp", np.int_), + ("long", np.int_), + ("l", np.int_), + # pyarrow integer types + ("int8[pyarrow]", int), + ("int16[pyarrow]", int), + ("int32[pyarrow]", int), + ("int64[pyarrow]", int), +] + +ASTYPE_UINT: list[tuple[IntDtypeArg, type]] = [ + # pandas UInt8 + (pd.UInt8Dtype(), np.uint8), + ("UInt8", np.uint8), + # pandas UInt16 + (pd.UInt16Dtype(), np.uint16), + ("UInt16", np.uint16), + # pandas UInt32 + (pd.UInt32Dtype(), np.uint32), + ("UInt32", np.uint32), + # pandas UInt64 + (pd.UInt64Dtype(), np.uint64), + ("UInt64", np.uint64), + # numpy uint8 + (np.ubyte, np.ubyte), + ("ubyte", np.ubyte), + ("uint8", np.ubyte), + ("B", np.ubyte), + # numpy uint16 + (np.ushort, np.ushort), + ("ushort", np.ushort), + ("uint16", np.ushort), + ("H", np.ushort), + # numpy uint32 + (np.uintc, np.uintc), + ("uintc", np.uintc), + ("uint32", np.uintc), + ("I", np.uintc), + # numpy uint64 + (np.uint, np.uint), + ("uint", np.uint), + ("uint64", np.uint), + ("uintp", np.uint), + ("L", np.uint), + # pyarrow unsigned integer types + ("uint8[pyarrow]", int), + ("uint16[pyarrow]", int), + ("uint32[pyarrow]", int), + ("uint64[pyarrow]", int), +] + +ASTYPE_FLOAT = [ + # python float + (float, np.floating), + ("float", np.floating), + # pandas Float32 + (pd.Float32Dtype(), np.float32), + ("Float32", np.float32), + # pandas Float64 + (pd.Float64Dtype(), np.float64), + ("Float64", np.float64), + # numpy float16 + (np.half, np.half), + ("half", np.half), + ("float16", np.half), + ("e", np.half), + # numpy float32 + (np.single, np.single), + ("single", np.single), + ("float32", np.single), + ("f", np.single), + # numpy float64 + (np.double, np.double), + ("double", np.double), + ("float_", np.double), + ("float64", np.double), + ("d", np.double), + # numpy float128 + (np.longdouble, np.longdouble), + ("longdouble", np.longdouble), + ("longfloat", np.longdouble), + ("float128", np.longdouble), + ("g", np.longdouble), + # pyarrow float32 + ("float32[pyarrow]", float), + ("float[pyarrow]", float), + # pyarrow float64 + ("float64[pyarrow]", float), + ("double[pyarrow]", float), +] + +ASTYPE_COMPLEX: list[tuple[ComplexDtypeArg, type]] = [ + # python complex + (complex, np.complexfloating), + ("complex", np.complexfloating), + # numpy complex64 + (np.csingle, np.csingle), + ("csingle", np.csingle), + ("singlecomplex", np.csingle), + ("complex64", np.csingle), + ("F", np.csingle), + # numpy complex128 + (np.cdouble, np.cdouble), + ("cdouble", np.cdouble), + ("cfloat", np.cdouble), + ("complex_", np.cdouble), + ("complex128", np.cdouble), + ("D", np.cdouble), + # numpy complex256 + (np.clongdouble, np.clongdouble), + ("clongdouble", np.clongdouble), + ("clongfloat", np.clongdouble), + ("longcomplex", np.clongdouble), + ("complex256", np.clongdouble), + ("G", np.clongdouble), +] + + +ASTYPE_TIMESTAMP: list[TimestampDtypeArg] = [ + # numpy datetime64 + "datetime64[Y]", + "datetime64[M]", + "datetime64[W]", + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[μs]", + "datetime64[ns]", + "datetime64[ps]", + "datetime64[fs]", + "datetime64[as]", + # numpy datetime64 type codes + " None: + s = pd.Series([0, 1]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # python boolean + assert_type(s.astype(bool), "pd.Series[bool]") + assert_type(s.astype("bool"), "pd.Series[bool]") + # pandas boolean + assert_type(s.astype(pd.BooleanDtype()), "pd.Series[bool]") + assert_type(s.astype("boolean"), "pd.Series[bool]") + # numpy boolean type + assert_type(s.astype(np.bool_), "pd.Series[bool]") + assert_type(s.astype("bool_"), "pd.Series[bool]") + assert_type(s.astype("bool8"), "pd.Series[bool]") + assert_type(s.astype("?"), "pd.Series[bool]") + # pyarrow boolean type + assert_type(s.astype("bool[pyarrow]"), "pd.Series[bool]") + assert_type(s.astype("boolean[pyarrow]"), "pd.Series[bool]") + + +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_INT, ids=repr) +def test_astype_int(cast_arg: IntDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # python int + assert_type(s.astype(int), "pd.Series[int]") + assert_type(s.astype("int"), "pd.Series[int]") + # pandas Int8 + assert_type(s.astype(pd.Int8Dtype()), "pd.Series[int]") + assert_type(s.astype("Int8"), "pd.Series[int]") + # pandas Int16 + assert_type(s.astype(pd.Int16Dtype()), "pd.Series[int]") + assert_type(s.astype("Int16"), "pd.Series[int]") + # pandas Int32 + assert_type(s.astype(pd.Int32Dtype()), "pd.Series[int]") + assert_type(s.astype("Int32"), "pd.Series[int]") + # pandas Int64 + assert_type(s.astype(pd.Int64Dtype()), "pd.Series[int]") + assert_type(s.astype("Int64"), "pd.Series[int]") + # numpy int8 + assert_type(s.astype(np.byte), "pd.Series[int]") + assert_type(s.astype("byte"), "pd.Series[int]") + assert_type(s.astype("int8"), "pd.Series[int]") + assert_type(s.astype("b"), "pd.Series[int]") + # numpy int16 + assert_type(s.astype(np.short), "pd.Series[int]") + assert_type(s.astype("short"), "pd.Series[int]") + assert_type(s.astype("int16"), "pd.Series[int]") + assert_type(s.astype("h"), "pd.Series[int]") + # numpy int32 + assert_type(s.astype(np.intc), "pd.Series[int]") + assert_type(s.astype("intc"), "pd.Series[int]") + assert_type(s.astype("int32"), "pd.Series[int]") + assert_type(s.astype("i"), "pd.Series[int]") + # numpy int64 + assert_type(s.astype(np.int_), "pd.Series[int]") + assert_type(s.astype("int_"), "pd.Series[int]") + assert_type(s.astype("int64"), "pd.Series[int]") + assert_type(s.astype("intp"), "pd.Series[int]") + assert_type(s.astype("long"), "pd.Series[int]") + assert_type(s.astype("l"), "pd.Series[int]") + # pyarrow integer types + assert_type(s.astype("int8[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("int16[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("int32[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]") + + +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_UINT, ids=repr) +def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # pandas UInt8 + assert_type(s.astype(pd.UInt8Dtype()), "pd.Series[int]") + assert_type(s.astype("UInt8"), "pd.Series[int]") + # pandas UInt16 + assert_type(s.astype(pd.UInt16Dtype()), "pd.Series[int]") + assert_type(s.astype("UInt16"), "pd.Series[int]") + # pandas UInt32 + assert_type(s.astype(pd.UInt32Dtype()), "pd.Series[int]") + assert_type(s.astype("UInt32"), "pd.Series[int]") + # pandas UInt64 + assert_type(s.astype(pd.UInt64Dtype()), "pd.Series[int]") + assert_type(s.astype("UInt64"), "pd.Series[int]") + # numpy uint8 + assert_type(s.astype(np.ubyte), "pd.Series[int]") + assert_type(s.astype("ubyte"), "pd.Series[int]") + assert_type(s.astype("uint8"), "pd.Series[int]") + assert_type(s.astype("B"), "pd.Series[int]") + # numpy uint16 + assert_type(s.astype(np.ushort), "pd.Series[int]") + assert_type(s.astype("ushort"), "pd.Series[int]") + assert_type(s.astype("uint16"), "pd.Series[int]") + assert_type(s.astype("H"), "pd.Series[int]") + # numpy uint32 + assert_type(s.astype(np.uintc), "pd.Series[int]") + assert_type(s.astype("uintc"), "pd.Series[int]") + assert_type(s.astype("uint32"), "pd.Series[int]") + assert_type(s.astype("I"), "pd.Series[int]") + # numpy uint64 + assert_type(s.astype(np.uint), "pd.Series[int]") + assert_type(s.astype("uint"), "pd.Series[int]") + assert_type(s.astype("uint64"), "pd.Series[int]") + assert_type(s.astype("uintp"), "pd.Series[int]") + assert_type(s.astype("L"), "pd.Series[int]") + # pyarrow unsigned integer types + assert_type(s.astype("uint8[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("uint16[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("uint32[pyarrow]"), "pd.Series[int]") + assert_type(s.astype("uint64[pyarrow]"), "pd.Series[int]") + + +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX, ids=repr) +def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # python float + assert_type(s.astype(float), "pd.Series[float]") + assert_type(s.astype("float"), "pd.Series[float]") + # pandas Float32 + assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]") + assert_type(s.astype("Float32"), "pd.Series[float]") + # pandas Float64 + assert_type(s.astype(pd.Float64Dtype()), "pd.Series[float]") + assert_type(s.astype("Float64"), "pd.Series[float]") + # numpy float16 + assert_type(s.astype(np.half), "pd.Series[float]") + assert_type(s.astype("half"), "pd.Series[float]") + assert_type(s.astype("float16"), "pd.Series[float]") + assert_type(s.astype("e"), "pd.Series[float]") + # numpy float32 + assert_type(s.astype(np.single), "pd.Series[float]") + assert_type(s.astype("single"), "pd.Series[float]") + assert_type(s.astype("float32"), "pd.Series[float]") + assert_type(s.astype("f"), "pd.Series[float]") + # numpy float64 + assert_type(s.astype(np.double), "pd.Series[float]") + assert_type(s.astype("double"), "pd.Series[float]") + assert_type(s.astype("float_"), "pd.Series[float]") + assert_type(s.astype("float64"), "pd.Series[float]") + assert_type(s.astype("d"), "pd.Series[float]") + # numpy float128 + assert_type(s.astype(np.longdouble), "pd.Series[float]") + assert_type(s.astype("longdouble"), "pd.Series[float]") + assert_type(s.astype("longfloat"), "pd.Series[float]") + assert_type(s.astype("float128"), "pd.Series[float]") + assert_type(s.astype("g"), "pd.Series[float]") + # pyarrow float32 + assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]") + assert_type(s.astype("float[pyarrow]"), "pd.Series[float]") + # pyarrow float64 + assert_type(s.astype("float64[pyarrow]"), "pd.Series[float]") + assert_type(s.astype("double[pyarrow]"), "pd.Series[float]") + + +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX, ids=repr) +def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + assert_type(s.astype(complex), "pd.Series[complex]") + assert_type(s.astype("complex"), "pd.Series[complex]") + # numpy complex64 + assert_type(s.astype(np.csingle), "pd.Series[complex]") + assert_type(s.astype("csingle"), "pd.Series[complex]") + assert_type(s.astype("singlecomplex"), "pd.Series[complex]") + assert_type(s.astype("complex64"), "pd.Series[complex]") + assert_type(s.astype("F"), "pd.Series[complex]") + # numpy complex128 + assert_type(s.astype(np.cdouble), "pd.Series[complex]") + assert_type(s.astype("cdouble"), "pd.Series[complex]") + assert_type(s.astype("cfloat"), "pd.Series[complex]") + assert_type(s.astype("complex_"), "pd.Series[complex]") + assert_type(s.astype("complex128"), "pd.Series[complex]") + assert_type(s.astype("D"), "pd.Series[complex]") + # numpy complex256 + assert_type(s.astype(np.clongdouble), "pd.Series[complex]") + assert_type(s.astype("clongdouble"), "pd.Series[complex]") + assert_type(s.astype("clongfloat"), "pd.Series[complex]") + assert_type(s.astype("longcomplex"), "pd.Series[complex]") + assert_type(s.astype("complex256"), "pd.Series[complex]") + assert_type(s.astype("G"), "pd.Series[complex]") + + +@pytest.mark.parametrize("cast_arg", ASTYPE_TIMESTAMP, ids=repr) +def test_astype_timestamp(cast_arg: TimestampDtypeArg) -> None: + s = pd.Series([1, 2, 3]) - # Test incorrect Literal - if TYPE_CHECKING_INVALID_USAGE: - s.astype("foobar") # type: ignore[call-overload] # pyright: ignore[reportGeneralTypeIssues] + if cast_arg in ("date32[pyarrow]", "date64[pyarrow]"): + x = pd.Series(pd.date_range("2000-01-01", "2000-02-01")) + check(x.astype(cast_arg), TimestampSeries, datetime.date) + else: + check(s.astype(cast_arg), TimestampSeries, datetime.datetime) + + if TYPE_CHECKING: + # numpy datetime64 + assert_type(s.astype("datetime64[Y]"), TimestampSeries) + assert_type(s.astype("datetime64[M]"), TimestampSeries) + assert_type(s.astype("datetime64[W]"), TimestampSeries) + assert_type(s.astype("datetime64[D]"), TimestampSeries) + assert_type(s.astype("datetime64[h]"), TimestampSeries) + assert_type(s.astype("datetime64[m]"), TimestampSeries) + assert_type(s.astype("datetime64[s]"), TimestampSeries) + assert_type(s.astype("datetime64[ms]"), TimestampSeries) + assert_type(s.astype("datetime64[us]"), TimestampSeries) + assert_type(s.astype("datetime64[μs]"), TimestampSeries) + assert_type(s.astype("datetime64[ns]"), TimestampSeries) + assert_type(s.astype("datetime64[ps]"), TimestampSeries) + assert_type(s.astype("datetime64[fs]"), TimestampSeries) + assert_type(s.astype("datetime64[as]"), TimestampSeries) + # numpy datetime64 type codes + assert_type(s.astype(" None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), TimedeltaSeries, datetime.timedelta) + + if TYPE_CHECKING: + assert_type(s.astype("timedelta64[Y]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[M]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[W]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[D]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[h]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[m]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[s]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[ms]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[us]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[μs]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[ns]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[ps]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[fs]"), "TimedeltaSeries") + assert_type(s.astype("timedelta64[as]"), "TimedeltaSeries") + # numpy timedelta64 type codes + assert_type(s.astype(" None: + s = pd.Series(["a", "b"]) + check(s.astype(cast_arg), pd.Series, str) + + if TYPE_CHECKING: + # python string + assert_type(s.astype(str), "pd.Series[str]") + assert_type(s.astype("str"), "pd.Series[str]") + # pandas string + assert_type(s.astype(pd.StringDtype()), "pd.Series[str]") + assert_type(s.astype("string"), "pd.Series[str]") + # numpy string + assert_type(s.astype(np.str_), "pd.Series[str]") + assert_type(s.astype("str_"), "pd.Series[str]") + assert_type(s.astype("unicode"), "pd.Series[str]") + assert_type(s.astype("U"), "pd.Series[str]") + # pyarrow string + assert_type(s.astype("string[pyarrow]"), "pd.Series[str]") + + +@pytest.mark.parametrize("cast_arg", ASTYPE_BYTES, ids=repr) +def test_astype_bytes(cast_arg: BytesDtypeArg) -> None: + s = pd.Series(["a", "b"]) + check(s.astype(cast_arg), pd.Series, bytes) + + if TYPE_CHECKING: + # python bytes + assert_type(s.astype(bytes), "pd.Series[bytes]") + assert_type(s.astype("bytes"), "pd.Series[bytes]") + # numpy bytes + assert_type(s.astype(np.bytes_), "pd.Series[bytes]") + assert_type(s.astype("bytes_"), "pd.Series[bytes]") + assert_type(s.astype("string_"), "pd.Series[bytes]") + assert_type(s.astype("S"), "pd.Series[bytes]") + # pyarrow bytes + assert_type(s.astype("binary[pyarrow]"), "pd.Series[bytes]") + + +@pytest.mark.parametrize("cast_arg", ASTYPE_CATEGORICAL, ids=repr) +def test_astype_categorical(cast_arg: CategoryDtypeArg) -> None: + s = pd.Series(["a", "b"]) + check(s.astype("category"), pd.Series, str) - # dynamically typed - # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 - check( - assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer - ) # #747 + if TYPE_CHECKING: + # pandas category + assert_type(s.astype(pd.CategoricalDtype()), "pd.Series[Any]") + assert_type(s.astype("category"), "pd.Series[Any]") + # pyarrow dictionary + # assert_type(s.astype("dictionary[pyarrow]"), "pd.Series[Categorical]") - # enable in the future if Intersection and Not supported - # string: str = "int" # not Literal! - # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) - # check bad literal - # s.astype("some nonsense") - - # Boolean types - # Builtin bool types - check(assert_type(s.astype(bool), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.astype("bool"), "pd.Series[bool]"), pd.Series, np.bool_) - # Pandas nullable boolean types - check( - assert_type(s1.astype(pd.BooleanDtype()), "pd.Series[bool]"), - pd.Series, - np.bool_, - ) - check(assert_type(s1.astype("boolean"), "pd.Series[bool]"), pd.Series, np.bool_) - # Numpy bool type - check(assert_type(s.astype(np.bool_), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.astype("bool_"), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.astype("bool8"), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.astype("?"), "pd.Series[bool]"), pd.Series, np.bool_) - # pyarrow bool type - check(assert_type(s.astype("bool[pyarrow]"), "pd.Series[bool]"), pd.Series, bool) - check(assert_type(s.astype("boolean[pyarrow]"), "pd.Series[bool]"), pd.Series, bool) - - # Integer types - # Builtin integer types - check(assert_type(s.astype(int), "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(s.astype("int"), "pd.Series[int]"), pd.Series, np.integer) - # Pandas nullable integer types - check(assert_type(s.astype(pd.Int8Dtype()), "pd.Series[int]"), pd.Series, np.int8) - check(assert_type(s.astype(pd.Int16Dtype()), "pd.Series[int]"), pd.Series, np.int16) - check(assert_type(s.astype(pd.Int32Dtype()), "pd.Series[int]"), pd.Series, np.int32) - check(assert_type(s.astype(pd.Int64Dtype()), "pd.Series[int]"), pd.Series, np.int64) - check(assert_type(s.astype("Int8"), "pd.Series[int]"), pd.Series, np.int8) - check(assert_type(s.astype("Int16"), "pd.Series[int]"), pd.Series, np.int16) - check(assert_type(s.astype("Int32"), "pd.Series[int]"), pd.Series, np.int32) - check(assert_type(s.astype("Int64"), "pd.Series[int]"), pd.Series, np.int64) - # Pandas nullable unsigned integer types - check(assert_type(s.astype(pd.UInt8Dtype()), "pd.Series[int]"), pd.Series, np.uint8) - check( - assert_type(s.astype(pd.UInt16Dtype()), "pd.Series[int]"), pd.Series, np.uint16 - ) - check( - assert_type(s.astype(pd.UInt32Dtype()), "pd.Series[int]"), pd.Series, np.uint32 - ) - check( - assert_type(s.astype(pd.UInt64Dtype()), "pd.Series[int]"), pd.Series, np.uint64 - ) - check(assert_type(s.astype("UInt8"), "pd.Series[int]"), pd.Series, np.uint8) - check(assert_type(s.astype("UInt16"), "pd.Series[int]"), pd.Series, np.uint16) - check(assert_type(s.astype("UInt32"), "pd.Series[int]"), pd.Series, np.uint32) - check(assert_type(s.astype("UInt64"), "pd.Series[int]"), pd.Series, np.uint64) - - # Numpy signed integer types - # int8 - check(assert_type(s.astype(np.byte), "pd.Series[int]"), pd.Series, np.byte) - check(assert_type(s.astype("byte"), "pd.Series[int]"), pd.Series, np.byte) - check(assert_type(s.astype("int8"), "pd.Series[int]"), pd.Series, np.byte) - check(assert_type(s.astype("b"), "pd.Series[int]"), pd.Series, np.byte) - # int16 - check(assert_type(s.astype(np.short), "pd.Series[int]"), pd.Series, np.short) - check(assert_type(s.astype("short"), "pd.Series[int]"), pd.Series, np.short) - check(assert_type(s.astype("int16"), "pd.Series[int]"), pd.Series, np.short) - check(assert_type(s.astype("h"), "pd.Series[int]"), pd.Series, np.short) - # int32 - check(assert_type(s.astype(np.intc), "pd.Series[int]"), pd.Series, np.intc) - check(assert_type(s.astype("intc"), "pd.Series[int]"), pd.Series, np.intc) - check(assert_type(s.astype("int32"), "pd.Series[int]"), pd.Series, np.intc) - check(assert_type(s.astype("i"), "pd.Series[int]"), pd.Series, np.intc) - # int64 - check(assert_type(s.astype(np.int_), "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.astype("int_"), "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.astype("int64"), "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.astype("intp"), "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.astype("long"), "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.astype("l"), "pd.Series[int]"), pd.Series, np.int_) - # int128 - # NOTE: currently not supported by pandas - # check(assert_type(s.astype(np.longlong), "pd.Series[int]"), pd.Series, np.longlong) - # check(assert_type(s.astype("longlong"), "pd.Series[int]"), pd.Series, np.longlong) - # check(assert_type(s.astype("q"), "pd.Series[int]"), pd.Series, np.longlong) - - # Numpy unsigned integer types - # uint8 - check(assert_type(s.astype(np.ubyte), "pd.Series[int]"), pd.Series, np.ubyte) - check(assert_type(s.astype("ubyte"), "pd.Series[int]"), pd.Series, np.ubyte) - check(assert_type(s.astype("uint8"), "pd.Series[int]"), pd.Series, np.ubyte) - check(assert_type(s.astype("B"), "pd.Series[int]"), pd.Series, np.ubyte) - # uint16 - check(assert_type(s.astype(np.ushort), "pd.Series[int]"), pd.Series, np.ushort) - check(assert_type(s.astype("ushort"), "pd.Series[int]"), pd.Series, np.ushort) - check(assert_type(s.astype("uint16"), "pd.Series[int]"), pd.Series, np.ushort) - check(assert_type(s.astype("H"), "pd.Series[int]"), pd.Series, np.ushort) - # uint32 - check(assert_type(s.astype(np.uintc), "pd.Series[int]"), pd.Series, np.uintc) - check(assert_type(s.astype("uintc"), "pd.Series[int]"), pd.Series, np.uintc) - check(assert_type(s.astype("uint32"), "pd.Series[int]"), pd.Series, np.uintc) - check(assert_type(s.astype("I"), "pd.Series[int]"), pd.Series, np.uintc) - # uint64 - check(assert_type(s.astype(np.uint), "pd.Series[int]"), pd.Series, np.uint) - check(assert_type(s.astype("uint"), "pd.Series[int]"), pd.Series, np.uint) - check(assert_type(s.astype("uint64"), "pd.Series[int]"), pd.Series, np.uint) - check(assert_type(s.astype("uintp"), "pd.Series[int]"), pd.Series, np.uint) - check(assert_type(s.astype("L"), "pd.Series[int]"), pd.Series, np.uint) - # pyarrow integer types - check(assert_type(s.astype("int8[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("int16[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("int32[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]"), pd.Series, int) - # pyarrow unsigned integer types - check(assert_type(s.astype("uint8[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("uint16[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("uint32[pyarrow]"), "pd.Series[int]"), pd.Series, int) - check(assert_type(s.astype("uint64[pyarrow]"), "pd.Series[int]"), pd.Series, int) - - # String types - # Builtin str types - check(assert_type(s.astype(str), "pd.Series[str]"), pd.Series, str) - check(assert_type(s.astype("str"), "pd.Series[str]"), pd.Series, str) - # Pandas nullable string types - check(assert_type(s.astype(pd.StringDtype()), "pd.Series[str]"), pd.Series, str) - check(assert_type(s.astype("string"), "pd.Series[str]"), pd.Series, str) - # Numpy string types - check(assert_type(s.astype(np.str_), "pd.Series[str]"), pd.Series, str) - check(assert_type(s.astype("str_"), "pd.Series[str]"), pd.Series, str) - check(assert_type(s.astype("unicode"), "pd.Series[str]"), pd.Series, str) - check(assert_type(s.astype("U"), "pd.Series[str]"), pd.Series, str) - # pyarrow string types - check(assert_type(s.astype("string[pyarrow]"), "pd.Series[str]"), pd.Series, str) - - # Bytes types - check(assert_type(s.astype(bytes), "pd.Series[bytes]"), pd.Series, bytes) - check(assert_type(s.astype("bytes"), "pd.Series[bytes]"), pd.Series, bytes) - # NumPy bytes types - check(assert_type(s.astype(np.bytes_), "pd.Series[bytes]"), pd.Series, bytes) - check(assert_type(s.astype("bytes_"), "pd.Series[bytes]"), pd.Series, bytes) - check(assert_type(s.astype("string_"), "pd.Series[bytes]"), pd.Series, bytes) - check(assert_type(s.astype("S"), "pd.Series[bytes]"), pd.Series, bytes) - # pyarrow bytes types - check( - assert_type(s.astype("binary[pyarrow]"), "pd.Series[bytes]"), pd.Series, bytes - ) +@pytest.mark.parametrize("cast_arg", ASTYPE_OBJECT, ids=repr) +def test_astype_object(cast_arg: ObjectDtypeArg) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, object) - # Float types - # Builtin float types - check(assert_type(s.astype(float), "pd.Series[float]"), pd.Series, float) - check(assert_type(s.astype("float"), "pd.Series[float]"), pd.Series, float) - # Pandas nullable float types - check( - assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]"), - pd.Series, - np.float32, - ) - check( - assert_type(s.astype(pd.Float64Dtype()), "pd.Series[float]"), - pd.Series, - np.float64, - ) - check(assert_type(s.astype("Float32"), "pd.Series[float]"), pd.Series, np.float32) - check(assert_type(s.astype("Float64"), "pd.Series[float]"), pd.Series, np.float64) - # Numpy float types - # float16 - check(assert_type(s.astype(np.half), "pd.Series[float]"), pd.Series, np.half) - check(assert_type(s.astype("half"), "pd.Series[float]"), pd.Series, np.half) - check(assert_type(s.astype("float16"), "pd.Series[float]"), pd.Series, np.half) - check(assert_type(s.astype("e"), "pd.Series[float]"), pd.Series, np.half) - # float32 - check(assert_type(s.astype(np.single), "pd.Series[float]"), pd.Series, np.single) - check(assert_type(s.astype("single"), "pd.Series[float]"), pd.Series, np.single) - check(assert_type(s.astype("float32"), "pd.Series[float]"), pd.Series, np.single) - check(assert_type(s.astype("f"), "pd.Series[float]"), pd.Series, np.single) - # float64 - check(assert_type(s.astype(np.double), "pd.Series[float]"), pd.Series, np.double) - check(assert_type(s.astype("double"), "pd.Series[float]"), pd.Series, np.double) - check(assert_type(s.astype("float_"), "pd.Series[float]"), pd.Series, np.double) - check(assert_type(s.astype("float64"), "pd.Series[float]"), pd.Series, np.double) - check(assert_type(s.astype("d"), "pd.Series[float]"), pd.Series, np.double) - # float128 - check( - assert_type(s.astype(np.longdouble), "pd.Series[float]"), - pd.Series, - np.longdouble, - ) - check( - assert_type(s.astype("longdouble"), "pd.Series[float]"), - pd.Series, - np.longdouble, - ) - check( - assert_type(s.astype("longfloat"), "pd.Series[float]"), pd.Series, np.longdouble - ) - check( - assert_type(s.astype("float128"), "pd.Series[float]"), pd.Series, np.longdouble - ) - check(assert_type(s.astype("g"), "pd.Series[float]"), pd.Series, np.longdouble) + if TYPE_CHECKING: + # python object + assert_type(s.astype(object), "pd.Series[Any]") + assert_type(s.astype("object"), "pd.Series[Any]") + # numpy object + assert_type(s.astype(np.object_), "pd.Series[Any]") + # "object_" # NOTE: not assigned + assert_type(s.astype("O"), "pd.Series[Any]") - # pyarrow - check(assert_type(s.astype("float[pyarrow]"), "pd.Series[float]"), pd.Series, float) - check( - assert_type(s.astype("double[pyarrow]"), "pd.Series[float]"), pd.Series, float - ) - # check(assert_type(s.astype("float16[pyarrow]"), "pd.Series[float]"), pd.Series, float) - check( - assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]"), pd.Series, float - ) - check( - assert_type(s.astype("float64[pyarrow]"), "pd.Series[float]"), pd.Series, float - ) - # Complex types - # Builtin complex types - check(assert_type(s.astype(complex), "pd.Series[complex]"), pd.Series, complex) - check(assert_type(s.astype("complex"), "pd.Series[complex]"), pd.Series, complex) - # Numpy complex types - # complex64 - check( - assert_type(s.astype(np.csingle), "pd.Series[complex]"), - pd.Series, - np.csingle, - ) - check( - assert_type(s.astype("complex64"), "pd.Series[complex]"), - pd.Series, - np.csingle, - ) - check( - assert_type(s.astype("F"), "pd.Series[complex]"), - pd.Series, - np.csingle, - ) - check( - assert_type(s.astype("csingle"), "pd.Series[complex]"), - pd.Series, - np.csingle, - ) - check( - assert_type(s.astype("singlecomplex"), "pd.Series[complex]"), - pd.Series, - np.csingle, - ) - # complex128 - check( - assert_type(s.astype(np.cdouble), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - check( - assert_type(s.astype("complex128"), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - check( - assert_type(s.astype("D"), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - check( - assert_type(s.astype("cdouble"), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - check( - assert_type(s.astype("cfloat"), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - check( - assert_type(s.astype("complex_"), "pd.Series[complex]"), - pd.Series, - np.cdouble, - ) - # complex 256 - check( - assert_type(s.astype(np.clongdouble), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - check( - assert_type(s.astype("complex256"), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - check( - assert_type(s.astype("G"), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - check( - assert_type(s.astype("clongdouble"), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - check( - assert_type(s.astype("clongfloat"), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - check( - assert_type(s.astype("longcomplex"), "pd.Series[complex]"), - pd.Series, - np.clongdouble, - ) - - # Timedelta Types - check( - assert_type(s.astype("timedelta64[Y]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[M]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[W]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[D]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[h]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[m]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[s]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[ms]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[us]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[μs]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[ns]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[ps]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[fs]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("timedelta64[as]"), TimedeltaSeries), - pd.Series, - Timedelta, - ) - check( - assert_type(s.astype("duration[s][pyarrow]"), TimedeltaSeries), - pd.Series, - datetime.timedelta, - ) - check( - assert_type(s.astype("duration[ms][pyarrow]"), TimedeltaSeries), - pd.Series, - datetime.timedelta, - ) - check( - assert_type(s.astype("duration[us][pyarrow]"), TimedeltaSeries), - pd.Series, - datetime.timedelta, - ) - check( - assert_type(s.astype("duration[ns][pyarrow]"), TimedeltaSeries), - pd.Series, - datetime.timedelta, - ) +def test_astype_other() -> None: + s = pd.Series([3, 4, 5]) - check( - assert_type(s.astype("datetime64[Y]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[M]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[W]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[D]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[h]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[m]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[s]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[ms]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[us]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[μs]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[ns]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[ps]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[fs]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("datetime64[as]"), TimestampSeries), - pd.Series, - Timestamp, - ) - check( - assert_type(s.astype("timestamp[s][pyarrow]"), TimestampSeries), - pd.Series, - datetime.datetime, - ) - check( - assert_type(s.astype("timestamp[ms][pyarrow]"), TimestampSeries), - pd.Series, - datetime.datetime, - ) - check( - assert_type(s.astype("timestamp[us][pyarrow]"), TimestampSeries), - pd.Series, - datetime.datetime, - ) - check( - assert_type(s.astype("timestamp[ns][pyarrow]"), TimestampSeries), - pd.Series, - datetime.datetime, - ) + # Test incorrect Literal + if TYPE_CHECKING_INVALID_USAGE: + s.astype("foobar") # type: ignore[call-overload] # pyright: ignore[reportGeneralTypeIssues] - # Object types - check( - assert_type(s.astype(object), "pd.Series[Any]"), - pd.Series, - object, - ) - check( - assert_type(s.astype("object"), "pd.Series[Any]"), - pd.Series, - object, - ) - # Numpy object types - check( - assert_type(s.astype(np.object_), "pd.Series[Any]"), - pd.Series, - object, - ) - check( - assert_type(s.astype("O"), "pd.Series[Any]"), - pd.Series, - object, - ) + # Test self-consistent with s.dtype (#747) + # NOTE: https://github.com/python/typing/issues/801#issuecomment-1646171898 + check(assert_type(s.astype(s.dtype), "pd.Series[Any]"), pd.Series, np.integer) + # test DecimalDtype orseries = pd.Series([Decimal(x) for x in [1, 2, 3]]) newtype = DecimalDtype() decseries = orseries.astype(newtype) @@ -2228,32 +2377,10 @@ def test_updated_astype() -> None: Decimal, ) - s4 = pd.Series([1, 1]) - s5 = pd.Series([s4, 4]) - population_dict = { - "California": 38332521, - "Texas": 26448193, - "New York": 19651127, - "Florida": 19552860, - "Illinois": 12882135, - } - population = pd.Series(population_dict) - - check(assert_type(s4.astype(object), pd.Series), pd.Series, object) - check(assert_type(s5.astype(object), pd.Series), pd.Series, object) - check(assert_type(population.astype(object), pd.Series), pd.Series, object) - - # Categorical - check( - assert_type(s.astype(pd.CategoricalDtype()), "pd.Series[Any]"), - pd.Series, - np.integer, - ) - check( - assert_type(s.astype("category"), "pd.Series[Any]"), - pd.Series, - np.integer, - ) + # Test non-literal string + # NOTE: currently unsupported! Enable in future. + # string: str = "int" # not Literal! + # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) def test_check_xs() -> None: From 3aae26e2e68d24137899d026d10b26af3fc94b3f Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 15:19:16 +0200 Subject: [PATCH 07/14] added VoidDtype, fixed some test issues --- pandas-stubs/_typing.pyi | 158 ++++++++---- pandas-stubs/core/series.pyi | 6 +- tests/test_series.py | 459 +++++++++++++++++++++++++---------- 3 files changed, 445 insertions(+), 178 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 675338f6..27a789fa 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -78,6 +78,17 @@ NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | ob Dtype: TypeAlias = ExtensionDtype | NpDtype DtypeArg: TypeAlias = Dtype | Mapping[Any, Dtype] DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"] + +# NOTE: we want to catch all the possible dtypes from np.sctypeDict +# timedelta64 +# M +# m8 +# M8 +# object_ +# object0 +# m +# datetime64 + BooleanDtypeArg: TypeAlias = ( # Builtin bool type and its string alias type[bool] # noqa: Y030 @@ -88,7 +99,7 @@ BooleanDtypeArg: TypeAlias = ( # Numpy bool type # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_ | type[np.bool_] - | Literal["?", "bool8", "bool_"] + | Literal["?", "b1", "bool8", "bool_"] # PyArrow boolean type and its string alias | Literal["bool[pyarrow]", "boolean[pyarrow]"] ) @@ -102,74 +113,57 @@ IntDtypeArg: TypeAlias = ( | pd.Int32Dtype | pd.Int64Dtype | Literal["Int8", "Int16", "Int32", "Int64"] - # Pandas nullable unsigned integer types and their string aliases - | pd.UInt8Dtype - | pd.UInt16Dtype - | pd.UInt32Dtype - | pd.UInt64Dtype - | Literal["UInt8", "UInt16", "UInt32", "UInt64"] # Numpy signed integer types and their string aliases # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte | type[np.byte] - | Literal["b", "int8", "byte"] + | Literal["b", "i1", "int8", "byte"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short | type[np.short] - | Literal["h", "int16", "short"] + | Literal["h", "i2", "int16", "short"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc | type[np.intc] - | Literal["i", "int32", "intc"] + | Literal["i", "i4", "int32", "intc"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_ | type[np.int_] - | Literal["l", "int64", "int_", "intp", "long"] + | Literal["l", "i8", "int64", "int0", "int_", "long"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong | type[np.longlong] | Literal["q", "longlong"] # NOTE: int128 not assigned + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp + | type[np.intp] # signed pointer (=`intptr_t`, platform dependent) + | Literal["p", "intp"] + # PyArrow integer types and their string aliases + | Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"] +) +UIntDtypeArg: TypeAlias = ( + # Pandas nullable unsigned integer types and their string aliases + pd.UInt8Dtype # noqa: Y030 + | pd.UInt16Dtype + | pd.UInt32Dtype + | pd.UInt64Dtype + | Literal["UInt8", "UInt16", "UInt32", "UInt64"] # Numpy unsigned integer types and their string aliases # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte | type[np.ubyte] - | Literal["B", "uint8", "ubyte"] + | Literal["B", "u1", "uint8", "ubyte"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort | type[np.ushort] - | Literal["H", "uint16", "ushort"] + | Literal["H", "u2", "uint16", "ushort"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintc | type[np.uintc] - | Literal["I", "uint32", "uintc"] + | Literal["I", "u4", "uint32", "uintc"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint | type[np.uint] - | Literal["L", "uint64", "uint", "uintp"] + | Literal["L", "u8", "uint", "ulong", "uint64", "uint0"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong | type[np.ulonglong] | Literal["Q", "ulonglong"] # NOTE: uint128 not assigned - # PyArrow integer types and their string aliases - | Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp + | type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent) + | Literal["P", "uintp"] # PyArrow unsigned integer types and their string aliases | Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"] ) -StrDtypeArg: TypeAlias = ( - # Builtin str type and its string alias - type[str] # noqa: Y030 - | Literal["str"] - # Pandas nullable string type and its string alias - | pd.StringDtype - | Literal["string"] - # Numpy string type and its string alias - # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.str_ - | type[np.str_] - | Literal["U", "str_", "unicode"] - # PyArrow string type and its string alias - | Literal["string[pyarrow]"] -) -BytesDtypeArg: TypeAlias = ( - # Builtin bytes type and its string alias - type[bytes] # noqa: Y030 - | Literal["bytes"] - # Numpy bytes type and its string alias - # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_ - | type[np.bytes_] - | Literal["S", "bytes_", "string_"] - # PyArrow binary type and its string alias - | Literal["binary[pyarrow]"] -) FloatDtypeArg: TypeAlias = ( # Builtin float type and its string alias type[float] # noqa: Y030 @@ -182,16 +176,16 @@ FloatDtypeArg: TypeAlias = ( # NOTE: Alias np.float16 only on Linux x86_64, use np.half instead # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half | type[np.half] - | Literal["e", "float16", "half"] + | Literal["e", "f2", " Series[int]: ... @@ -1209,7 +1211,7 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]): @overload def astype( self, - dtype: ObjectDtypeArg | ExtensionDtype | DtypeObj, + dtype: ObjectDtypeArg | VoidDtypeArg | ExtensionDtype | DtypeObj, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series: ... diff --git a/tests/test_series.py b/tests/test_series.py index 67e10d92..5939ca7b 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -4,6 +4,7 @@ from decimal import Decimal from enum import Enum from pathlib import Path +import platform import re from typing import ( TYPE_CHECKING, @@ -70,6 +71,8 @@ StrDtypeArg, TimedeltaDtypeArg, TimestampDtypeArg, + UIntDtypeArg, + VoidDtypeArg, ) from pandas._typing import np_ndarray_int # noqa: F401 @@ -1703,7 +1706,7 @@ def test_change_to_dict_return_type() -> None: check(assert_type(fd, Dict[Any, Any]), dict) -ASTYPE_BOOL: list[tuple[BooleanDtypeArg, type]] = [ +ASTYPE_BOOL_ARGS: list[tuple[BooleanDtypeArg, type]] = [ # python boolean (bool, np.bool_), ("bool", np.bool_), @@ -1715,12 +1718,13 @@ def test_change_to_dict_return_type() -> None: ("bool_", np.bool_), ("bool8", np.bool_), ("?", np.bool_), + ("b1", np.bool_), # pyarrow boolean type ("bool[pyarrow]", bool), ("boolean[pyarrow]", bool), ] -ASTYPE_INT: list[tuple[IntDtypeArg, type]] = [ +ASTYPE_INT_ARGS: list[tuple[IntDtypeArg, type]] = [ # python int (int, np.integer), ("int", np.integer), @@ -1741,23 +1745,35 @@ def test_change_to_dict_return_type() -> None: ("byte", np.byte), ("int8", np.byte), ("b", np.byte), + ("i1", np.byte), # numpy int16 (np.short, np.short), ("short", np.short), ("int16", np.short), ("h", np.short), + ("i2", np.short), # numpy int32 (np.intc, np.intc), ("intc", np.intc), ("int32", np.intc), ("i", np.intc), + ("i4", np.intc), # numpy int64 (np.int_, np.int_), ("int_", np.int_), + ("int0", np.int_), ("int64", np.int_), - ("intp", np.int_), ("long", np.int_), ("l", np.int_), + ("i8", np.int_), + # numpy extended int + (np.longlong, np.longlong), + ("longlong", np.longlong), + ("q", np.longlong), + # numpy signed pointer (platform dependent one of int[8,16,32,64]) + (np.intp, np.intp), + ("intp", np.intp), + ("p", np.intp), # pyarrow integer types ("int8[pyarrow]", int), ("int16[pyarrow]", int), @@ -1765,7 +1781,7 @@ def test_change_to_dict_return_type() -> None: ("int64[pyarrow]", int), ] -ASTYPE_UINT: list[tuple[IntDtypeArg, type]] = [ +ASTYPE_UINT_ARGS: list[tuple[UIntDtypeArg, type]] = [ # pandas UInt8 (pd.UInt8Dtype(), np.uint8), ("UInt8", np.uint8), @@ -1783,22 +1799,35 @@ def test_change_to_dict_return_type() -> None: ("ubyte", np.ubyte), ("uint8", np.ubyte), ("B", np.ubyte), + ("u1", np.ubyte), # numpy uint16 (np.ushort, np.ushort), ("ushort", np.ushort), ("uint16", np.ushort), ("H", np.ushort), + ("u2", np.ushort), # numpy uint32 (np.uintc, np.uintc), ("uintc", np.uintc), ("uint32", np.uintc), ("I", np.uintc), + ("u4", np.uintc), # numpy uint64 (np.uint, np.uint), ("uint", np.uint), + ("uint0", np.uint), ("uint64", np.uint), - ("uintp", np.uint), + ("ulong", np.uint), ("L", np.uint), + ("u8", np.uint), + # numpy extended uint + (np.ulonglong, np.ulonglong), + ("ulonglong", np.ulonglong), + ("Q", np.ulonglong), + # numpy unsigned pointer (platform dependent one of uint[8,16,32,64]) + (np.uintp, np.uintp), + ("uintp", np.uintp), + ("P", np.uintp), # pyarrow unsigned integer types ("uint8[pyarrow]", int), ("uint16[pyarrow]", int), @@ -1806,7 +1835,7 @@ def test_change_to_dict_return_type() -> None: ("uint64[pyarrow]", int), ] -ASTYPE_FLOAT = [ +ASTYPE_FLOAT_ARGS: list[tuple[FloatDtypeArg, type]] = [ # python float (float, np.floating), ("float", np.floating), @@ -1821,23 +1850,27 @@ def test_change_to_dict_return_type() -> None: ("half", np.half), ("float16", np.half), ("e", np.half), + ("f2", np.half), # numpy float32 (np.single, np.single), ("single", np.single), ("float32", np.single), ("f", np.single), + ("f4", np.single), # numpy float64 (np.double, np.double), ("double", np.double), ("float_", np.double), ("float64", np.double), ("d", np.double), + ("f8", np.double), # numpy float128 (np.longdouble, np.longdouble), ("longdouble", np.longdouble), ("longfloat", np.longdouble), ("float128", np.longdouble), ("g", np.longdouble), + ("f16", np.longdouble), # pyarrow float32 ("float32[pyarrow]", float), ("float[pyarrow]", float), @@ -1846,7 +1879,7 @@ def test_change_to_dict_return_type() -> None: ("double[pyarrow]", float), ] -ASTYPE_COMPLEX: list[tuple[ComplexDtypeArg, type]] = [ +ASTYPE_COMPLEX_ARGS: list[tuple[ComplexDtypeArg, type]] = [ # python complex (complex, np.complexfloating), ("complex", np.complexfloating), @@ -1856,6 +1889,7 @@ def test_change_to_dict_return_type() -> None: ("singlecomplex", np.csingle), ("complex64", np.csingle), ("F", np.csingle), + ("c8", np.csingle), # numpy complex128 (np.cdouble, np.cdouble), ("cdouble", np.cdouble), @@ -1863,6 +1897,7 @@ def test_change_to_dict_return_type() -> None: ("complex_", np.cdouble), ("complex128", np.cdouble), ("D", np.cdouble), + ("c16", np.cdouble), # numpy complex256 (np.clongdouble, np.clongdouble), ("clongdouble", np.clongdouble), @@ -1870,140 +1905,183 @@ def test_change_to_dict_return_type() -> None: ("longcomplex", np.clongdouble), ("complex256", np.clongdouble), ("G", np.clongdouble), + ("c32", np.clongdouble), ] -ASTYPE_TIMESTAMP: list[TimestampDtypeArg] = [ +ASTYPE_TIMESTAMP_ARGS: list[tuple[TimestampDtypeArg, type]] = [ # numpy datetime64 - "datetime64[Y]", - "datetime64[M]", - "datetime64[W]", - "datetime64[D]", - "datetime64[h]", - "datetime64[m]", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[μs]", - "datetime64[ns]", - "datetime64[ps]", - "datetime64[fs]", - "datetime64[as]", + ("datetime64[Y]", datetime.datetime), + ("datetime64[M]", datetime.datetime), + ("datetime64[W]", datetime.datetime), + ("datetime64[D]", datetime.datetime), + ("datetime64[h]", datetime.datetime), + ("datetime64[m]", datetime.datetime), + ("datetime64[s]", datetime.datetime), + ("datetime64[ms]", datetime.datetime), + ("datetime64[us]", datetime.datetime), + ("datetime64[μs]", datetime.datetime), + ("datetime64[ns]", datetime.datetime), + ("datetime64[ps]", datetime.datetime), + ("datetime64[fs]", datetime.datetime), + ("datetime64[as]", datetime.datetime), # numpy datetime64 type codes - " None: s = pd.Series([0, 1]) check(s.astype(cast_arg), pd.Series, target_type) @@ -2025,9 +2103,21 @@ def test_astype_bool(cast_arg: BooleanDtypeArg, target_type: type) -> None: assert_type(s.astype("boolean[pyarrow]"), "pd.Series[bool]") -@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_INT, ids=repr) +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_INT_ARGS, ids=repr) def test_astype_int(cast_arg: IntDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) + + if platform.system() == "Windows": + # Different behavior for uint32, uint64 and uintp on Windows + if cast_arg in ["uint32", "uint64", "uintp"]: + pass + + if cast_arg in (np.longlong, "longlong", "q"): + pytest.skip( + "longlong is bugged, for details, see" + "https://github.com/pandas-dev/pandas/issues/54252" + ) + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: @@ -2051,23 +2141,31 @@ def test_astype_int(cast_arg: IntDtypeArg, target_type: type) -> None: assert_type(s.astype("byte"), "pd.Series[int]") assert_type(s.astype("int8"), "pd.Series[int]") assert_type(s.astype("b"), "pd.Series[int]") + assert_type(s.astype("i1"), "pd.Series[int]") # numpy int16 assert_type(s.astype(np.short), "pd.Series[int]") assert_type(s.astype("short"), "pd.Series[int]") assert_type(s.astype("int16"), "pd.Series[int]") assert_type(s.astype("h"), "pd.Series[int]") + assert_type(s.astype("i2"), "pd.Series[int]") # numpy int32 assert_type(s.astype(np.intc), "pd.Series[int]") assert_type(s.astype("intc"), "pd.Series[int]") assert_type(s.astype("int32"), "pd.Series[int]") assert_type(s.astype("i"), "pd.Series[int]") + assert_type(s.astype("i4"), "pd.Series[int]") # numpy int64 assert_type(s.astype(np.int_), "pd.Series[int]") assert_type(s.astype("int_"), "pd.Series[int]") + assert_type(s.astype("int0"), "pd.Series[int]") assert_type(s.astype("int64"), "pd.Series[int]") - assert_type(s.astype("intp"), "pd.Series[int]") assert_type(s.astype("long"), "pd.Series[int]") assert_type(s.astype("l"), "pd.Series[int]") + assert_type(s.astype("i8"), "pd.Series[int]") + # numpy signed pointer + assert_type(s.astype(np.intp), "pd.Series[int]") + assert_type(s.astype("intp"), "pd.Series[int]") + assert_type(s.astype("p"), "pd.Series[int]") # pyarrow integer types assert_type(s.astype("int8[pyarrow]"), "pd.Series[int]") assert_type(s.astype("int16[pyarrow]"), "pd.Series[int]") @@ -2075,7 +2173,7 @@ def test_astype_int(cast_arg: IntDtypeArg, target_type: type) -> None: assert_type(s.astype("int64[pyarrow]"), "pd.Series[int]") -@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_UINT, ids=repr) +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_UINT_ARGS, ids=repr) def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) check(s.astype(cast_arg), pd.Series, target_type) @@ -2098,22 +2196,31 @@ def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: assert_type(s.astype("ubyte"), "pd.Series[int]") assert_type(s.astype("uint8"), "pd.Series[int]") assert_type(s.astype("B"), "pd.Series[int]") + assert_type(s.astype("u1"), "pd.Series[int]") # numpy uint16 assert_type(s.astype(np.ushort), "pd.Series[int]") assert_type(s.astype("ushort"), "pd.Series[int]") assert_type(s.astype("uint16"), "pd.Series[int]") assert_type(s.astype("H"), "pd.Series[int]") + assert_type(s.astype("u2"), "pd.Series[int]") # numpy uint32 assert_type(s.astype(np.uintc), "pd.Series[int]") assert_type(s.astype("uintc"), "pd.Series[int]") assert_type(s.astype("uint32"), "pd.Series[int]") assert_type(s.astype("I"), "pd.Series[int]") + assert_type(s.astype("u4"), "pd.Series[int]") # numpy uint64 assert_type(s.astype(np.uint), "pd.Series[int]") assert_type(s.astype("uint"), "pd.Series[int]") + assert_type(s.astype("uint0"), "pd.Series[int]") assert_type(s.astype("uint64"), "pd.Series[int]") - assert_type(s.astype("uintp"), "pd.Series[int]") + assert_type(s.astype("ulong"), "pd.Series[int]") assert_type(s.astype("L"), "pd.Series[int]") + assert_type(s.astype("u8"), "pd.Series[int]") + # numpy unsigned pointer + assert_type(s.astype(np.uintp), "pd.Series[int]") + assert_type(s.astype("uintp"), "pd.Series[int]") + assert_type(s.astype("P"), "pd.Series[int]") # pyarrow unsigned integer types assert_type(s.astype("uint8[pyarrow]"), "pd.Series[int]") assert_type(s.astype("uint16[pyarrow]"), "pd.Series[int]") @@ -2121,9 +2228,15 @@ def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: assert_type(s.astype("uint64[pyarrow]"), "pd.Series[int]") -@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX, ids=repr) +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_FLOAT_ARGS, ids=repr) def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) + + if platform.system() == "Windows" and cast_arg == "float128": + with pytest.raises(TypeError): + s.astype(cast_arg) + pytest.skip("Windows does not support float128") + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: @@ -2141,23 +2254,27 @@ def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: assert_type(s.astype("half"), "pd.Series[float]") assert_type(s.astype("float16"), "pd.Series[float]") assert_type(s.astype("e"), "pd.Series[float]") + assert_type(s.astype("f2"), "pd.Series[float]") # numpy float32 assert_type(s.astype(np.single), "pd.Series[float]") assert_type(s.astype("single"), "pd.Series[float]") assert_type(s.astype("float32"), "pd.Series[float]") assert_type(s.astype("f"), "pd.Series[float]") + assert_type(s.astype("f4"), "pd.Series[float]") # numpy float64 assert_type(s.astype(np.double), "pd.Series[float]") assert_type(s.astype("double"), "pd.Series[float]") assert_type(s.astype("float_"), "pd.Series[float]") assert_type(s.astype("float64"), "pd.Series[float]") assert_type(s.astype("d"), "pd.Series[float]") + assert_type(s.astype("f8"), "pd.Series[float]") # numpy float128 assert_type(s.astype(np.longdouble), "pd.Series[float]") assert_type(s.astype("longdouble"), "pd.Series[float]") assert_type(s.astype("longfloat"), "pd.Series[float]") assert_type(s.astype("float128"), "pd.Series[float]") assert_type(s.astype("g"), "pd.Series[float]") + assert_type(s.astype("f16"), "pd.Series[float]") # pyarrow float32 assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]") assert_type(s.astype("float[pyarrow]"), "pd.Series[float]") @@ -2166,9 +2283,15 @@ def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: assert_type(s.astype("double[pyarrow]"), "pd.Series[float]") -@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX, ids=repr) +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX_ARGS, ids=repr) def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) + + if platform.system() == "Windows" and cast_arg == "complex256": + with pytest.raises(TypeError): + s.astype(cast_arg) + pytest.skip("Windows does not support complex256") + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: @@ -2180,6 +2303,7 @@ def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: assert_type(s.astype("singlecomplex"), "pd.Series[complex]") assert_type(s.astype("complex64"), "pd.Series[complex]") assert_type(s.astype("F"), "pd.Series[complex]") + assert_type(s.astype("c8"), "pd.Series[complex]") # numpy complex128 assert_type(s.astype(np.cdouble), "pd.Series[complex]") assert_type(s.astype("cdouble"), "pd.Series[complex]") @@ -2187,6 +2311,7 @@ def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: assert_type(s.astype("complex_"), "pd.Series[complex]") assert_type(s.astype("complex128"), "pd.Series[complex]") assert_type(s.astype("D"), "pd.Series[complex]") + assert_type(s.astype("c16"), "pd.Series[complex]") # numpy complex256 assert_type(s.astype(np.clongdouble), "pd.Series[complex]") assert_type(s.astype("clongdouble"), "pd.Series[complex]") @@ -2194,17 +2319,18 @@ def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: assert_type(s.astype("longcomplex"), "pd.Series[complex]") assert_type(s.astype("complex256"), "pd.Series[complex]") assert_type(s.astype("G"), "pd.Series[complex]") + assert_type(s.astype("c32"), "pd.Series[complex]") -@pytest.mark.parametrize("cast_arg", ASTYPE_TIMESTAMP, ids=repr) -def test_astype_timestamp(cast_arg: TimestampDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_TIMESTAMP_ARGS, ids=repr) +def test_astype_timestamp(cast_arg: TimestampDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) if cast_arg in ("date32[pyarrow]", "date64[pyarrow]"): x = pd.Series(pd.date_range("2000-01-01", "2000-02-01")) - check(x.astype(cast_arg), TimestampSeries, datetime.date) + check(x.astype(cast_arg), TimestampSeries, target_type) else: - check(s.astype(cast_arg), TimestampSeries, datetime.datetime) + check(s.astype(cast_arg), TimestampSeries, target_type) if TYPE_CHECKING: # numpy datetime64 @@ -2223,6 +2349,21 @@ def test_astype_timestamp(cast_arg: TimestampDtypeArg) -> None: assert_type(s.astype("datetime64[fs]"), TimestampSeries) assert_type(s.astype("datetime64[as]"), TimestampSeries) # numpy datetime64 type codes + assert_type(s.astype("M8[Y]"), TimestampSeries) + assert_type(s.astype("M8[M]"), TimestampSeries) + assert_type(s.astype("M8[W]"), TimestampSeries) + assert_type(s.astype("M8[D]"), TimestampSeries) + assert_type(s.astype("M8[h]"), TimestampSeries) + assert_type(s.astype("M8[m]"), TimestampSeries) + assert_type(s.astype("M8[s]"), TimestampSeries) + assert_type(s.astype("M8[ms]"), TimestampSeries) + assert_type(s.astype("M8[us]"), TimestampSeries) + assert_type(s.astype("M8[μs]"), TimestampSeries) + assert_type(s.astype("M8[ns]"), TimestampSeries) + assert_type(s.astype("M8[ps]"), TimestampSeries) + assert_type(s.astype("M8[fs]"), TimestampSeries) + assert_type(s.astype("M8[as]"), TimestampSeries) + # numpy datetime64 type codes assert_type(s.astype(" None: assert_type(s.astype("date64[pyarrow]"), TimestampSeries) -@pytest.mark.parametrize("cast_arg", ASTYPE_TIMEDELTA, ids=repr) -def test_astype_timedelta(cast_arg: TimedeltaDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_TIMEDELTA_ARGS, ids=repr) +def test_astype_timedelta(cast_arg: TimedeltaDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) - check(s.astype(cast_arg), TimedeltaSeries, datetime.timedelta) + check(s.astype(cast_arg), TimedeltaSeries, target_type) if TYPE_CHECKING: assert_type(s.astype("timedelta64[Y]"), "TimedeltaSeries") @@ -2268,6 +2409,21 @@ def test_astype_timedelta(cast_arg: TimedeltaDtypeArg) -> None: assert_type(s.astype("timedelta64[fs]"), "TimedeltaSeries") assert_type(s.astype("timedelta64[as]"), "TimedeltaSeries") # numpy timedelta64 type codes + assert_type(s.astype("m8[Y]"), "TimedeltaSeries") + assert_type(s.astype("m8[M]"), "TimedeltaSeries") + assert_type(s.astype("m8[W]"), "TimedeltaSeries") + assert_type(s.astype("m8[D]"), "TimedeltaSeries") + assert_type(s.astype("m8[h]"), "TimedeltaSeries") + assert_type(s.astype("m8[m]"), "TimedeltaSeries") + assert_type(s.astype("m8[s]"), "TimedeltaSeries") + assert_type(s.astype("m8[ms]"), "TimedeltaSeries") + assert_type(s.astype("m8[us]"), "TimedeltaSeries") + assert_type(s.astype("m8[μs]"), "TimedeltaSeries") + assert_type(s.astype("m8[ns]"), "TimedeltaSeries") + assert_type(s.astype("m8[ps]"), "TimedeltaSeries") + assert_type(s.astype("m8[fs]"), "TimedeltaSeries") + assert_type(s.astype("m8[as]"), "TimedeltaSeries") + # numpy timedelta64 type codes assert_type(s.astype(" None: assert_type(s.astype("duration[ns][pyarrow]"), "TimedeltaSeries") -@pytest.mark.parametrize("cast_arg", ASTYPE_STRING, ids=repr) -def test_astype_string(cast_arg: StrDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_STRING_ARGS, ids=repr) +def test_astype_string(cast_arg: StrDtypeArg, target_type: type) -> None: s = pd.Series(["a", "b"]) - check(s.astype(cast_arg), pd.Series, str) + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: # python string @@ -2304,16 +2460,18 @@ def test_astype_string(cast_arg: StrDtypeArg) -> None: # numpy string assert_type(s.astype(np.str_), "pd.Series[str]") assert_type(s.astype("str_"), "pd.Series[str]") + assert_type(s.astype("str0"), "pd.Series[str]") assert_type(s.astype("unicode"), "pd.Series[str]") + assert_type(s.astype("unicode_"), "pd.Series[str]") assert_type(s.astype("U"), "pd.Series[str]") # pyarrow string assert_type(s.astype("string[pyarrow]"), "pd.Series[str]") -@pytest.mark.parametrize("cast_arg", ASTYPE_BYTES, ids=repr) -def test_astype_bytes(cast_arg: BytesDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_BYTES_ARGS, ids=repr) +def test_astype_bytes(cast_arg: BytesDtypeArg, target_type: type) -> None: s = pd.Series(["a", "b"]) - check(s.astype(cast_arg), pd.Series, bytes) + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: # python bytes @@ -2322,16 +2480,17 @@ def test_astype_bytes(cast_arg: BytesDtypeArg) -> None: # numpy bytes assert_type(s.astype(np.bytes_), "pd.Series[bytes]") assert_type(s.astype("bytes_"), "pd.Series[bytes]") + assert_type(s.astype("bytes0"), "pd.Series[bytes]") assert_type(s.astype("string_"), "pd.Series[bytes]") assert_type(s.astype("S"), "pd.Series[bytes]") # pyarrow bytes assert_type(s.astype("binary[pyarrow]"), "pd.Series[bytes]") -@pytest.mark.parametrize("cast_arg", ASTYPE_CATEGORICAL, ids=repr) -def test_astype_categorical(cast_arg: CategoryDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_CATEGORICAL_ARGS, ids=repr) +def test_astype_categorical(cast_arg: CategoryDtypeArg, target_type: type) -> None: s = pd.Series(["a", "b"]) - check(s.astype("category"), pd.Series, str) + check(s.astype("category"), pd.Series, target_type) if TYPE_CHECKING: # pandas category @@ -2341,10 +2500,10 @@ def test_astype_categorical(cast_arg: CategoryDtypeArg) -> None: # assert_type(s.astype("dictionary[pyarrow]"), "pd.Series[Categorical]") -@pytest.mark.parametrize("cast_arg", ASTYPE_OBJECT, ids=repr) -def test_astype_object(cast_arg: ObjectDtypeArg) -> None: +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_OBJECT_ARGS, ids=repr) +def test_astype_object(cast_arg: ObjectDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) - check(s.astype(cast_arg), pd.Series, object) + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: # python object @@ -2356,6 +2515,19 @@ def test_astype_object(cast_arg: ObjectDtypeArg) -> None: assert_type(s.astype("O"), "pd.Series[Any]") +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_VOID_ARGS, ids=repr) +def test_astype_void(cast_arg: VoidDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # numpy void + assert_type(s.astype(np.void), "pd.Series[Any]") + assert_type(s.astype("void"), "pd.Series[Any]") + assert_type(s.astype("V"), "pd.Series[Any]") + assert_type(s.astype("void0"), "pd.Series[Any]") + + def test_astype_other() -> None: s = pd.Series([3, 4, 5]) @@ -2383,6 +2555,39 @@ def test_astype_other() -> None: # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) +def test_all_numpy_aliases_tested() -> None: + """Check that all relevant numpy type aliases are tested.""" + NUMPY_ALIASES: set[str] = {k for k in np.sctypeDict if isinstance(k, str)} + EXCLUDED_ALIASES = { + "M", + "m", + "object0", + "M8", + "datetime64", + "m8", + "timedelta64", + "object_", + } + TESTED_ASTYPE_ARGS: list[tuple[Any, type]] = ( + ASTYPE_BOOL_ARGS + + ASTYPE_INT_ARGS # noqa: W503 + + ASTYPE_UINT_ARGS # noqa: W503 + + ASTYPE_FLOAT_ARGS # noqa: W503 + + ASTYPE_COMPLEX_ARGS # noqa: W503 + + ASTYPE_TIMEDELTA_ARGS # noqa: W503 + + ASTYPE_TIMESTAMP_ARGS # noqa: W503 + + ASTYPE_BYTES_ARGS # noqa: W503 + + ASTYPE_STRING_ARGS # noqa: W503 + + ASTYPE_CATEGORICAL_ARGS # noqa: W503 + + ASTYPE_OBJECT_ARGS # noqa: W503 + + ASTYPE_VOID_ARGS # noqa: W503 + ) + + TESTED_ALIASES = {arg for arg, _ in TESTED_ASTYPE_ARGS if isinstance(arg, str)} + UNTESTED = (NUMPY_ALIASES - TESTED_ALIASES) - EXCLUDED_ALIASES + assert not UNTESTED, f"following aliases were not tested! {UNTESTED}" + + def test_check_xs() -> None: s4 = pd.Series([1, 4]) s4.xs(0, axis=0) From 195e790c3b7af833ca91a8098a89f805d0a29416 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 15:32:55 +0200 Subject: [PATCH 08/14] attempted fix for float96/complex192 --- pandas-stubs/_typing.pyi | 12 ++++++-- tests/test_series.py | 66 +++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 27a789fa..868f6aa0 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -185,7 +185,7 @@ FloatDtypeArg: TypeAlias = ( | Literal["d", "f8", "float64", "double", "float_"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longdouble | type[np.longdouble] - | Literal["g", "f16", "float128", "longdouble", "longfloat"] + | Literal["g", "f16", "float96", "float128", "longdouble", "longfloat"] # PyArrow floating point types and their string aliases | Literal[ "float[pyarrow]", @@ -209,7 +209,15 @@ ComplexDtypeArg: TypeAlias = ( # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.clongdouble # NOTE: Alias np.complex256 only on Linux x86_64, use np.clongdouble instead | type[np.clongdouble] - | Literal["G", "c32", "complex256", "clongdouble", "clongfloat", "longcomplex"] + | Literal[ + "G", + "c32", + "complex192", + "complex256", + "clongdouble", + "clongfloat", + "longcomplex", + ] ) # Refer to https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units TimedeltaDtypeArg: TypeAlias = Literal[ diff --git a/tests/test_series.py b/tests/test_series.py index 5939ca7b..18fd7785 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1743,29 +1743,29 @@ def test_change_to_dict_return_type() -> None: # numpy int8 (np.byte, np.byte), ("byte", np.byte), - ("int8", np.byte), ("b", np.byte), - ("i1", np.byte), + ("int8", np.int8), + ("i1", np.int8), # numpy int16 (np.short, np.short), ("short", np.short), - ("int16", np.short), ("h", np.short), - ("i2", np.short), + ("int16", np.int16), + ("i2", np.int16), # numpy int32 (np.intc, np.intc), ("intc", np.intc), - ("int32", np.intc), ("i", np.intc), - ("i4", np.intc), + ("int32", np.int32), + ("i4", np.int32), # numpy int64 (np.int_, np.int_), ("int_", np.int_), ("int0", np.int_), - ("int64", np.int_), ("long", np.int_), ("l", np.int_), - ("i8", np.int_), + ("int64", np.int64), + ("i8", np.int64), # numpy extended int (np.longlong, np.longlong), ("longlong", np.longlong), @@ -1797,29 +1797,29 @@ def test_change_to_dict_return_type() -> None: # numpy uint8 (np.ubyte, np.ubyte), ("ubyte", np.ubyte), - ("uint8", np.ubyte), ("B", np.ubyte), - ("u1", np.ubyte), + ("uint8", np.uint8), + ("u1", np.uint8), # numpy uint16 (np.ushort, np.ushort), ("ushort", np.ushort), - ("uint16", np.ushort), ("H", np.ushort), - ("u2", np.ushort), + ("uint16", np.uint16), + ("u2", np.uint16), # numpy uint32 (np.uintc, np.uintc), ("uintc", np.uintc), - ("uint32", np.uintc), ("I", np.uintc), - ("u4", np.uintc), + ("uint32", np.uint32), + ("u4", np.uint32), # numpy uint64 (np.uint, np.uint), ("uint", np.uint), ("uint0", np.uint), - ("uint64", np.uint), ("ulong", np.uint), ("L", np.uint), - ("u8", np.uint), + ("uint64", np.uint64), + ("u8", np.uint64), # numpy extended uint (np.ulonglong, np.ulonglong), ("ulonglong", np.ulonglong), @@ -1848,29 +1848,30 @@ def test_change_to_dict_return_type() -> None: # numpy float16 (np.half, np.half), ("half", np.half), - ("float16", np.half), ("e", np.half), - ("f2", np.half), + ("float16", np.float16), + ("f2", np.float16), # numpy float32 (np.single, np.single), ("single", np.single), - ("float32", np.single), ("f", np.single), - ("f4", np.single), + ("float32", np.float32), + ("f4", np.float32), # numpy float64 (np.double, np.double), ("double", np.double), ("float_", np.double), - ("float64", np.double), ("d", np.double), - ("f8", np.double), + ("float64", np.float64), + ("f8", np.float64), # numpy float128 (np.longdouble, np.longdouble), ("longdouble", np.longdouble), ("longfloat", np.longdouble), - ("float128", np.longdouble), ("g", np.longdouble), ("f16", np.longdouble), + ("float96", np.longdouble), # NOTE: WINDOWS ONLY + ("float128", np.longdouble), # NOTE: UNIX ONLY # pyarrow float32 ("float32[pyarrow]", float), ("float[pyarrow]", float), @@ -1887,25 +1888,26 @@ def test_change_to_dict_return_type() -> None: (np.csingle, np.csingle), ("csingle", np.csingle), ("singlecomplex", np.csingle), - ("complex64", np.csingle), ("F", np.csingle), - ("c8", np.csingle), + ("complex64", np.complex64), + ("c8", np.complex64), # numpy complex128 (np.cdouble, np.cdouble), ("cdouble", np.cdouble), ("cfloat", np.cdouble), ("complex_", np.cdouble), - ("complex128", np.cdouble), ("D", np.cdouble), - ("c16", np.cdouble), + ("complex128", np.complex128), + ("c16", np.complex128), # numpy complex256 (np.clongdouble, np.clongdouble), ("clongdouble", np.clongdouble), ("clongfloat", np.clongdouble), ("longcomplex", np.clongdouble), - ("complex256", np.clongdouble), ("G", np.clongdouble), ("c32", np.clongdouble), + ("complex192", np.clongdouble), # NOTE: WINDOWS ONLY + ("complex256", np.clongdouble), # NOTE: UNIX ONLY ] @@ -2232,6 +2234,10 @@ def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) + if platform.system() != "Windows" and cast_arg == "float96": + with pytest.raises(TypeError): + s.astype(cast_arg) + pytest.skip("Unix does not support float96") if platform.system() == "Windows" and cast_arg == "float128": with pytest.raises(TypeError): s.astype(cast_arg) @@ -2287,6 +2293,10 @@ def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) + if platform.system() != "Windows" and cast_arg == "complex192": + with pytest.raises(TypeError): + s.astype(cast_arg) + pytest.skip("Unix does not support complex192") if platform.system() == "Windows" and cast_arg == "complex256": with pytest.raises(TypeError): s.astype(cast_arg) From fae83c18d6bad16e8e601fd78b305742c9a50e70 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 15:50:21 +0200 Subject: [PATCH 09/14] added coded for testing that all types are tested --- tests/test_series.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/test_series.py b/tests/test_series.py index 18fd7785..62879a5b 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2512,7 +2512,7 @@ def test_astype_categorical(cast_arg: CategoryDtypeArg, target_type: type) -> No @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_OBJECT_ARGS, ids=repr) def test_astype_object(cast_arg: ObjectDtypeArg, target_type: type) -> None: - s = pd.Series([1, 2, 3]) + s = pd.Series([object(), 2, 3]) check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: @@ -2521,7 +2521,8 @@ def test_astype_object(cast_arg: ObjectDtypeArg, target_type: type) -> None: assert_type(s.astype("object"), "pd.Series[Any]") # numpy object assert_type(s.astype(np.object_), "pd.Series[Any]") - # "object_" # NOTE: not assigned + # assert_type(s.astype("object_"), "pd.Series[Any]") # NOTE: not assigned + # assert_type(s.astype("object0"), "pd.Series[Any]") # NOTE: not assigned assert_type(s.astype("O"), "pd.Series[Any]") @@ -2565,7 +2566,7 @@ def test_astype_other() -> None: # check(assert_type(s.astype(string), "pd.Series[Any]"), pd.Series, np.integer) -def test_all_numpy_aliases_tested() -> None: +def test_all_astype_args_tested() -> None: """Check that all relevant numpy type aliases are tested.""" NUMPY_ALIASES: set[str] = {k for k in np.sctypeDict if isinstance(k, str)} EXCLUDED_ALIASES = { @@ -2593,9 +2594,17 @@ def test_all_numpy_aliases_tested() -> None: + ASTYPE_VOID_ARGS # noqa: W503 ) - TESTED_ALIASES = {arg for arg, _ in TESTED_ASTYPE_ARGS if isinstance(arg, str)} - UNTESTED = (NUMPY_ALIASES - TESTED_ALIASES) - EXCLUDED_ALIASES - assert not UNTESTED, f"following aliases were not tested! {UNTESTED}" + TESTED_ALIASES: set[str] = { + arg for arg, _ in TESTED_ASTYPE_ARGS if isinstance(arg, str) + } + UNTESTED_ALIASES = (NUMPY_ALIASES - TESTED_ALIASES) - EXCLUDED_ALIASES + assert not UNTESTED_ALIASES, f"{UNTESTED_ALIASES}" + + NUMPY_TYPES: set[type] = set(np.sctypeDict.values()) + EXCLUDED_TYPES: set[type] = {np.str_, np.object_, np.timedelta64, np.datetime64} + TESTED_TYPES: set[type] = {t for _, t in TESTED_ASTYPE_ARGS} + UNTESTED_TYPES = (NUMPY_TYPES - TESTED_TYPES) - EXCLUDED_TYPES + assert not UNTESTED_TYPES, f"{UNTESTED_TYPES}" def test_check_xs() -> None: From 5ebcdb20ad847636a2003776b62ff6c401c62e3f Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 16:05:25 +0200 Subject: [PATCH 10/14] small edit --- tests/test_series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_series.py b/tests/test_series.py index 62879a5b..aef95068 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2570,14 +2570,14 @@ def test_all_astype_args_tested() -> None: """Check that all relevant numpy type aliases are tested.""" NUMPY_ALIASES: set[str] = {k for k in np.sctypeDict if isinstance(k, str)} EXCLUDED_ALIASES = { - "M", - "m", - "object0", - "M8", "datetime64", + "m", "m8", "timedelta64", + "M", + "M8", "object_", + "object0", } TESTED_ASTYPE_ARGS: list[tuple[Any, type]] = ( ASTYPE_BOOL_ARGS From 4ad43254a936c5daf6818b797e77156bfb0aba18 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 17:23:35 +0200 Subject: [PATCH 11/14] removed float96, complex192 and fixed integer tests --- pandas-stubs/_typing.pyi | 3 +-- tests/test_series.py | 20 ++++++-------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 868f6aa0..2718057d 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -185,7 +185,7 @@ FloatDtypeArg: TypeAlias = ( | Literal["d", "f8", "float64", "double", "float_"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longdouble | type[np.longdouble] - | Literal["g", "f16", "float96", "float128", "longdouble", "longfloat"] + | Literal["g", "f16", "float128", "longdouble", "longfloat"] # PyArrow floating point types and their string aliases | Literal[ "float[pyarrow]", @@ -212,7 +212,6 @@ ComplexDtypeArg: TypeAlias = ( | Literal[ "G", "c32", - "complex192", "complex256", "clongdouble", "clongfloat", diff --git a/tests/test_series.py b/tests/test_series.py index aef95068..bc3ebc79 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1761,7 +1761,6 @@ def test_change_to_dict_return_type() -> None: # numpy int64 (np.int_, np.int_), ("int_", np.int_), - ("int0", np.int_), ("long", np.int_), ("l", np.int_), ("int64", np.int64), @@ -1773,6 +1772,7 @@ def test_change_to_dict_return_type() -> None: # numpy signed pointer (platform dependent one of int[8,16,32,64]) (np.intp, np.intp), ("intp", np.intp), + ("int0", np.intp), ("p", np.intp), # pyarrow integer types ("int8[pyarrow]", int), @@ -1815,7 +1815,6 @@ def test_change_to_dict_return_type() -> None: # numpy uint64 (np.uint, np.uint), ("uint", np.uint), - ("uint0", np.uint), ("ulong", np.uint), ("L", np.uint), ("uint64", np.uint64), @@ -1827,6 +1826,7 @@ def test_change_to_dict_return_type() -> None: # numpy unsigned pointer (platform dependent one of uint[8,16,32,64]) (np.uintp, np.uintp), ("uintp", np.uintp), + ("uint0", np.uintp), ("P", np.uintp), # pyarrow unsigned integer types ("uint8[pyarrow]", int), @@ -1870,7 +1870,7 @@ def test_change_to_dict_return_type() -> None: ("longfloat", np.longdouble), ("g", np.longdouble), ("f16", np.longdouble), - ("float96", np.longdouble), # NOTE: WINDOWS ONLY + # ("float96", np.longdouble), # NOTE: WINDOWS ONLY ("float128", np.longdouble), # NOTE: UNIX ONLY # pyarrow float32 ("float32[pyarrow]", float), @@ -1906,7 +1906,7 @@ def test_change_to_dict_return_type() -> None: ("longcomplex", np.clongdouble), ("G", np.clongdouble), ("c32", np.clongdouble), - ("complex192", np.clongdouble), # NOTE: WINDOWS ONLY + # ("complex192", np.clongdouble), # NOTE: WINDOWS ONLY ("complex256", np.clongdouble), # NOTE: UNIX ONLY ] @@ -2234,11 +2234,7 @@ def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) - if platform.system() != "Windows" and cast_arg == "float96": - with pytest.raises(TypeError): - s.astype(cast_arg) - pytest.skip("Unix does not support float96") - if platform.system() == "Windows" and cast_arg == "float128": + if platform.system() == "Windows" and cast_arg in ("f16", "float128"): with pytest.raises(TypeError): s.astype(cast_arg) pytest.skip("Windows does not support float128") @@ -2293,11 +2289,7 @@ def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) - if platform.system() != "Windows" and cast_arg == "complex192": - with pytest.raises(TypeError): - s.astype(cast_arg) - pytest.skip("Unix does not support complex192") - if platform.system() == "Windows" and cast_arg == "complex256": + if platform.system() == "Windows" and cast_arg in ("c32", "complex256"): with pytest.raises(TypeError): s.astype(cast_arg) pytest.skip("Windows does not support complex256") From 2296a8598fa7559b1cf3c7dcf8d65fbf0506f514 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 17:26:30 +0200 Subject: [PATCH 12/14] reverted accidental Series renames --- tests/test_series.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_series.py b/tests/test_series.py index bc3ebc79..6572228f 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -81,7 +81,7 @@ def test_types_init() -> None: pd.Series(1) pd.Series((1, 2, 3)) pd.Series(np.array([1, 2, 3])) - pd.Series(data=[1, 2, 3, 4], name="pd.Series") + pd.Series(data=[1, 2, 3, 4], name="series") pd.Series(data=[1, 2, 3, 4], dtype=np.int8) pd.Series(data={"row1": [1, 2], "row2": [3, 4]}) pd.Series(data=[1, 2, 3, 4], index=[4, 3, 2, 1], copy=True) @@ -135,7 +135,7 @@ def test_types_select() -> None: s = pd.Series(data={"row1": 1, "row2": 2}) with pytest_warns_bounded( FutureWarning, - "pd.Series.__getitem__ treating keys as positions is deprecated", + "Series.__getitem__ treating keys as positions is deprecated", lower="2.0.99", ): s[0] @@ -268,7 +268,7 @@ def test_types_fillna() -> None: check(assert_type(s.fillna(0, axis="index"), pd.Series), pd.Series) with pytest_warns_bounded( FutureWarning, - "pd.Series.fillna with 'method' is deprecated", + "Series.fillna with 'method' is deprecated", lower="2.0.99", ): check(assert_type(s.fillna(method="backfill", axis=0), pd.Series), pd.Series) From 2471ec8f9e12c97186b9a79b7c037a4c6e0e6220 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 17:29:03 +0200 Subject: [PATCH 13/14] removed windows check for test_astype_int --- tests/test_series.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_series.py b/tests/test_series.py index 6572228f..4665b5d7 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -2109,11 +2109,6 @@ def test_astype_bool(cast_arg: BooleanDtypeArg, target_type: type) -> None: def test_astype_int(cast_arg: IntDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) - if platform.system() == "Windows": - # Different behavior for uint32, uint64 and uintp on Windows - if cast_arg in ["uint32", "uint64", "uintp"]: - pass - if cast_arg in (np.longlong, "longlong", "q"): pytest.skip( "longlong is bugged, for details, see" From b6f3829aed29d1003e04904d74e2e5242b6214b4 Mon Sep 17 00:00:00 2001 From: Randolf Scholz Date: Tue, 25 Jul 2023 17:33:49 +0200 Subject: [PATCH 14/14] reordered literals --- pandas-stubs/_typing.pyi | 8 ++++---- tests/test_series.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 2718057d..ac311799 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -125,13 +125,13 @@ IntDtypeArg: TypeAlias = ( | Literal["i", "i4", "int32", "intc"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_ | type[np.int_] - | Literal["l", "i8", "int64", "int0", "int_", "long"] + | Literal["l", "i8", "int64", "int_", "long"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong | type[np.longlong] | Literal["q", "longlong"] # NOTE: int128 not assigned # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp | type[np.intp] # signed pointer (=`intptr_t`, platform dependent) - | Literal["p", "intp"] + | Literal["p", "intp", "int0"] # PyArrow integer types and their string aliases | Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"] ) @@ -154,13 +154,13 @@ UIntDtypeArg: TypeAlias = ( | Literal["I", "u4", "uint32", "uintc"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint | type[np.uint] - | Literal["L", "u8", "uint", "ulong", "uint64", "uint0"] + | Literal["L", "u8", "uint", "ulong", "uint64"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong | type[np.ulonglong] | Literal["Q", "ulonglong"] # NOTE: uint128 not assigned # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp | type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent) - | Literal["P", "uintp"] + | Literal["P", "uintp", "uint0"] # PyArrow unsigned integer types and their string aliases | Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"] ) diff --git a/tests/test_series.py b/tests/test_series.py index 4665b5d7..055d16c0 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1870,7 +1870,7 @@ def test_change_to_dict_return_type() -> None: ("longfloat", np.longdouble), ("g", np.longdouble), ("f16", np.longdouble), - # ("float96", np.longdouble), # NOTE: WINDOWS ONLY + # ("float96", np.longdouble), # NOTE: unsupported ("float128", np.longdouble), # NOTE: UNIX ONLY # pyarrow float32 ("float32[pyarrow]", float), @@ -1906,7 +1906,7 @@ def test_change_to_dict_return_type() -> None: ("longcomplex", np.clongdouble), ("G", np.clongdouble), ("c32", np.clongdouble), - # ("complex192", np.clongdouble), # NOTE: WINDOWS ONLY + # ("complex192", np.clongdouble), # NOTE: unsupported ("complex256", np.clongdouble), # NOTE: UNIX ONLY ]