From 845808bb9bcdbb9054775ca77f6708e603d2cb2b Mon Sep 17 00:00:00 2001 From: Gregory Rome Date: Mon, 8 Apr 2019 17:15:12 -0500 Subject: [PATCH 1/3] Fix type annotations in pandas.core.dtypes --- mypy.ini | 6 ------ pandas/core/dtypes/base.py | 7 +++--- pandas/core/dtypes/dtypes.py | 40 +++++++++++++++++++++-------------- pandas/core/dtypes/missing.py | 3 ++- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/mypy.ini b/mypy.ini index ddfa9885e3351..62a97da7b6178 100644 --- a/mypy.ini +++ b/mypy.ini @@ -62,12 +62,6 @@ ignore_errors=True [mypy-pandas.core.config_init] ignore_errors=True -[mypy-pandas.core.dtypes.dtypes] -ignore_errors=True - -[mypy-pandas.core.dtypes.missing] -ignore_errors=True - [mypy-pandas.core.frame] ignore_errors=True diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 9e7d771abe342..d08b663fbb538 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,5 +1,5 @@ """Extend pandas with custom array types""" -from typing import List, Optional, Type +from typing import List, Optional, Tuple, Type import numpy as np @@ -24,7 +24,7 @@ class _DtypeOpsMixin(object): # of the NA value, not the physical NA vaalue for storage. # e.g. for JSONArray, this is an empty dictionary. na_value = np.nan - _metadata = () + _metadata = () # type: Tuple[str, ...] def __eq__(self, other): """Check whether 'other' is equal to self. 
@@ -219,8 +219,7 @@ def type(self) -> Type: raise AbstractMethodError(self) @property - def kind(self): - # type () -> str + def kind(self) -> str: """ A character code (one of 'biufcmMOSUV'), default 'O' diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index f0dd70886dc06..2e73438434cd3 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,5 +1,7 @@ """ define extension dtypes """ +import builtins import re +from typing import Any, Dict, Optional, Tuple, Type import warnings import numpy as np @@ -104,17 +106,23 @@ class PandasExtensionDtype(_DtypeOpsMixin): THIS IS NOT A REAL NUMPY DTYPE """ - type = None + type = None # type: Any + kind = None # type: Any + """ + The Any type annotations above are here only because mypy seems to have a + problem dealing with with multiple inheritance from PandasExtensionDtype + and ExtensionDtype's @properties in the subclasses below. Those subclasses + are explicitly typed, as appropriate. + """ subdtype = None - kind = None - str = None + str = None # type: Optional[builtins.str] num = 100 - shape = tuple() + shape = tuple() # type: Tuple[int, ...] itemsize = 8 base = None isbuiltin = 0 isnative = 0 - _cache = {} + _cache = {} # type: Dict[builtins.str, object] def __unicode__(self): return self.name @@ -217,12 +225,12 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): """ # TODO: Document public vs. 
private API name = 'category' - type = CategoricalDtypeType - kind = 'O' + type = CategoricalDtypeType # type: Type[CategoricalDtypeType] + kind = 'O' # type: builtins.str str = '|O08' base = np.dtype('O') _metadata = ('categories', 'ordered') - _cache = {} + _cache = {} # type: Dict[builtins.str, object] def __init__(self, categories=None, ordered=None): self._finalize(categories, ordered, fastpath=False) @@ -584,15 +592,15 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.datetime64[ns] """ - type = Timestamp - kind = 'M' + type = Timestamp # type: Type[Timestamp] + kind = 'M' # type: builtins.str str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") - _cache = {} + _cache = {} # type: Dict[builtins.str, object] def __init__(self, unit="ns", tz=None): """ @@ -736,14 +744,14 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. 
""" - type = Period - kind = 'O' + type = Period # type: Type[Period] + kind = 'O' # type: builtins.str str = '|O08' base = np.dtype('O') num = 102 _metadata = ('freq',) _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]") - _cache = {} + _cache = {} # type: Dict[builtins.str, object] def __new__(cls, freq=None): """ @@ -860,13 +868,13 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE """ name = 'interval' - kind = None + kind = None # type: Optional[builtins.str] str = '|O08' base = np.dtype('O') num = 103 _metadata = ('subtype',) _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]") - _cache = {} + _cache = {} # type: Dict[builtins.str, object] def __new__(cls, subtype=None): """ diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 94efab1f94009..914a292d3db97 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -3,7 +3,8 @@ """ import numpy as np -from pandas._libs import lib, missing as libmissing +from pandas._libs import lib +import pandas._libs.missing as libmissing from pandas._libs.tslibs import NaT, iNaT from .common import ( From 6dae7ca2ee6d538b8dcb5ebbbcd8780d49d35f3e Mon Sep 17 00:00:00 2001 From: Gregory Rome Date: Wed, 10 Apr 2019 08:45:56 -0500 Subject: [PATCH 2/3] Changed typing of _cache variables --- pandas/core/dtypes/dtypes.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2e73438434cd3..8d8fe1e3201d7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -108,12 +108,10 @@ class PandasExtensionDtype(_DtypeOpsMixin): """ type = None # type: Any kind = None # type: Any - """ - The Any type annotations above are here only because mypy seems to have a - problem dealing with with multiple inheritance from PandasExtensionDtype - and ExtensionDtype's @properties in the subclasses below. 
Those subclasses - are explicitly typed, as appropriate. - """ + # The Any type annotations above are here only because mypy seems to have a + # problem dealing with with multiple inheritance from PandasExtensionDtype + # and ExtensionDtype's @properties in the subclasses below. The kind and + # type variables in those subclasses are explicitly typed below. subdtype = None str = None # type: Optional[builtins.str] num = 100 @@ -122,7 +120,7 @@ class PandasExtensionDtype(_DtypeOpsMixin): base = None isbuiltin = 0 isnative = 0 - _cache = {} # type: Dict[builtins.str, object] + _cache = {} # type: Dict[builtins.str, 'PandasExtensionDtype'] def __unicode__(self): return self.name @@ -230,7 +228,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): str = '|O08' base = np.dtype('O') _metadata = ('categories', 'ordered') - _cache = {} # type: Dict[builtins.str, object] + _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] def __init__(self, categories=None, ordered=None): self._finalize(categories, ordered, fastpath=False) @@ -600,7 +598,7 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") - _cache = {} # type: Dict[builtins.str, object] + _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] def __init__(self, unit="ns", tz=None): """ @@ -751,7 +749,7 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): num = 102 _metadata = ('freq',) _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]") - _cache = {} # type: Dict[builtins.str, object] + _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] def __new__(cls, freq=None): """ @@ -874,7 +872,7 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): num = 103 _metadata = ('subtype',) _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]") - _cache = {} # type: Dict[builtins.str, object] + _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] def __new__(cls, subtype=None): """ 
From 85ef4b44e18395ecf55d155ae600b49213eef9ab Mon Sep 17 00:00:00 2001 From: Gregory Rome Date: Wed, 10 Apr 2019 18:26:11 -0500 Subject: [PATCH 3/3] Replace builtins.str with type alias --- pandas/core/dtypes/dtypes.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 8d8fe1e3201d7..0cff3ed649802 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,5 +1,4 @@ """ define extension dtypes """ -import builtins import re from typing import Any, Dict, Optional, Tuple, Type import warnings @@ -18,6 +17,8 @@ from .base import ExtensionDtype, _DtypeOpsMixin from .inference import is_list_like +str_type = str + def register_extension_dtype(cls): """ @@ -113,14 +114,14 @@ class PandasExtensionDtype(_DtypeOpsMixin): # and ExtensionDtype's @properties in the subclasses below. The kind and # type variables in those subclasses are explicitly typed below. subdtype = None - str = None # type: Optional[builtins.str] + str = None # type: Optional[str_type] num = 100 shape = tuple() # type: Tuple[int, ...] itemsize = 8 base = None isbuiltin = 0 isnative = 0 - _cache = {} # type: Dict[builtins.str, 'PandasExtensionDtype'] + _cache = {} # type: Dict[str_type, 'PandasExtensionDtype'] def __unicode__(self): return self.name @@ -224,11 +225,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): # TODO: Document public vs. 
private API name = 'category' type = CategoricalDtypeType # type: Type[CategoricalDtypeType] - kind = 'O' # type: builtins.str + kind = 'O' # type: str_type str = '|O08' base = np.dtype('O') _metadata = ('categories', 'ordered') - _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] + _cache = {} # type: Dict[str_type, PandasExtensionDtype] def __init__(self, categories=None, ordered=None): self._finalize(categories, ordered, fastpath=False) @@ -591,14 +592,14 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): np.datetime64[ns] """ type = Timestamp # type: Type[Timestamp] - kind = 'M' # type: builtins.str + kind = 'M' # type: str_type str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") - _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] + _cache = {} # type: Dict[str_type, PandasExtensionDtype] def __init__(self, unit="ns", tz=None): """ @@ -743,13 +744,13 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. """ type = Period # type: Type[Period] - kind = 'O' # type: builtins.str + kind = 'O' # type: str_type str = '|O08' base = np.dtype('O') num = 102 _metadata = ('freq',) _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]") - _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] + _cache = {} # type: Dict[str_type, PandasExtensionDtype] def __new__(cls, freq=None): """ @@ -866,13 +867,13 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE """ name = 'interval' - kind = None # type: Optional[builtins.str] + kind = None # type: Optional[str_type] str = '|O08' base = np.dtype('O') num = 103 _metadata = ('subtype',) _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]") - _cache = {} # type: Dict[builtins.str, PandasExtensionDtype] + _cache = {} # type: Dict[str_type, PandasExtensionDtype] def __new__(cls, subtype=None): """