From e2794ac10f2da2c08e5809f667181f19464ac54b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 30 Sep 2019 21:00:18 -0700 Subject: [PATCH] CLN: Assorted typings (#28604) --- pandas/core/algorithms.py | 24 ++++++++++++------------ pandas/core/util/hashing.py | 21 ++++++++++++++------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6e73e1636a75ba..002bbcc63d04f2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -176,7 +176,6 @@ def _reconstruct_data(values, dtype, original): ------- Index for extension types, otherwise ndarray casted to dtype """ - from pandas import Index if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) @@ -184,7 +183,7 @@ def _reconstruct_data(values, dtype, original): values = values.astype(dtype) # we only support object dtypes bool Index - if isinstance(original, Index): + if isinstance(original, ABCIndexClass): values = values.astype(object) elif dtype is not None: values = values.astype(dtype) @@ -833,7 +832,7 @@ def duplicated(values, keep="first"): return f(values, keep=keep) -def mode(values, dropna=True): +def mode(values, dropna: bool = True): """ Returns the mode(s) of an array. @@ -1888,7 +1887,7 @@ def searchsorted(arr, value, side="left", sorter=None): } -def diff(arr, n, axis=0): +def diff(arr, n: int, axis: int = 0): """ difference of n between self, analogous to s-s.shift(n) @@ -1904,7 +1903,6 @@ def diff(arr, n, axis=0): Returns ------- shifted - """ n = int(n) @@ -1935,13 +1933,15 @@ def diff(arr, n, axis=0): f = _diff_special[arr.dtype.name] f(arr, out_arr, n, axis) else: - res_indexer = [slice(None)] * arr.ndim - res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) - res_indexer = tuple(res_indexer) - - lag_indexer = [slice(None)] * arr.ndim - lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) - lag_indexer = tuple(lag_indexer) + # To keep mypy happy, _res_indexer is a list while res_indexer is + # a tuple, ditto for lag_indexer. + _res_indexer = [slice(None)] * arr.ndim + _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(_res_indexer) + + _lag_indexer = [slice(None)] * arr.ndim + _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(_lag_indexer) # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index bcdbf0855cbb49..4bcc53606aecab 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -26,7 +26,7 @@ _default_hash_key = "0123456789123456" -def _combine_hash_arrays(arrays, num_items): +def _combine_hash_arrays(arrays, num_items: int): """ Parameters ---------- @@ -55,7 +55,11 @@ def _combine_hash_arrays(arrays, num_items): def hash_pandas_object( - obj, index=True, encoding="utf8", hash_key=None, categorize=True + obj, + index: bool = True, + encoding: str = "utf8", + hash_key=None, + categorize: bool = True, ): """ Return a data hash of the Index/Series/DataFrame. @@ -125,7 +129,10 @@ def hash_pandas_object( for _ in [None] ) num_items += 1 - hashes = itertools.chain(hashes, index_hash_generator) + + # keep `hashes` specifically a generator to keep mypy happy + _hashes = itertools.chain(hashes, index_hash_generator) + hashes = (x for x in _hashes) h = _combine_hash_arrays(hashes, num_items) h = Series(h, index=obj.index, dtype="uint64", copy=False) @@ -179,7 +186,7 @@ def hash_tuples(vals, encoding="utf8", hash_key=None): return h -def hash_tuple(val, encoding="utf8", hash_key=None): +def hash_tuple(val, encoding: str = "utf8", hash_key=None): """ Hash a single tuple efficiently @@ -201,7 +208,7 @@ def hash_tuple(val, encoding="utf8", hash_key=None): return h -def _hash_categorical(c, encoding, hash_key): +def _hash_categorical(c, encoding: str, hash_key: str): """ Hash a Categorical by hashing its categories, and then mapping the codes to the hashes @@ -239,7 +246,7 @@ def _hash_categorical(c, encoding, hash_key): return result -def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): +def hash_array(vals, encoding: str = "utf8", hash_key=None, categorize: bool = True): """ Given a 1d array, return an array of deterministic integers. @@ -317,7 +324,7 @@ def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): return vals -def _hash_scalar(val, encoding="utf8", hash_key=None): +def _hash_scalar(val, encoding: str = "utf8", hash_key=None): """ Hash scalar value