
Commit

Merge remote-tracking branch 'upstream/master' into boolean-array
linxiaow committed Apr 5, 2020
2 parents 1a08e61 + ae75f35 commit 81860e3
Showing 10 changed files with 57 additions and 48 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -256,6 +256,7 @@ Deprecations
 - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)
 - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
 - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
+- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)

 .. ---------------------------------------------------------------------------
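A minimal migration sketch for this deprecation (the example index is illustrative; `is_mixed` is just a membership check on `inferred_type`):

```python
import pandas as pd

# A mix of strings and floats infers as "mixed".
idx = pd.Index(["a", "b", 1.5])

# Deprecated after this change (now emits a FutureWarning):
# idx.is_mixed()

# Direct replacement, matching the method's implementation:
is_mixed = idx.inferred_type in ["mixed"]
```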
39 changes: 15 additions & 24 deletions pandas/_libs/algos.pyx
@@ -50,18 +50,17 @@ from pandas._libs.khash cimport (

 import pandas._libs.missing as missing

-cdef float64_t FP_ERR = 1e-13
-
-cdef float64_t NaN = <float64_t>np.NaN
-
-cdef int64_t NPY_NAT = get_nat()
+cdef:
+    float64_t FP_ERR = 1e-13
+    float64_t NaN = <float64_t>np.NaN
+    int64_t NPY_NAT = get_nat()

 tiebreakers = {
-    'average': TIEBREAK_AVERAGE,
-    'min': TIEBREAK_MIN,
-    'max': TIEBREAK_MAX,
-    'first': TIEBREAK_FIRST,
-    'dense': TIEBREAK_DENSE,
+    "average": TIEBREAK_AVERAGE,
+    "min": TIEBREAK_MIN,
+    "max": TIEBREAK_MAX,
+    "first": TIEBREAK_FIRST,
+    "dense": TIEBREAK_DENSE,
 }


@@ -120,6 +119,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
         kh_int64_t *table
         int ret = 0
         list uniques = []
+        ndarray[int64_t, ndim=1] result

     table = kh_init_int64()
     kh_resize_int64(table, 10)
@@ -261,7 +261,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
+def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
     cdef:
         Py_ssize_t i, j, xi, yi, N, K
         bint minpv
@@ -325,7 +325,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
+def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1) -> ndarray:
     cdef:
         Py_ssize_t i, j, xi, yi, N, K
         ndarray[float64_t, ndim=2] result
@@ -581,7 +581,7 @@ D

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t, ndim=1] indexer
@@ -810,18 +810,14 @@ def rank_1d(
     """
     cdef:
         Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
-
         ndarray[rank_t] sorted_data, values
-
         ndarray[float64_t] ranks
         ndarray[int64_t] argsorted
         ndarray[uint8_t, cast=True] sorted_mask
-
         rank_t val, nan_value
-
         float64_t sum_ranks = 0
         int tiebreak = 0
-        bint keep_na = 0
+        bint keep_na = False
         bint isnan, condition
         float64_t count = 0.0

@@ -1034,19 +1030,14 @@ def rank_2d(
     """
     cdef:
         Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0
-
         Py_ssize_t infs
-
         ndarray[float64_t, ndim=2] ranks
         ndarray[rank_t, ndim=2] values
-
         ndarray[int64_t, ndim=2] argsorted
-
         rank_t val, nan_value
-
         float64_t sum_ranks = 0
         int tiebreak = 0
-        bint keep_na = 0
+        bint keep_na = False
         float64_t count = 0.0
         bint condition, skip_condition
20 changes: 13 additions & 7 deletions pandas/core/dtypes/cast.py
@@ -3,6 +3,7 @@
 """

 from datetime import date, datetime, timedelta
+from typing import TYPE_CHECKING, Type

 import numpy as np

@@ -63,13 +64,18 @@
     ABCDataFrame,
     ABCDatetimeArray,
     ABCDatetimeIndex,
+    ABCExtensionArray,
     ABCPeriodArray,
     ABCPeriodIndex,
     ABCSeries,
 )
 from pandas.core.dtypes.inference import is_list_like
 from pandas.core.dtypes.missing import isna, notna

+if TYPE_CHECKING:
+    from pandas import Series
+    from pandas.core.arrays import ExtensionArray  # noqa: F401
+
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
 _int32_max = np.iinfo(np.int32).max
@@ -246,18 +252,16 @@ def trans(x):
     return result


-def maybe_cast_result(
-    result, obj: ABCSeries, numeric_only: bool = False, how: str = ""
-):
+def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""):
     """
     Try casting result to a different type if appropriate

     Parameters
     ----------
     result : array-like
         Result to cast.
-    obj : ABCSeries
-        Input series from which result was calculated.
+    obj : Series
+        Input Series from which result was calculated.
     numeric_only : bool, default False
         Whether to cast only numerics or datetimes as well.
     how : str, default ""
@@ -313,13 +317,13 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
     return d.get((dtype, how), dtype)


-def maybe_cast_to_extension_array(cls, obj, dtype=None):
+def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
     """
     Call to `_from_sequence` that returns the object unchanged on Exception.

     Parameters
     ----------
-    cls : ExtensionArray subclass
+    cls : class, subclass of ExtensionArray
     obj : arraylike
         Values to pass to cls._from_sequence
     dtype : ExtensionDtype, optional
@@ -329,6 +333,8 @@ def maybe_cast_to_extension_array(cls, obj, dtype=None):
     ExtensionArray or obj
     """
     assert isinstance(cls, type), f"must pass a type: {cls}"
+    assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
+    assert issubclass(cls, ABCExtensionArray), assertion_msg
     try:
         result = cls._from_sequence(obj, dtype=dtype)
     except Exception:
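The `TYPE_CHECKING` block added above is the standard way to give annotations a real target without paying for (or cycling on) a runtime import. A standalone sketch of the pattern; `describe` and `from_values` are illustrative helpers, not pandas API:

```python
from typing import TYPE_CHECKING, Type

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime,
    # so importing pandas here cannot create an import cycle.
    from pandas import Series
    from pandas.core.arrays import ExtensionArray


def describe(obj: "Series") -> str:
    # String annotations are resolved lazily, so this module
    # never needs the real Series class at import time.
    return f"Series of dtype {obj.dtype}"


def from_values(cls: Type["ExtensionArray"], values) -> "ExtensionArray":
    # Mirrors maybe_cast_to_extension_array's use of Type["ExtensionArray"].
    return cls._from_sequence(values)
```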
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
@@ -151,7 +151,7 @@ def pinner(cls):


 @pin_whitelisted_properties(Series, base.series_apply_whitelist)
-class SeriesGroupBy(GroupBy):
+class SeriesGroupBy(GroupBy[Series]):
     _apply_whitelist = base.series_apply_whitelist

     def _iterate_slices(self) -> Iterable[Series]:
@@ -815,7 +815,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):


 @pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist)
-class DataFrameGroupBy(GroupBy):
+class DataFrameGroupBy(GroupBy[DataFrame]):

     _apply_whitelist = base.dataframe_apply_whitelist

@@ -1462,7 +1462,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
         for i, _ in enumerate(result.columns):
             res = algorithms.take_1d(result.iloc[:, i].values, ids)
             # TODO: we have no test cases that get here with EA dtypes;
-            #  try_cast may not be needed if EAs never get here
+            #  maybe_cast_result may not be needed if EAs never get here
             if cast:
                 res = maybe_cast_result(res, obj.iloc[:, i], how=func_nm)
             output.append(res)
16 changes: 11 additions & 5 deletions pandas/core/groupby/groupby.py
@@ -17,13 +17,15 @@ class providing the base-class of operations.
     Callable,
     Dict,
     FrozenSet,
+    Generic,
     Hashable,
     Iterable,
     List,
     Mapping,
     Optional,
     Tuple,
     Type,
+    TypeVar,
     Union,
 )

@@ -353,13 +355,13 @@ def _group_selection_context(groupby):
 ]


-class _GroupBy(PandasObject, SelectionMixin):
+class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
     _group_selection = None
     _apply_whitelist: FrozenSet[str] = frozenset()

     def __init__(
         self,
-        obj: NDFrame,
+        obj: FrameOrSeries,
         keys: Optional[_KeysArgType] = None,
         axis: int = 0,
         level=None,
@@ -995,7 +997,11 @@ def _apply_filter(self, indices, dropna):
         return filtered


-class GroupBy(_GroupBy):
+# To track operations that expand dimensions, like ohlc
+OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)
+
+
+class GroupBy(_GroupBy[FrameOrSeries]):
     """
     Class for grouping and aggregating relational data.

@@ -2420,8 +2426,8 @@ def tail(self, n=5):
         return self._selected_obj[mask]

     def _reindex_output(
-        self, output: FrameOrSeries, fill_value: Scalar = np.NaN
-    ) -> FrameOrSeries:
+        self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN
+    ) -> OutputFrameOrSeries:
         """
         If we have categorical groupers, then we might want to make sure that
         we have a fully re-indexed output to the levels. This means expanding
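For readers unfamiliar with the typing machinery being introduced here: parametrizing `_GroupBy` with a `TypeVar` lets `SeriesGroupBy` and `DataFrameGroupBy` tell the checker what they hold and return, while the separate `OutputFrameOrSeries` covers operations such as `ohlc` whose output dimension differs from the input. A simplified, self-contained sketch; the classes below are stand-ins, not the real pandas classes:

```python
from typing import Generic, TypeVar


class NDFrame:
    """Stand-in for pandas' NDFrame base class."""


class Series(NDFrame):
    pass


class DataFrame(NDFrame):
    pass


FrameOrSeries = TypeVar("FrameOrSeries", bound=NDFrame)
# The output may differ in dimension from the input (e.g. ohlc: Series -> DataFrame).
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)


class GroupBy(Generic[FrameOrSeries]):
    def __init__(self, obj: FrameOrSeries) -> None:
        self.obj = obj

    def first(self) -> FrameOrSeries:
        # Checkers now infer SeriesGroupBy.first() -> Series, etc.
        return self.obj

    def _reindex_output(self, output: OutputFrameOrSeries) -> OutputFrameOrSeries:
        # Typed by what it is given, not by what the groupby holds.
        return output


class SeriesGroupBy(GroupBy[Series]):
    pass


class DataFrameGroupBy(GroupBy[DataFrame]):
    pass
```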
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -682,7 +682,7 @@ def _aggregate_series_pure_python(self, obj: Series, func):

         assert result is not None
         result = lib.maybe_convert_objects(result, try_float=0)
-        # TODO: try_cast back to EA?
+        # TODO: maybe_cast_to_extension_array?

         return result, counts

8 changes: 7 additions & 1 deletion pandas/core/indexes/base.py
@@ -1955,6 +1955,12 @@ def is_mixed(self) -> bool:
         >>> idx.is_mixed()
         False
         """
+        warnings.warn(
+            "Index.is_mixed is deprecated and will be removed in a future version. "
+            "Check index.inferred_type directly instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
         return self.inferred_type in ["mixed"]

     def holds_integer(self) -> bool:
@@ -3131,7 +3137,7 @@ def is_int(v):
         # convert the slice to an indexer here

         # if we are mixed and have integers
-        if is_positional and self.is_mixed():
+        if is_positional:
             try:
                 # Validate start & stop
                 if start is not None:
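The warning added above follows the usual deprecation shape; `stacklevel=2` attributes the warning to the caller of `is_mixed` rather than to the method body itself. A generic sketch of the pattern, with hypothetical function names:

```python
import warnings


def new_api():
    return "result"


def old_api():
    warnings.warn(
        "old_api is deprecated and will be removed in a future version. "
        "Use new_api instead.",
        FutureWarning,
        stacklevel=2,  # report the caller's line, not this one
    )
    return new_api()
```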
3 changes: 0 additions & 3 deletions pandas/core/internals/managers.py
@@ -791,9 +791,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":
         bm = type(self)(new_blocks, new_axes, do_integrity_check=False)
         return bm

-    def __contains__(self, item) -> bool:
-        return item in self.items
-
     @property
     def nblocks(self) -> int:
         return len(self.blocks)
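With `BlockManager.__contains__` removed, membership checks go through the `items` axis explicitly. A hedged sketch of the call-site change; this pokes at pandas internals, and the manager attribute name has varied across versions:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
# Internal block manager; attribute name differs between pandas versions.
mgr = df._mgr if hasattr(df, "_mgr") else df._data

# Before this commit: `"a" in mgr` went through BlockManager.__contains__.
# After its removal, consult the items axis (the columns Index) directly:
assert "a" in mgr.items
assert "baz" not in mgr.items
```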
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_base.py
@@ -1160,6 +1160,12 @@ def test_intersection_difference(self, indices, sort):
         diff = indices.difference(indices, sort=sort)
         tm.assert_index_equal(inter, diff)

+    def test_is_mixed_deprecated(self):
+        # GH#32922
+        index = self.create_index()
+        with tm.assert_produces_warning(FutureWarning):
+            index.is_mixed()
+
     @pytest.mark.parametrize(
         "indices, expected",
         [
4 changes: 0 additions & 4 deletions pandas/tests/internals/test_internals.py
@@ -301,10 +301,6 @@ def test_duplicate_ref_loc_failure(self):
         mgr = BlockManager(blocks, axes)
         mgr.iget(1)

-    def test_contains(self, mgr):
-        assert "a" in mgr
-        assert "baz" not in mgr
-
     def test_pickle(self, mgr):

         mgr2 = tm.round_trip_pickle(mgr)
