diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index bb9a26f7462ce..fcdd2fada2d62 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -256,6 +256,7 @@ Deprecations - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`) - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`) - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`) +- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7a32b8957003e..6b6ead795584f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -50,18 +50,17 @@ from pandas._libs.khash cimport ( import pandas._libs.missing as missing -cdef float64_t FP_ERR = 1e-13 - -cdef float64_t NaN = <float64_t>np.NaN - -cdef int64_t NPY_NAT = get_nat() +cdef: + float64_t FP_ERR = 1e-13 + float64_t NaN = <float64_t>np.NaN + int64_t NPY_NAT = get_nat() tiebreakers = { - 'average': TIEBREAK_AVERAGE, - 'min': TIEBREAK_MIN, - 'max': TIEBREAK_MAX, - 'first': TIEBREAK_FIRST, - 'dense': TIEBREAK_DENSE, + "average": TIEBREAK_AVERAGE, + "min": TIEBREAK_MIN, + "max": TIEBREAK_MAX, + "first": TIEBREAK_FIRST, + "dense": TIEBREAK_DENSE, } @@ -120,6 +119,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): kh_int64_t *table int ret = 0 list uniques = [] + ndarray[int64_t, ndim=1] result table = kh_init_int64() kh_resize_int64(table, 10) @@ -261,7 +261,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: @cython.boundscheck(False) @cython.wraparound(False) -def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None): +def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): cdef: Py_ssize_t i, j, xi, yi, N, K bint minpv @@ -325,7 +325,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None): @cython.boundscheck(False) @cython.wraparound(False) -def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): +def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1) -> ndarray: cdef: Py_ssize_t i, j, xi, yi, N, K ndarray[float64_t, ndim=2] result @@ -581,7 +581,7 @@ D @cython.boundscheck(False) @cython.wraparound(False) -def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): +def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t, ndim=1] indexer @@ -810,18 +810,14 @@ def rank_1d( """ cdef: Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0 - ndarray[rank_t] sorted_data, values - ndarray[float64_t] ranks ndarray[int64_t] argsorted ndarray[uint8_t, cast=True] sorted_mask - rank_t val, nan_value - float64_t sum_ranks = 0 int tiebreak = 0 - bint keep_na = 0 + bint keep_na = False bint isnan, condition float64_t count = 0.0 @@ -1034,19 +1030,14 @@ def rank_2d( """ cdef: Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0 - Py_ssize_t infs - ndarray[float64_t, ndim=2] ranks ndarray[rank_t, ndim=2] values - ndarray[int64_t, ndim=2] argsorted - rank_t val, nan_value - float64_t sum_ranks = 0 int tiebreak = 0 - bint keep_na = 0 + bint keep_na = False float64_t count = 0.0 bint condition, skip_condition diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 57c17f48e01ce..223cc43d158e6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -3,6 +3,7 @@ """ from datetime import date, datetime, timedelta +from typing import TYPE_CHECKING, Type import numpy as np @@ -63,6 +64,7 @@ ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, + ABCExtensionArray, ABCPeriodArray, ABCPeriodIndex, ABCSeries, @@ -70,6 +72,10 @@ from pandas.core.dtypes.inference import is_list_like from pandas.core.dtypes.missing import isna, notna +if TYPE_CHECKING: + from pandas import Series + from pandas.core.arrays import ExtensionArray # noqa: F401 + _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max _int32_max = np.iinfo(np.int32).max @@ -246,9 +252,7 @@ def trans(x): return result -def maybe_cast_result( - result, obj: ABCSeries, numeric_only: bool = False, how: str = "" -): +def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""): """ Try casting result to a different type if appropriate @@ -256,8 +260,8 @@ def maybe_cast_result( ---------- result : array-like Result to cast. - obj : ABCSeries - Input series from which result was calculated. + obj : Series + Input Series from which result was calculated. numeric_only : bool, default False Whether to cast only numerics or datetimes as well. how : str, default "" @@ -313,13 +317,13 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return d.get((dtype, how), dtype) -def maybe_cast_to_extension_array(cls, obj, dtype=None): +def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None): """ Call to `_from_sequence` that returns the object unchanged on Exception. Parameters ---------- - cls : ExtensionArray subclass + cls : class, subclass of ExtensionArray obj : arraylike Values to pass to cls._from_sequence dtype : ExtensionDtype, optional @@ -329,6 +333,8 @@ def maybe_cast_to_extension_array(cls, obj, dtype=None): ExtensionArray or obj """ assert isinstance(cls, type), f"must pass a type: {cls}" + assertion_msg = f"must pass a subclass of ExtensionArray: {cls}" + assert issubclass(cls, ABCExtensionArray), assertion_msg try: result = cls._from_sequence(obj, dtype=dtype) except Exception: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 093c925acbc49..88580f6ebb3ed 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -151,7 +151,7 @@ def pinner(cls): @pin_whitelisted_properties(Series, base.series_apply_whitelist) -class SeriesGroupBy(GroupBy): +class SeriesGroupBy(GroupBy[Series]): _apply_whitelist = base.series_apply_whitelist def _iterate_slices(self) -> Iterable[Series]: @@ -815,7 +815,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None): @pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist) -class DataFrameGroupBy(GroupBy): +class DataFrameGroupBy(GroupBy[DataFrame]): _apply_whitelist = base.dataframe_apply_whitelist @@ -1462,7 +1462,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame: for i, _ in enumerate(result.columns): res = algorithms.take_1d(result.iloc[:, i].values, ids) # TODO: we have no test cases that get here with EA dtypes; - # try_cast may not be needed if EAs never get here + # maybe_cast_result may not be needed if EAs never get here if cast: res = maybe_cast_result(res, obj.iloc[:, i], how=func_nm) output.append(res) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dff712ee17ea6..1474e173b4f8c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -17,6 +17,7 @@ class providing the base-class of operations. Callable, Dict, FrozenSet, + Generic, Hashable, Iterable, List, @@ -24,6 +25,7 @@ class providing the base-class of operations. Optional, Tuple, Type, + TypeVar, Union, ) @@ -353,13 +355,13 @@ def _group_selection_context(groupby): ] -class _GroupBy(PandasObject, SelectionMixin): +class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): _group_selection = None _apply_whitelist: FrozenSet[str] = frozenset() def __init__( self, - obj: NDFrame, + obj: FrameOrSeries, keys: Optional[_KeysArgType] = None, axis: int = 0, level=None, @@ -995,7 +997,11 @@ def _apply_filter(self, indices, dropna): return filtered -class GroupBy(_GroupBy): +# To track operations that expand dimensions, like ohlc +OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame) + + +class GroupBy(_GroupBy[FrameOrSeries]): """ Class for grouping and aggregating relational data. @@ -2420,8 +2426,8 @@ def tail(self, n=5): return self._selected_obj[mask] def _reindex_output( - self, output: FrameOrSeries, fill_value: Scalar = np.NaN - ) -> FrameOrSeries: + self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN + ) -> OutputFrameOrSeries: """ If we have categorical groupers, then we might want to make sure that we have a fully re-indexed output to the levels. This means expanding diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 742de397956c0..8d535374a083f 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -682,7 +682,7 @@ def _aggregate_series_pure_python(self, obj: Series, func): assert result is not None result = lib.maybe_convert_objects(result, try_float=0) - # TODO: try_cast back to EA? + # TODO: maybe_cast_to_extension_array? return result, counts diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b97f0366579b3..cc03208b34fe6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1955,6 +1955,12 @@ def is_mixed(self) -> bool: >>> idx.is_mixed() False """ + warnings.warn( + "Index.is_mixed is deprecated and will be removed in a future version. " + "Check index.inferred_type directly instead.", + FutureWarning, + stacklevel=2, + ) return self.inferred_type in ["mixed"] def holds_integer(self) -> bool: @@ -3131,7 +3137,7 @@ def is_int(v): # convert the slice to an indexer here # if we are mixed and have integers - if is_positional and self.is_mixed(): + if is_positional: try: # Validate start & stop if start is not None: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 230cb7af75f58..ac8de977b9a1a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -791,9 +791,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager": bm = type(self)(new_blocks, new_axes, do_integrity_check=False) return bm - def __contains__(self, item) -> bool: - return item in self.items - @property def nblocks(self) -> int: return len(self.blocks) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 35ee81229b716..0417208868314 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1160,6 +1160,12 @@ def test_intersection_difference(self, indices, sort): diff = indices.difference(indices, sort=sort) tm.assert_index_equal(inter, diff) + def test_is_mixed_deprecated(self): + # GH#32922 + index = self.create_index() + with tm.assert_produces_warning(FutureWarning): + index.is_mixed() + @pytest.mark.parametrize( "indices, expected", [ diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 657849874f091..57fbc9ab13f84 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -301,10 +301,6 @@ def test_duplicate_ref_loc_failure(self): mgr = BlockManager(blocks, axes) mgr.iget(1) - def test_contains(self, mgr): - assert "a" in mgr - assert "baz" not in mgr - def test_pickle(self, mgr): mgr2 = tm.round_trip_pickle(mgr)