
Commit

Merge remote-tracking branch 'upstream/master' into boolean-array
linxiaow committed Apr 5, 2020
2 parents 1a08e61 + ae75f35 commit 81860e3
Showing 10 changed files with 57 additions and 48 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -256,6 +256,7 @@ Deprecations
 - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)
 - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
 - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
+- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)

 .. ---------------------------------------------------------------------------
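A minimal migration sketch for this deprecation (the example index is illustrative; `is_mixed` is just a membership check on `inferred_type`):

```python
import pandas as pd

# A mix of strings and floats infers as "mixed".
idx = pd.Index(["a", "b", 1.5])

# Deprecated after this change (now emits a FutureWarning):
# idx.is_mixed()

# Direct replacement, matching the method's implementation:
is_mixed = idx.inferred_type in ["mixed"]
```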
39 changes: 15 additions & 24 deletions pandas/_libs/algos.pyx
@@ -50,18 +50,17 @@ from pandas._libs.khash cimport (

 import pandas._libs.missing as missing

-cdef float64_t FP_ERR = 1e-13
-
-cdef float64_t NaN = <float64_t>np.NaN
-
-cdef int64_t NPY_NAT = get_nat()
+cdef:
+    float64_t FP_ERR = 1e-13
+    float64_t NaN = <float64_t>np.NaN
+    int64_t NPY_NAT = get_nat()

 tiebreakers = {
-    'average': TIEBREAK_AVERAGE,
-    'min': TIEBREAK_MIN,
-    'max': TIEBREAK_MAX,
-    'first': TIEBREAK_FIRST,
-    'dense': TIEBREAK_DENSE,
+    "average": TIEBREAK_AVERAGE,
+    "min": TIEBREAK_MIN,
+    "max": TIEBREAK_MAX,
+    "first": TIEBREAK_FIRST,
+    "dense": TIEBREAK_DENSE,
 }


@@ -120,6 +119,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
         kh_int64_t *table
         int ret = 0
         list uniques = []
+        ndarray[int64_t, ndim=1] result

     table = kh_init_int64()
     kh_resize_int64(table, 10)
@@ -261,7 +261,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
+def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
     cdef:
         Py_ssize_t i, j, xi, yi, N, K
         bint minpv
@@ -325,7 +325,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
+def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1) -> ndarray:
     cdef:
         Py_ssize_t i, j, xi, yi, N, K
         ndarray[float64_t, ndim=2] result
@@ -581,7 +581,7 @@ D

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t, ndim=1] indexer
@@ -810,18 +810,14 @@ def rank_1d(
     """
     cdef:
         Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0
-
         ndarray[rank_t] sorted_data, values
-
         ndarray[float64_t] ranks
         ndarray[int64_t] argsorted
         ndarray[uint8_t, cast=True] sorted_mask
-
         rank_t val, nan_value
-
         float64_t sum_ranks = 0
         int tiebreak = 0
-        bint keep_na = 0
+        bint keep_na = False
         bint isnan, condition
         float64_t count = 0.0

@@ -1034,19 +1030,14 @@ def rank_2d(
     """
     cdef:
         Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0
-
         Py_ssize_t infs
-
         ndarray[float64_t, ndim=2] ranks
         ndarray[rank_t, ndim=2] values
-
         ndarray[int64_t, ndim=2] argsorted
-
         rank_t val, nan_value
-
         float64_t sum_ranks = 0
         int tiebreak = 0
-        bint keep_na = 0
+        bint keep_na = False
         float64_t count = 0.0
         bint condition, skip_condition
20 changes: 13 additions & 7 deletions pandas/core/dtypes/cast.py
@@ -3,6 +3,7 @@
 """

 from datetime import date, datetime, timedelta
+from typing import TYPE_CHECKING, Type

 import numpy as np

@@ -63,13 +64,18 @@
     ABCDataFrame,
     ABCDatetimeArray,
     ABCDatetimeIndex,
+    ABCExtensionArray,
     ABCPeriodArray,
     ABCPeriodIndex,
     ABCSeries,
 )
 from pandas.core.dtypes.inference import is_list_like
 from pandas.core.dtypes.missing import isna, notna

+if TYPE_CHECKING:
+    from pandas import Series
+    from pandas.core.arrays import ExtensionArray  # noqa: F401
+
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
 _int32_max = np.iinfo(np.int32).max
@@ -246,18 +252,16 @@ def trans(x):
     return result


-def maybe_cast_result(
-    result, obj: ABCSeries, numeric_only: bool = False, how: str = ""
-):
+def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""):
     """
     Try casting result to a different type if appropriate

     Parameters
     ----------
     result : array-like
         Result to cast.
-    obj : ABCSeries
-        Input series from which result was calculated.
+    obj : Series
+        Input Series from which result was calculated.
     numeric_only : bool, default False
         Whether to cast only numerics or datetimes as well.
     how : str, default ""
@@ -313,13 +317,13 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
     return d.get((dtype, how), dtype)


-def maybe_cast_to_extension_array(cls, obj, dtype=None):
+def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
     """
     Call to `_from_sequence` that returns the object unchanged on Exception.

     Parameters
     ----------
-    cls : ExtensionArray subclass
+    cls : class, subclass of ExtensionArray
     obj : arraylike
         Values to pass to cls._from_sequence
     dtype : ExtensionDtype, optional
@@ -329,6 +333,8 @@ def maybe_cast_to_extension_array(cls, obj, dtype=None):
     ExtensionArray or obj
     """
     assert isinstance(cls, type), f"must pass a type: {cls}"
+    assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
+    assert issubclass(cls, ABCExtensionArray), assertion_msg
     try:
         result = cls._from_sequence(obj, dtype=dtype)
     except Exception:
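The `TYPE_CHECKING` block added above is the standard way to give annotations a real target without paying for (or cycling on) a runtime import. A standalone sketch of the pattern; `describe` and `from_values` are illustrative helpers, not pandas API:

```python
from typing import TYPE_CHECKING, Type

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime,
    # so importing pandas here cannot create an import cycle.
    from pandas import Series
    from pandas.core.arrays import ExtensionArray


def describe(obj: "Series") -> str:
    # String annotations are resolved lazily, so this module
    # never needs the real Series class at import time.
    return f"Series of dtype {obj.dtype}"


def from_values(cls: Type["ExtensionArray"], values) -> "ExtensionArray":
    # Mirrors maybe_cast_to_extension_array's use of Type["ExtensionArray"].
    return cls._from_sequence(values)
```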
6 changes: 3 additions & 3 deletions pandas/core/groupby/generic.py
@@ -151,7 +151,7 @@ def pinner(cls):


 @pin_whitelisted_properties(Series, base.series_apply_whitelist)
-class SeriesGroupBy(GroupBy):
+class SeriesGroupBy(GroupBy[Series]):
     _apply_whitelist = base.series_apply_whitelist

     def _iterate_slices(self) -> Iterable[Series]:
@@ -815,7 +815,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):


 @pin_whitelisted_properties(DataFrame, base.dataframe_apply_whitelist)
-class DataFrameGroupBy(GroupBy):
+class DataFrameGroupBy(GroupBy[DataFrame]):

     _apply_whitelist = base.dataframe_apply_whitelist

@@ -1462,7 +1462,7 @@ def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
         for i, _ in enumerate(result.columns):
             res = algorithms.take_1d(result.iloc[:, i].values, ids)
             # TODO: we have no test cases that get here with EA dtypes;
-            #  try_cast may not be needed if EAs never get here
+            #  maybe_cast_result may not be needed if EAs never get here
             if cast:
                 res = maybe_cast_result(res, obj.iloc[:, i], how=func_nm)
             output.append(res)
16 changes: 11 additions & 5 deletions pandas/core/groupby/groupby.py
@@ -17,13 +17,15 @@ class providing the base-class of operations.
     Callable,
     Dict,
     FrozenSet,
+    Generic,
     Hashable,
     Iterable,
     List,
     Mapping,
     Optional,
     Tuple,
     Type,
+    TypeVar,
     Union,
 )

@@ -353,13 +355,13 @@ def _group_selection_context(groupby):
 ]


-class _GroupBy(PandasObject, SelectionMixin):
+class _GroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
     _group_selection = None
     _apply_whitelist: FrozenSet[str] = frozenset()

     def __init__(
         self,
-        obj: NDFrame,
+        obj: FrameOrSeries,
         keys: Optional[_KeysArgType] = None,
         axis: int = 0,
         level=None,
@@ -995,7 +997,11 @@ def _apply_filter(self, indices, dropna):
         return filtered


-class GroupBy(_GroupBy):
+# To track operations that expand dimensions, like ohlc
+OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)
+
+
+class GroupBy(_GroupBy[FrameOrSeries]):
     """
     Class for grouping and aggregating relational data.

@@ -2420,8 +2426,8 @@ def tail(self, n=5):
         return self._selected_obj[mask]

     def _reindex_output(
-        self, output: FrameOrSeries, fill_value: Scalar = np.NaN
-    ) -> FrameOrSeries:
+        self, output: OutputFrameOrSeries, fill_value: Scalar = np.NaN
+    ) -> OutputFrameOrSeries:
         """
         If we have categorical groupers, then we might want to make sure that
         we have a fully re-indexed output to the levels. This means expanding
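For readers unfamiliar with the typing machinery being introduced here: parametrizing `_GroupBy` with a `TypeVar` lets `SeriesGroupBy` and `DataFrameGroupBy` tell the checker what they hold and return, while the separate `OutputFrameOrSeries` covers operations such as `ohlc` whose output dimension differs from the input. A simplified, self-contained sketch; the classes below are stand-ins, not the real pandas classes:

```python
from typing import Generic, TypeVar


class NDFrame:
    """Stand-in for pandas' NDFrame base class."""


class Series(NDFrame):
    pass


class DataFrame(NDFrame):
    pass


FrameOrSeries = TypeVar("FrameOrSeries", bound=NDFrame)
# The output may differ in dimension from the input (e.g. ohlc: Series -> DataFrame).
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)


class GroupBy(Generic[FrameOrSeries]):
    def __init__(self, obj: FrameOrSeries) -> None:
        self.obj = obj

    def first(self) -> FrameOrSeries:
        # Checkers now infer SeriesGroupBy.first() -> Series, etc.
        return self.obj

    def _reindex_output(self, output: OutputFrameOrSeries) -> OutputFrameOrSeries:
        # Typed by what it is given, not by what the groupby holds.
        return output


class SeriesGroupBy(GroupBy[Series]):
    pass


class DataFrameGroupBy(GroupBy[DataFrame]):
    pass
```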
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
@@ -682,7 +682,7 @@ def _aggregate_series_pure_python(self, obj: Series, func):

         assert result is not None
         result = lib.maybe_convert_objects(result, try_float=0)
-        # TODO: try_cast back to EA?
+        # TODO: maybe_cast_to_extension_array?

         return result, counts

8 changes: 7 additions & 1 deletion pandas/core/indexes/base.py
@@ -1955,6 +1955,12 @@ def is_mixed(self) -> bool:
         >>> idx.is_mixed()
         False
         """
+        warnings.warn(
+            "Index.is_mixed is deprecated and will be removed in a future version. "
+            "Check index.inferred_type directly instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
         return self.inferred_type in ["mixed"]

     def holds_integer(self) -> bool:
@@ -3131,7 +3137,7 @@ def is_int(v):
         # convert the slice to an indexer here

         # if we are mixed and have integers
-        if is_positional and self.is_mixed():
+        if is_positional:
             try:
                 # Validate start & stop
                 if start is not None:
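The warning added above follows the usual deprecation shape; `stacklevel=2` attributes the warning to the caller of `is_mixed` rather than to the method body itself. A generic sketch of the pattern, with hypothetical function names:

```python
import warnings


def new_api():
    return "result"


def old_api():
    warnings.warn(
        "old_api is deprecated and will be removed in a future version. "
        "Use new_api instead.",
        FutureWarning,
        stacklevel=2,  # report the caller's line, not this one
    )
    return new_api()
```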
3 changes: 0 additions & 3 deletions pandas/core/internals/managers.py
@@ -791,9 +791,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> "BlockManager":
         bm = type(self)(new_blocks, new_axes, do_integrity_check=False)
         return bm

-    def __contains__(self, item) -> bool:
-        return item in self.items
-
     @property
     def nblocks(self) -> int:
         return len(self.blocks)
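With `BlockManager.__contains__` removed, membership checks go through the `items` axis explicitly. A hedged sketch of the call-site change; this pokes at pandas internals, and the manager attribute name has varied across versions:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
# Internal block manager; attribute name differs between pandas versions.
mgr = df._mgr if hasattr(df, "_mgr") else df._data

# Before this commit: `"a" in mgr` went through BlockManager.__contains__.
# After its removal, consult the items axis (the columns Index) directly:
assert "a" in mgr.items
assert "baz" not in mgr.items
```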
6 changes: 6 additions & 0 deletions pandas/tests/indexes/test_base.py
@@ -1160,6 +1160,12 @@ def test_intersection_difference(self, indices, sort):
         diff = indices.difference(indices, sort=sort)
         tm.assert_index_equal(inter, diff)

+    def test_is_mixed_deprecated(self):
+        # GH#32922
+        index = self.create_index()
+        with tm.assert_produces_warning(FutureWarning):
+            index.is_mixed()
+
     @pytest.mark.parametrize(
         "indices, expected",
         [
4 changes: 0 additions & 4 deletions pandas/tests/internals/test_internals.py
@@ -301,10 +301,6 @@ def test_duplicate_ref_loc_failure(self):
         mgr = BlockManager(blocks, axes)
         mgr.iget(1)

-    def test_contains(self, mgr):
-        assert "a" in mgr
-        assert "baz" not in mgr
-
     def test_pickle(self, mgr):

         mgr2 = tm.round_trip_pickle(mgr)
