Commit 409ffd6

Merge branch 'master' of https://github.com/pandas-dev/pandas into 31623

jbrockmendel committed Feb 17, 2020
2 parents abc73f8 + 7b0887c commit 409ffd6
Showing 10 changed files with 139 additions and 127 deletions.
1 change: 1 addition & 0 deletions environment.yml
@@ -26,6 +26,7 @@ dependencies:

   # documentation
   - gitpython  # obtain contributors from git for whatsnew
+  - gitdb2=2.0.6  # GH-32060
   - sphinx

   # documentation (jupyter notebooks)
6 changes: 3 additions & 3 deletions pandas/_libs/algos.pyx
@@ -1173,12 +1173,12 @@ ctypedef fused out_t:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def diff_2d(ndarray[diff_t, ndim=2] arr,
-            ndarray[out_t, ndim=2] out,
+def diff_2d(diff_t[:, :] arr,
+            out_t[:, :] out,
             Py_ssize_t periods, int axis):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.flags.f_contiguous
+        bint f_contig = arr.is_f_contig()

     # Disable for unsupported dtype combinations,
     # see https://github.com/cython/cython/issues/2646
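The diff_2d change swaps Cython's ndarray buffer syntax for typed memoryviews. Memoryviews accept any object exporting the buffer protocol, and they carry their own contiguity queries: arr.flags.f_contiguous is an ndarray-only attribute, while is_f_contig() is the memoryview equivalent. A minimal standalone sketch of the new style (illustrative only, not pandas code):

# memview_sketch.pyx -- illustrative only, not part of this commit
cimport cython
cimport numpy as cnp

cnp.import_array()


@cython.boundscheck(False)
@cython.wraparound(False)
def first_col_sum(cnp.float64_t[:, :] arr):
    # Typed memoryview: works for any 2-D float64 buffer, and the
    # contiguity check lives on the view itself rather than on ndarray.
    cdef:
        Py_ssize_t i
        bint f_contig = arr.is_f_contig()
        double total = 0.0
    for i in range(arr.shape[0]):
        total += arr[i, 0]
    return total, f_contig

Passing np.asfortranarray(...) into this sketch would report f_contig as True, the same kind of check diff_2d uses when choosing its loop order.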
4 changes: 2 additions & 2 deletions pandas/_libs/hashing.pyx
@@ -5,7 +5,7 @@ import cython
 from libc.stdlib cimport malloc, free

 import numpy as np
-from numpy cimport uint8_t, uint32_t, uint64_t, import_array
+from numpy cimport ndarray, uint8_t, uint32_t, uint64_t, import_array
 import_array()

 from pandas._libs.util cimport is_nan
@@ -15,7 +15,7 @@ DEF dROUNDS = 4


 @cython.boundscheck(False)
-def hash_object_array(object[:] arr, object key, object encoding='utf8'):
+def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
     """
     Parameters
     ----------
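This change goes the opposite way: object[:] becomes ndarray[object]. A practical difference, and a plausible motivation (the commit itself doesn't say), is that Cython object memoryviews require a writable buffer, so a read-only object array is rejected at call time, whereas the plain buffer syntax accepts it. A hypothetical session against the new signature:

# Sketch only: hash_object_array with a read-only input.
# The 16-character key mirrors pandas' default hash key.
import numpy as np
from pandas._libs.hashing import hash_object_array

arr = np.array(["a", "b", "c"], dtype=object)
arr.setflags(write=False)  # simulate an array pandas marked read-only

# Accepted under ndarray[object]; an object[:] signature would raise
# ValueError ("buffer source array is read-only").
print(hash_object_array(arr, "0123456789123456"))

The datetime_to_datetime64 change in conversion.pyx below follows the same pattern.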
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pyx
@@ -152,7 +152,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def datetime_to_datetime64(object[:] values):
+def datetime_to_datetime64(ndarray[object] values):
     """
     Convert ndarray of datetime-like objects to int64 array representing
     nanosecond timestamps.
2 changes: 1 addition & 1 deletion pandas/_libs/window/aggregations.pyx
@@ -56,7 +56,7 @@ cdef:
 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b

-cdef inline bint is_monotonic_start_end_bounds(
+cdef bint is_monotonic_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
27 changes: 27 additions & 0 deletions pandas/core/indexers.py
@@ -270,6 +270,33 @@ def deprecate_ndim_indexing(result):
     )


+def unpack_1tuple(tup):
+    """
+    If we have a length-1 tuple/list that contains a slice, unpack to just
+    the slice.
+
+    Notes
+    -----
+    The list case is deprecated.
+    """
+    if len(tup) == 1 and isinstance(tup[0], slice):
+        # if we don't have a MultiIndex, we may still be able to handle
+        # a 1-tuple. see test_1tuple_without_multiindex
+
+        if isinstance(tup, list):
+            # GH#31299
+            warnings.warn(
+                "Indexing with a single-item list containing a "
+                "slice is deprecated and will raise in a future "
+                "version. Pass a tuple instead.",
+                FutureWarning,
+                stacklevel=3,
+            )
+
+        return tup[0]
+    return tup
+
+
 # -----------------------------------------------------------
 # Public indexer validation
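A quick sketch of how the new helper behaves (hypothetical calls, not part of the commit):

# unpack_1tuple unwraps only the length-1, slice-containing case.
from pandas.core.indexers import unpack_1tuple

unpack_1tuple((slice(0, 5),))    # -> slice(0, 5)
unpack_1tuple((slice(0, 5), 2))  # -> returned unchanged (length != 1)
unpack_1tuple([slice(0, 5)])     # -> slice(0, 5), plus a FutureWarning (GH#31299)

The stacklevel=3 is chosen so the warning points at the user's indexing call (e.g. Series.__getitem__) rather than at this helper.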
61 changes: 32 additions & 29 deletions pandas/core/indexing.py
@@ -577,18 +577,6 @@ def __call__(self, axis=None):
         new_self.axis = axis
         return new_self

-    def _get_label(self, label, axis: int):
-        if self.ndim == 1:
-            # for perf reasons we want to try _xs first
-            # as its basically direct indexing
-            # but will fail when the index is not present
-            # see GH5667
-            return self.obj._xs(label, axis=axis)
-        elif isinstance(label, tuple) and isinstance(label[axis], slice):
-            raise IndexingError("no slices here, handle elsewhere")
-
-        return self.obj._xs(label, axis=axis)
-
     def _get_setitem_indexer(self, key):
         """
         Convert a potentially-label-based key into a positional indexer.
@@ -700,23 +688,6 @@ def _convert_tuple(self, key, is_setter: bool = False):
             keyidx.append(idx)
         return tuple(keyidx)

-    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
-        # we have an axis0 multi-index, handle or raise
-        axis = self.axis or 0
-        try:
-            # fast path for series or for tup devoid of slices
-            return self._get_label(tup, axis=axis)
-        except TypeError:
-            # slices are unhashable
-            pass
-        except KeyError as ek:
-            # raise KeyError if number of indexers match
-            # else IndexingError will be raised
-            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
-                raise ek
-
-        return None
-
     def _getitem_tuple_same_dim(self, tup: Tuple):
         """
         Index with indexers that should return an object of the same dimension
@@ -798,6 +769,9 @@ def _getitem_nested_tuple(self, tup: Tuple):
             # multi-index dimension, try to see if we have something like
             # a tuple passed to a series with a multi-index
             if len(tup) > self.ndim:
+                if self.name != "loc":
+                    # This should never be reached, but lets be explicit about it
+                    raise ValueError("Too many indices")
                 result = self._handle_lowerdim_multi_index_axis0(tup)
                 if result is not None:
                     return result
@@ -1069,6 +1043,35 @@ def _getitem_tuple(self, tup: Tuple):

         return self._getitem_tuple_same_dim(tup)

+    def _get_label(self, label, axis: int):
+        if self.ndim == 1:
+            # for perf reasons we want to try _xs first
+            # as its basically direct indexing
+            # but will fail when the index is not present
+            # see GH5667
+            return self.obj._xs(label, axis=axis)
+        elif isinstance(label, tuple) and isinstance(label[axis], slice):
+            raise IndexingError("no slices here, handle elsewhere")
+
+        return self.obj._xs(label, axis=axis)
+
+    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
+        # we have an axis0 multi-index, handle or raise
+        axis = self.axis or 0
+        try:
+            # fast path for series or for tup devoid of slices
+            return self._get_label(tup, axis=axis)
+        except TypeError:
+            # slices are unhashable
+            pass
+        except KeyError as ek:
+            # raise KeyError if number of indexers match
+            # else IndexingError will be raised
+            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
+                raise ek
+
+        return None
+
     def _getitem_axis(self, key, axis: int):
         key = item_from_zerodim(key)
         if is_iterator(key):
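The indexing.py diff is mostly a move: _get_label and _handle_lowerdim_multi_index_axis0 leave the shared base class (the hunks under __call__ and _convert_tuple) and reappear next to _getitem_tuple, with a new guard so that only .loc ever walks the lowered-dimension path. The behavior being relocated is the Series-with-MultiIndex tuple lookup, roughly as in this hypothetical session:

# Hypothetical session; the .loc calls below exercise the moved helpers.
import pandas as pd

mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
ser = pd.Series(range(4), index=mi)

ser.loc[("a", 2)]            # scalar fast path via obj._xs -> 1
ser.loc[("a", slice(None))]  # tuple holding a slice is unhashable: falls past
                             # the fast path but still returns the "a" block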
35 changes: 5 additions & 30 deletions pandas/core/series.py
@@ -72,7 +72,7 @@
     sanitize_array,
 )
 from pandas.core.generic import NDFrame
-from pandas.core.indexers import maybe_convert_indices
+from pandas.core.indexers import maybe_convert_indices, unpack_1tuple
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
     Float64Index,
@@ -851,6 +851,8 @@ def __getitem__(self, key):
         key_is_scalar = is_scalar(key)
         if key_is_scalar:
             key = self.index._convert_scalar_indexer(key, kind="getitem")
+        elif isinstance(key, (list, tuple)):
+            key = unpack_1tuple(key)

         if key_is_scalar or isinstance(self.index, MultiIndex):
             # Otherwise index.get_value will raise InvalidIndexError
@@ -893,16 +895,7 @@ def _get_with(self, key):
                 "supported, use the appropriate DataFrame column"
             )
         elif isinstance(key, tuple):
-            try:
-                return self._get_values_tuple(key)
-            except ValueError:
-                # if we don't have a MultiIndex, we may still be able to handle
-                # a 1-tuple. see test_1tuple_without_multiindex
-                if len(key) == 1:
-                    key = key[0]
-                    if isinstance(key, slice):
-                        return self._get_values(key)
-                raise
+            return self._get_values_tuple(key)

         if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
             key = list(key)
@@ -924,26 +917,8 @@ def _get_with(self, key):
         else:
             return self.iloc[key]

-        if isinstance(key, (list, tuple)):
-            # TODO: de-dup with tuple case handled above?
-            if isinstance(key, list):
-                # handle the dup indexing case GH#4246
-                if len(key) == 1 and isinstance(key[0], slice):
-                    # [slice(0, 5, None)] will break if you convert to ndarray,
-                    # e.g. as requested by np.median
-                    # FIXME: hack
-                    if isinstance(key, list):
-                        # GH#31299
-                        warnings.warn(
-                            "Indexing with a single-item list containing a "
-                            "slice is deprecated and will raise in a future "
-                            "version. Pass a tuple instead.",
-                            FutureWarning,
-                            stacklevel=3,
-                        )
-                        # TODO: use a message more like numpy's?
-                        key = tuple(key)
-                    return self._get_values(key)
-
-            return self.loc[key]
+        if isinstance(key, list):
+            # handle the dup indexing case GH#4246
+            return self.loc[key]

         return self.reindex(key)
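User-visible effect of the series.py simplification (a sketch): the 1-tuple spelling keeps working, while the equivalent single-item list now warns via the shared unpack_1tuple helper instead of the removed inline hack:

# Hypothetical session on a pandas build containing this commit.
import pandas as pd

ser = pd.Series(range(5))

ser[(slice(0, 3),)]  # tuple holding a slice: returns elements 0..2, no warning
ser[[slice(0, 3)]]   # same result, but emits a FutureWarning (GH#31299)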
127 changes: 66 additions & 61 deletions pandas/tests/reductions/test_reductions.py
@@ -66,60 +66,64 @@ def test_ops(self, opname, obj):
         expected = expected.astype("M8[ns]").astype("int64")
         assert result.value == expected

-    def test_nanops(self):
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    def test_nanops(self, opname, index_or_series):
         # GH#7261
-        for opname in ["max", "min"]:
-            for klass in [Index, Series]:
-                arg_op = "arg" + opname if klass is Index else "idx" + opname
-
-                obj = klass([np.nan, 2.0])
-                assert getattr(obj, opname)() == 2.0
-
-                obj = klass([np.nan])
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([], dtype=object)
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1)])
-                # check DatetimeIndex monotonic path
-                assert getattr(obj, opname)() == datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
-                # check DatetimeIndex non-monotonic path
-                assert getattr(obj, opname)(), datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                for dtype in ["M8[ns]", "datetime64[ns, UTC]"]:
-                    # cases with empty Series/DatetimeIndex
-                    obj = klass([], dtype=dtype)
-
-                    assert getattr(obj, opname)() is pd.NaT
-                    assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)()
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)(skipna=False)
-
-    # argmin/max
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([np.nan, 2.0])
+        assert getattr(obj, opname)() == 2.0
+
+        obj = klass([np.nan])
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([], dtype=object)
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1)])
+        # check DatetimeIndex monotonic path
+        assert getattr(obj, opname)() == datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
+        # check DatetimeIndex non-monotonic path
+        assert getattr(obj, opname)(), datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
+    def test_nanops_empty_object(self, opname, index_or_series, dtype):
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([], dtype=dtype)
+
+        assert getattr(obj, opname)() is pd.NaT
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)()
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)(skipna=False)

     def test_argminmax(self):
         obj = Index(np.arange(5, dtype="int64"))
         assert obj.argmin() == 0
         assert obj.argmax() == 4
@@ -224,16 +228,17 @@ def test_minmax_timedelta64(self):
         assert idx.argmin() == 0
         assert idx.argmax() == 2

-        for op in ["min", "max"]:
-            # Return NaT
-            obj = TimedeltaIndex([])
-            assert pd.isna(getattr(obj, op)())
+    @pytest.mark.parametrize("op", ["min", "max"])
+    def test_minmax_timedelta_empty_or_na(self, op):
+        # Return NaT
+        obj = TimedeltaIndex([])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

     def test_numpy_minmax_timedelta64(self):
         td = timedelta_range("16815 days", "16820 days", freq="D")
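The test refactor trades in-test for loops for pytest parametrization, so each opname/container/dtype combination runs and fails independently under its own test id. index_or_series is a shared pandas fixture; a minimal sketch of the pattern as I understand pandas' conftest (names assumed):

# conftest.py-style sketch; the real fixture lives in pandas' test conftest.
import pytest
from pandas import Index, Series


@pytest.fixture(params=[Index, Series], ids=["index", "series"])
def index_or_series(request):
    # Each test requesting this fixture runs once per container class.
    return request.param

With both parametrize decorators plus this fixture, test_nanops_empty_object expands into eight named cases instead of two nested loops that stop at the first failure.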
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -15,6 +15,7 @@ isort
 mypy==0.730
 pycodestyle
 gitpython
+gitdb2==2.0.6
 sphinx
 nbconvert>=5.4.1
 nbsphinx
