Commit 409ffd6

Merge branch 'master' of https://github.com/pandas-dev/pandas into 31623

jbrockmendel committed Feb 17, 2020
2 parents abc73f8 + 7b0887c commit 409ffd6
Showing 10 changed files with 139 additions and 127 deletions.
1 change: 1 addition & 0 deletions environment.yml
@@ -26,6 +26,7 @@ dependencies:

   # documentation
   - gitpython  # obtain contributors from git for whatsnew
+  - gitdb2=2.0.6  # GH-32060
   - sphinx

   # documentation (jupyter notebooks)
6 changes: 3 additions & 3 deletions pandas/_libs/algos.pyx
@@ -1173,12 +1173,12 @@ ctypedef fused out_t:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def diff_2d(ndarray[diff_t, ndim=2] arr,
-            ndarray[out_t, ndim=2] out,
+def diff_2d(diff_t[:, :] arr,
+            out_t[:, :] out,
             Py_ssize_t periods, int axis):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.flags.f_contiguous
+        bint f_contig = arr.is_f_contig()

     # Disable for unsupported dtype combinations,
     # see https://github.com/cython/cython/issues/2646
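The diff_2d change swaps Cython's ndarray buffer syntax for typed memoryviews. Memoryviews accept any object exporting the buffer protocol, and they carry their own contiguity queries: arr.flags.f_contiguous is an ndarray-only attribute, while is_f_contig() is the memoryview equivalent. A minimal standalone sketch of the new style (illustrative only, not pandas code):

# memview_sketch.pyx -- illustrative only, not part of this commit
cimport cython
cimport numpy as cnp

cnp.import_array()


@cython.boundscheck(False)
@cython.wraparound(False)
def first_col_sum(cnp.float64_t[:, :] arr):
    # Typed memoryview: works for any 2-D float64 buffer, and the
    # contiguity check lives on the view itself rather than on ndarray.
    cdef:
        Py_ssize_t i
        bint f_contig = arr.is_f_contig()
        double total = 0.0
    for i in range(arr.shape[0]):
        total += arr[i, 0]
    return total, f_contig

Passing np.asfortranarray(...) into this sketch would report f_contig as True, the same kind of check diff_2d uses when choosing its loop order.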
4 changes: 2 additions & 2 deletions pandas/_libs/hashing.pyx
@@ -5,7 +5,7 @@ import cython
 from libc.stdlib cimport malloc, free

 import numpy as np
-from numpy cimport uint8_t, uint32_t, uint64_t, import_array
+from numpy cimport ndarray, uint8_t, uint32_t, uint64_t, import_array
 import_array()

 from pandas._libs.util cimport is_nan
@@ -15,7 +15,7 @@ DEF dROUNDS = 4


 @cython.boundscheck(False)
-def hash_object_array(object[:] arr, object key, object encoding='utf8'):
+def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
     """
     Parameters
     ----------
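This change goes the opposite way: object[:] becomes ndarray[object]. A practical difference, and a plausible motivation (the commit itself doesn't say), is that Cython object memoryviews require a writable buffer, so a read-only object array is rejected at call time, whereas the plain buffer syntax accepts it. A hypothetical session against the new signature:

# Sketch only: hash_object_array with a read-only input.
# The 16-character key mirrors pandas' default hash key.
import numpy as np
from pandas._libs.hashing import hash_object_array

arr = np.array(["a", "b", "c"], dtype=object)
arr.setflags(write=False)  # simulate an array pandas marked read-only

# Accepted under ndarray[object]; an object[:] signature would raise
# ValueError ("buffer source array is read-only").
print(hash_object_array(arr, "0123456789123456"))

The datetime_to_datetime64 change in conversion.pyx below follows the same pattern.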
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pyx
@@ -152,7 +152,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def datetime_to_datetime64(object[:] values):
+def datetime_to_datetime64(ndarray[object] values):
     """
     Convert ndarray of datetime-like objects to int64 array representing
     nanosecond timestamps.
2 changes: 1 addition & 1 deletion pandas/_libs/window/aggregations.pyx
@@ -56,7 +56,7 @@ cdef:
 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b

-cdef inline bint is_monotonic_start_end_bounds(
+cdef bint is_monotonic_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
27 changes: 27 additions & 0 deletions pandas/core/indexers.py
@@ -270,6 +270,33 @@ def deprecate_ndim_indexing(result):
     )


+def unpack_1tuple(tup):
+    """
+    If we have a length-1 tuple/list that contains a slice, unpack to just
+    the slice.
+
+    Notes
+    -----
+    The list case is deprecated.
+    """
+    if len(tup) == 1 and isinstance(tup[0], slice):
+        # if we don't have a MultiIndex, we may still be able to handle
+        # a 1-tuple. see test_1tuple_without_multiindex
+
+        if isinstance(tup, list):
+            # GH#31299
+            warnings.warn(
+                "Indexing with a single-item list containing a "
+                "slice is deprecated and will raise in a future "
+                "version. Pass a tuple instead.",
+                FutureWarning,
+                stacklevel=3,
+            )
+
+        return tup[0]
+    return tup
+
+
 # -----------------------------------------------------------
 # Public indexer validation
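A quick sketch of how the new helper behaves (hypothetical calls, not part of the commit):

# unpack_1tuple unwraps only the length-1, slice-containing case.
from pandas.core.indexers import unpack_1tuple

unpack_1tuple((slice(0, 5),))    # -> slice(0, 5)
unpack_1tuple((slice(0, 5), 2))  # -> returned unchanged (length != 1)
unpack_1tuple([slice(0, 5)])     # -> slice(0, 5), plus a FutureWarning (GH#31299)

The stacklevel=3 is chosen so the warning points at the user's indexing call (e.g. Series.__getitem__) rather than at this helper.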
61 changes: 32 additions & 29 deletions pandas/core/indexing.py
@@ -577,18 +577,6 @@ def __call__(self, axis=None):
         new_self.axis = axis
         return new_self

-    def _get_label(self, label, axis: int):
-        if self.ndim == 1:
-            # for perf reasons we want to try _xs first
-            # as its basically direct indexing
-            # but will fail when the index is not present
-            # see GH5667
-            return self.obj._xs(label, axis=axis)
-        elif isinstance(label, tuple) and isinstance(label[axis], slice):
-            raise IndexingError("no slices here, handle elsewhere")
-
-        return self.obj._xs(label, axis=axis)
-
     def _get_setitem_indexer(self, key):
         """
         Convert a potentially-label-based key into a positional indexer.
@@ -700,23 +688,6 @@ def _convert_tuple(self, key, is_setter: bool = False):
             keyidx.append(idx)
         return tuple(keyidx)

-    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
-        # we have an axis0 multi-index, handle or raise
-        axis = self.axis or 0
-        try:
-            # fast path for series or for tup devoid of slices
-            return self._get_label(tup, axis=axis)
-        except TypeError:
-            # slices are unhashable
-            pass
-        except KeyError as ek:
-            # raise KeyError if number of indexers match
-            # else IndexingError will be raised
-            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
-                raise ek
-
-        return None
-
     def _getitem_tuple_same_dim(self, tup: Tuple):
         """
         Index with indexers that should return an object of the same dimension
@@ -798,6 +769,9 @@ def _getitem_nested_tuple(self, tup: Tuple):
             # multi-index dimension, try to see if we have something like
             # a tuple passed to a series with a multi-index
             if len(tup) > self.ndim:
+                if self.name != "loc":
+                    # This should never be reached, but lets be explicit about it
+                    raise ValueError("Too many indices")
                 result = self._handle_lowerdim_multi_index_axis0(tup)
                 if result is not None:
                     return result
@@ -1069,6 +1043,35 @@ def _getitem_tuple(self, tup: Tuple):

         return self._getitem_tuple_same_dim(tup)

+    def _get_label(self, label, axis: int):
+        if self.ndim == 1:
+            # for perf reasons we want to try _xs first
+            # as its basically direct indexing
+            # but will fail when the index is not present
+            # see GH5667
+            return self.obj._xs(label, axis=axis)
+        elif isinstance(label, tuple) and isinstance(label[axis], slice):
+            raise IndexingError("no slices here, handle elsewhere")
+
+        return self.obj._xs(label, axis=axis)
+
+    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
+        # we have an axis0 multi-index, handle or raise
+        axis = self.axis or 0
+        try:
+            # fast path for series or for tup devoid of slices
+            return self._get_label(tup, axis=axis)
+        except TypeError:
+            # slices are unhashable
+            pass
+        except KeyError as ek:
+            # raise KeyError if number of indexers match
+            # else IndexingError will be raised
+            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
+                raise ek
+
+        return None
+
     def _getitem_axis(self, key, axis: int):
         key = item_from_zerodim(key)
         if is_iterator(key):
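The indexing.py diff is mostly a move: _get_label and _handle_lowerdim_multi_index_axis0 leave the shared base class (the hunks under __call__ and _convert_tuple) and reappear next to _getitem_tuple, with a new guard so that only .loc ever walks the lowered-dimension path. The behavior being relocated is the Series-with-MultiIndex tuple lookup, roughly as in this hypothetical session:

# Hypothetical session; the .loc calls below exercise the moved helpers.
import pandas as pd

mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
ser = pd.Series(range(4), index=mi)

ser.loc[("a", 2)]            # scalar fast path via obj._xs -> 1
ser.loc[("a", slice(None))]  # tuple holding a slice is unhashable: falls past
                             # the fast path but still returns the "a" block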
35 changes: 5 additions & 30 deletions pandas/core/series.py
@@ -72,7 +72,7 @@
     sanitize_array,
 )
 from pandas.core.generic import NDFrame
-from pandas.core.indexers import maybe_convert_indices
+from pandas.core.indexers import maybe_convert_indices, unpack_1tuple
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
     Float64Index,
@@ -851,6 +851,8 @@ def __getitem__(self, key):
         key_is_scalar = is_scalar(key)
         if key_is_scalar:
             key = self.index._convert_scalar_indexer(key, kind="getitem")
+        elif isinstance(key, (list, tuple)):
+            key = unpack_1tuple(key)

         if key_is_scalar or isinstance(self.index, MultiIndex):
             # Otherwise index.get_value will raise InvalidIndexError
@@ -893,16 +895,7 @@ def _get_with(self, key):
                 "supported, use the appropriate DataFrame column"
             )
         elif isinstance(key, tuple):
-            try:
-                return self._get_values_tuple(key)
-            except ValueError:
-                # if we don't have a MultiIndex, we may still be able to handle
-                # a 1-tuple. see test_1tuple_without_multiindex
-                if len(key) == 1:
-                    key = key[0]
-                    if isinstance(key, slice):
-                        return self._get_values(key)
-                raise
+            return self._get_values_tuple(key)

         if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
             key = list(key)
@@ -924,26 +917,8 @@ def _get_with(self, key):
         else:
             return self.iloc[key]

-        if isinstance(key, (list, tuple)):
-            # TODO: de-dup with tuple case handled above?
-            if isinstance(key, list):
-                # handle the dup indexing case GH#4246
-                if len(key) == 1 and isinstance(key[0], slice):
-                    # [slice(0, 5, None)] will break if you convert to ndarray,
-                    # e.g. as requested by np.median
-                    # FIXME: hack
-                    if isinstance(key, list):
-                        # GH#31299
-                        warnings.warn(
-                            "Indexing with a single-item list containing a "
-                            "slice is deprecated and will raise in a future "
-                            "version. Pass a tuple instead.",
-                            FutureWarning,
-                            stacklevel=3,
-                        )
-                        # TODO: use a message more like numpy's?
-                        key = tuple(key)
-                    return self._get_values(key)
-
-            return self.loc[key]
+        if isinstance(key, list):
+            # handle the dup indexing case GH#4246
+            return self.loc[key]

         return self.reindex(key)
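User-visible effect of the series.py simplification (a sketch): the 1-tuple spelling keeps working, while the equivalent single-item list now warns via the shared unpack_1tuple helper instead of the removed inline hack:

# Hypothetical session on a pandas build containing this commit.
import pandas as pd

ser = pd.Series(range(5))

ser[(slice(0, 3),)]  # tuple holding a slice: returns elements 0..2, no warning
ser[[slice(0, 3)]]   # same result, but emits a FutureWarning (GH#31299)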
127 changes: 66 additions & 61 deletions pandas/tests/reductions/test_reductions.py
@@ -66,60 +66,64 @@ def test_ops(self, opname, obj):
         expected = expected.astype("M8[ns]").astype("int64")
         assert result.value == expected

-    def test_nanops(self):
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    def test_nanops(self, opname, index_or_series):
         # GH#7261
-        for opname in ["max", "min"]:
-            for klass in [Index, Series]:
-                arg_op = "arg" + opname if klass is Index else "idx" + opname
-
-                obj = klass([np.nan, 2.0])
-                assert getattr(obj, opname)() == 2.0
-
-                obj = klass([np.nan])
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([], dtype=object)
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1)])
-                # check DatetimeIndex monotonic path
-                assert getattr(obj, opname)() == datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
-                # check DatetimeIndex non-monotonic path
-                assert getattr(obj, opname)(), datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                for dtype in ["M8[ns]", "datetime64[ns, UTC]"]:
-                    # cases with empty Series/DatetimeIndex
-                    obj = klass([], dtype=dtype)
-
-                    assert getattr(obj, opname)() is pd.NaT
-                    assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)()
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)(skipna=False)
-
-    # argmin/max
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([np.nan, 2.0])
+        assert getattr(obj, opname)() == 2.0
+
+        obj = klass([np.nan])
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([], dtype=object)
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1)])
+        # check DatetimeIndex monotonic path
+        assert getattr(obj, opname)() == datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
+        # check DatetimeIndex non-monotonic path
+        assert getattr(obj, opname)(), datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
+    def test_nanops_empty_object(self, opname, index_or_series, dtype):
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([], dtype=dtype)
+
+        assert getattr(obj, opname)() is pd.NaT
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)()
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)(skipna=False)

     def test_argminmax(self):
         obj = Index(np.arange(5, dtype="int64"))
         assert obj.argmin() == 0
         assert obj.argmax() == 4
@@ -224,16 +228,17 @@ def test_minmax_timedelta64(self):
         assert idx.argmin() == 0
         assert idx.argmax() == 2

-        for op in ["min", "max"]:
-            # Return NaT
-            obj = TimedeltaIndex([])
-            assert pd.isna(getattr(obj, op)())
+    @pytest.mark.parametrize("op", ["min", "max"])
+    def test_minmax_timedelta_empty_or_na(self, op):
+        # Return NaT
+        obj = TimedeltaIndex([])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

     def test_numpy_minmax_timedelta64(self):
         td = timedelta_range("16815 days", "16820 days", freq="D")
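The test refactor trades in-test for loops for pytest parametrization, so each opname/container/dtype combination runs and fails independently under its own test id. index_or_series is a shared pandas fixture; a minimal sketch of the pattern as I understand pandas' conftest (names assumed):

# conftest.py-style sketch; the real fixture lives in pandas' test conftest.
import pytest
from pandas import Index, Series


@pytest.fixture(params=[Index, Series], ids=["index", "series"])
def index_or_series(request):
    # Each test requesting this fixture runs once per container class.
    return request.param

With both parametrize decorators plus this fixture, test_nanops_empty_object expands into eight named cases instead of two nested loops that stop at the first failure.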
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -15,6 +15,7 @@ isort
 mypy==0.730
 pycodestyle
 gitpython
+gitdb2==2.0.6
 sphinx
 nbconvert>=5.4.1
 nbsphinx
