From c81b0ba9292fe04502dbe759e3520c03818167e2 Mon Sep 17 00:00:00 2001
From: William Ayd
Date: Mon, 17 Feb 2020 14:50:32 -0500
Subject: [PATCH 1/5] Clean Up C Warnings (#31935)

---
 pandas/_libs/algos.pyx               | 6 +++---
 pandas/_libs/hashing.pyx             | 4 ++--
 pandas/_libs/tslibs/conversion.pyx   | 2 +-
 pandas/_libs/window/aggregations.pyx | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index dd1f38ce3a842..5f3d946a1e024 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -1173,12 +1173,12 @@ ctypedef fused out_t:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def diff_2d(ndarray[diff_t, ndim=2] arr,
-            ndarray[out_t, ndim=2] out,
+def diff_2d(diff_t[:, :] arr,
+            out_t[:, :] out,
             Py_ssize_t periods, int axis):
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
-        bint f_contig = arr.flags.f_contiguous
+        bint f_contig = arr.is_f_contig()

     # Disable for unsupported dtype combinations,
     #  see https://github.com/cython/cython/issues/2646
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
index 878da670b2f68..2d859db22ea23 100644
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -5,7 +5,7 @@ import cython
 from libc.stdlib cimport malloc, free

 import numpy as np
-from numpy cimport uint8_t, uint32_t, uint64_t, import_array
+from numpy cimport ndarray, uint8_t, uint32_t, uint64_t, import_array
 import_array()

 from pandas._libs.util cimport is_nan
@@ -15,7 +15,7 @@ DEF dROUNDS = 4

 @cython.boundscheck(False)
-def hash_object_array(object[:] arr, object key, object encoding='utf8'):
+def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
     """
     Parameters
     ----------
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index bf38fcfb6103c..57b4100fbceb0 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -152,7 +152,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def datetime_to_datetime64(object[:] values):
+def datetime_to_datetime64(ndarray[object] values):
     """
     Convert ndarray of datetime-like objects to int64 array representing
     nanosecond timestamps.
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index f675818599b2c..80b9144042041 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -56,7 +56,7 @@ cdef:
 cdef inline int int_max(int a, int b): return a if a >= b else b
 cdef inline int int_min(int a, int b): return a if a <= b else b

-cdef inline bint is_monotonic_start_end_bounds(
+cdef bint is_monotonic_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
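[Annotation] For orientation on the signature changes in this patch: typed memoryviews (`diff_t[:, :]`) avoid the C-level warnings that Cython's `ndarray` buffer syntax generates, and they expose contiguity through `is_f_contig()`/`is_c_contig()` methods rather than the ndarray `.flags` attribute. A minimal standalone sketch of the same pattern, not part of the patch (`demo_diff1` is a made-up name for illustration):

    # demo.pyx -- illustrative sketch only, not part of this patch
    cimport cython

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def demo_diff1(double[:, :] arr, double[:, :] out):
        # memoryviews expose contiguity directly; no ndarray .flags needed
        cdef bint f_contig = arr.is_f_contig()
        cdef Py_ssize_t i, j
        for i in range(1, arr.shape[0]):
            for j in range(arr.shape[1]):
                # lag-1 difference along axis 0, like the diff_2d kernel above
                out[i, j] = arr[i, j] - arr[i - 1, j]
        return f_contig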
From 92bb4c9517c41503d42571ea697ea2c81f3a189c Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 17 Feb 2020 11:53:58 -0800
Subject: [PATCH 2/5] REF: implement unpack_1tuple to clean up
 Series.__getitem__ (#31906)

---
 pandas/core/indexers.py | 27 +++++++++++++++++++++++++++
 pandas/core/series.py   | 35 +++++------------------------------
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index cadae9da6092f..4fb42fce29e1a 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -270,6 +270,33 @@ def deprecate_ndim_indexing(result):
     )


+def unpack_1tuple(tup):
+    """
+    If we have a length-1 tuple/list that contains a slice, unpack to just
+    the slice.
+
+    Notes
+    -----
+    The list case is deprecated.
+    """
+    if len(tup) == 1 and isinstance(tup[0], slice):
+        # if we don't have a MultiIndex, we may still be able to handle
+        #  a 1-tuple.  see test_1tuple_without_multiindex
+
+        if isinstance(tup, list):
+            # GH#31299
+            warnings.warn(
+                "Indexing with a single-item list containing a "
+                "slice is deprecated and will raise in a future "
+                "version. Pass a tuple instead.",
+                FutureWarning,
+                stacklevel=3,
+            )
+
+        return tup[0]
+    return tup
+
+
 # -----------------------------------------------------------
 # Public indexer validation
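[Annotation] The new helper centralizes the 1-tuple/1-list unpacking that previously lived inline in `Series._get_with` (see the series.py hunk below). Roughly, the user-visible behavior it backs, sketched against a pandas build carrying this patch (illustrative, not part of the diff):

    import warnings

    import pandas as pd

    ser = pd.Series(range(5))

    # a length-1 tuple holding a slice unpacks to the slice itself
    assert ser[(slice(0, 3),)].equals(ser[0:3])

    # the single-item-list spelling still works but warns (GH#31299)
    with warnings.catch_warnings(record=True) as rec:
        warnings.simplefilter("always")
        ser[[slice(0, 3)]]
    assert any(issubclass(w.category, FutureWarning) for w in rec)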
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 256586f3d36a1..15fe0bb98b536 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -72,7 +72,7 @@
     sanitize_array,
 )
 from pandas.core.generic import NDFrame
-from pandas.core.indexers import maybe_convert_indices
+from pandas.core.indexers import maybe_convert_indices, unpack_1tuple
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
     Float64Index,
@@ -851,6 +851,8 @@ def __getitem__(self, key):
         key_is_scalar = is_scalar(key)
         if key_is_scalar:
             key = self.index._convert_scalar_indexer(key, kind="getitem")
+        elif isinstance(key, (list, tuple)):
+            key = unpack_1tuple(key)

         if key_is_scalar or isinstance(self.index, MultiIndex):
             # Otherwise index.get_value will raise InvalidIndexError
@@ -893,16 +895,7 @@ def _get_with(self, key):
                 "supported, use the appropriate DataFrame column"
             )
         elif isinstance(key, tuple):
-            try:
-                return self._get_values_tuple(key)
-            except ValueError:
-                # if we don't have a MultiIndex, we may still be able to handle
-                #  a 1-tuple.  see test_1tuple_without_multiindex
-                if len(key) == 1:
-                    key = key[0]
-                    if isinstance(key, slice):
-                        return self._get_values(key)
-                raise
+            return self._get_values_tuple(key)

         if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
             key = list(key)
@@ -924,26 +917,8 @@ def _get_with(self, key):
         else:
             return self.iloc[key]

-        if isinstance(key, (list, tuple)):
-            # TODO: de-dup with tuple case handled above?
+        if isinstance(key, list):
             # handle the dup indexing case GH#4246
-            if len(key) == 1 and isinstance(key[0], slice):
-                # [slice(0, 5, None)] will break if you convert to ndarray,
-                #  e.g. as requested by np.median
-                # FIXME: hack
-                if isinstance(key, list):
-                    # GH#31299
-                    warnings.warn(
-                        "Indexing with a single-item list containing a "
-                        "slice is deprecated and will raise in a future "
-                        "version. Pass a tuple instead.",
-                        FutureWarning,
-                        stacklevel=3,
-                    )
-                # TODO: use a message more like numpy's?
-                key = tuple(key)
-            return self._get_values(key)
-
         return self.loc[key]

     return self.reindex(key)
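[Annotation] The `isinstance(key, list)` branch kept above routes list keys through `.loc`, which is what handles the duplicate-label case GH#4246 refers to. For example (a sketch, not part of the patch):

    import pandas as pd

    ser = pd.Series([10, 20, 30], index=["a", "b", "a"])

    # "a" appears twice in the index and twice in the key, so .loc
    # returns four rows -- the GH#4246 "dup indexing" case
    print(ser[["a", "a"]])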
From cea40592e61d9451aca6c66108b9d7a38e3f9b6a Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 17 Feb 2020 12:01:51 -0800
Subject: [PATCH 3/5] REF: move loc-only methods to loc (#31859)

---
 pandas/core/indexing.py | 61 +++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 46017377f2b9c..36140d3213ce1 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -577,18 +577,6 @@ def __call__(self, axis=None):
         new_self.axis = axis
         return new_self

-    def _get_label(self, label, axis: int):
-        if self.ndim == 1:
-            # for perf reasons we want to try _xs first
-            # as its basically direct indexing
-            # but will fail when the index is not present
-            # see GH5667
-            return self.obj._xs(label, axis=axis)
-        elif isinstance(label, tuple) and isinstance(label[axis], slice):
-            raise IndexingError("no slices here, handle elsewhere")
-
-        return self.obj._xs(label, axis=axis)
-
     def _get_setitem_indexer(self, key):
         """
         Convert a potentially-label-based key into a positional indexer.
@@ -700,23 +688,6 @@ def _convert_tuple(self, key, is_setter: bool = False):
             keyidx.append(idx)
         return tuple(keyidx)

-    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
-        # we have an axis0 multi-index, handle or raise
-        axis = self.axis or 0
-        try:
-            # fast path for series or for tup devoid of slices
-            return self._get_label(tup, axis=axis)
-        except TypeError:
-            # slices are unhashable
-            pass
-        except KeyError as ek:
-            # raise KeyError if number of indexers match
-            # else IndexingError will be raised
-            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
-                raise ek
-
-        return None
-
     def _getitem_tuple_same_dim(self, tup: Tuple):
         """
         Index with indexers that should return an object of the same dimension
@@ -798,6 +769,9 @@ def _getitem_nested_tuple(self, tup: Tuple):
             # multi-index dimension, try to see if we have something like
             # a tuple passed to a series with a multi-index
             if len(tup) > self.ndim:
+                if self.name != "loc":
+                    # This should never be reached, but lets be explicit about it
+                    raise ValueError("Too many indices")
                 result = self._handle_lowerdim_multi_index_axis0(tup)
                 if result is not None:
                     return result
@@ -1069,6 +1043,35 @@ def _getitem_tuple(self, tup: Tuple):

         return self._getitem_tuple_same_dim(tup)

+    def _get_label(self, label, axis: int):
+        if self.ndim == 1:
+            # for perf reasons we want to try _xs first
+            # as its basically direct indexing
+            # but will fail when the index is not present
+            # see GH5667
+            return self.obj._xs(label, axis=axis)
+        elif isinstance(label, tuple) and isinstance(label[axis], slice):
+            raise IndexingError("no slices here, handle elsewhere")
+
+        return self.obj._xs(label, axis=axis)
+
+    def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
+        # we have an axis0 multi-index, handle or raise
+        axis = self.axis or 0
+        try:
+            # fast path for series or for tup devoid of slices
+            return self._get_label(tup, axis=axis)
+        except TypeError:
+            # slices are unhashable
+            pass
+        except KeyError as ek:
+            # raise KeyError if number of indexers match
+            # else IndexingError will be raised
+            if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
+                raise ek
+
+        return None
+
     def _getitem_axis(self, key, axis: int):
         key = item_from_zerodim(key)
         if is_iterator(key):
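[Annotation] Both relocated methods serve `.loc`'s handling of tuple keys on a Series with a MultiIndex -- the "axis0 multi-index" case the comments mention, which is why they now live on the loc-specific indexer only. A small sketch of the path they back (illustrative, not part of the patch):

    import pandas as pd

    mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
    ser = pd.Series(range(4), index=mi)

    # a tuple key spanning both index levels resolves through the _xs
    # fast path wrapped by _get_label / _handle_lowerdim_multi_index_axis0
    assert ser.loc[("a", 2)] == 1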
From 3b4b86b833e7f765ca035cc2067897f8ae89fac2 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
Date: Mon, 17 Feb 2020 14:03:05 -0600
Subject: [PATCH 4/5] CLN: Clean reductions/test_reductions.py (#32035)

---
 pandas/tests/reductions/test_reductions.py | 127 +++++++++++----------
 1 file changed, 66 insertions(+), 61 deletions(-)

diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 0b312fe2f8990..211d0d52d8357 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -66,60 +66,64 @@ def test_ops(self, opname, obj):
             expected = expected.astype("M8[ns]").astype("int64")
         assert result.value == expected

-    def test_nanops(self):
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    def test_nanops(self, opname, index_or_series):
         # GH#7261
-        for opname in ["max", "min"]:
-            for klass in [Index, Series]:
-                arg_op = "arg" + opname if klass is Index else "idx" + opname
-
-                obj = klass([np.nan, 2.0])
-                assert getattr(obj, opname)() == 2.0
-
-                obj = klass([np.nan])
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([], dtype=object)
-                assert pd.isna(getattr(obj, opname)())
-                assert pd.isna(getattr(obj, opname)(skipna=False))
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1)])
-                # check DatetimeIndex monotonic path
-                assert getattr(obj, opname)() == datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
-                # check DatetimeIndex non-monotonic path
-                assert getattr(obj, opname)(), datetime(2011, 11, 1)
-                assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                assert getattr(obj, arg_op)() == 1
-                result = getattr(obj, arg_op)(skipna=False)
-                if klass is Series:
-                    assert np.isnan(result)
-                else:
-                    assert result == -1
-
-                for dtype in ["M8[ns]", "datetime64[ns, UTC]"]:
-                    # cases with empty Series/DatetimeIndex
-                    obj = klass([], dtype=dtype)
-
-                    assert getattr(obj, opname)() is pd.NaT
-                    assert getattr(obj, opname)(skipna=False) is pd.NaT
-
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)()
-                    with pytest.raises(ValueError, match="empty sequence"):
-                        getattr(obj, arg_op)(skipna=False)
-
-        # argmin/max
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([np.nan, 2.0])
+        assert getattr(obj, opname)() == 2.0
+
+        obj = klass([np.nan])
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([], dtype=object)
+        assert pd.isna(getattr(obj, opname)())
+        assert pd.isna(getattr(obj, opname)(skipna=False))
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1)])
+        # check DatetimeIndex monotonic path
+        assert getattr(obj, opname)() == datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+        obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
+        # check DatetimeIndex non-monotonic path
+        assert getattr(obj, opname)() == datetime(2011, 11, 1)
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        assert getattr(obj, arg_op)() == 1
+        result = getattr(obj, arg_op)(skipna=False)
+        if klass is Series:
+            assert np.isnan(result)
+        else:
+            assert result == -1
+
+    @pytest.mark.parametrize("opname", ["max", "min"])
+    @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
+    def test_nanops_empty_object(self, opname, index_or_series, dtype):
+        klass = index_or_series
+        arg_op = "arg" + opname if klass is Index else "idx" + opname
+
+        obj = klass([], dtype=dtype)
+
+        assert getattr(obj, opname)() is pd.NaT
+        assert getattr(obj, opname)(skipna=False) is pd.NaT
+
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)()
+        with pytest.raises(ValueError, match="empty sequence"):
+            getattr(obj, arg_op)(skipna=False)
+
+    def test_argminmax(self):
         obj = Index(np.arange(5, dtype="int64"))
         assert obj.argmin() == 0
         assert obj.argmax() == 4
@@ -224,16 +228,17 @@ def test_minmax_timedelta64(self):
         assert idx.argmin() == 0
         assert idx.argmax() == 2

-        for op in ["min", "max"]:
-            # Return NaT
-            obj = TimedeltaIndex([])
-            assert pd.isna(getattr(obj, op)())
+    @pytest.mark.parametrize("op", ["min", "max"])
+    def test_minmax_timedelta_empty_or_na(self, op):
+        # Return NaT
+        obj = TimedeltaIndex([])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

-            obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
-            assert pd.isna(getattr(obj, op)())
+        obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
+        assert getattr(obj, op)() is pd.NaT

     def test_numpy_minmax_timedelta64(self):
         td = timedelta_range("16815 days", "16820 days", freq="D")

From 7b0887c2ea7255139be1cc16a179e0b4574384d2 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 17 Feb 2020 15:35:21 -0600
Subject: [PATCH 5/5] DOC: pin gitdb2 (#32064)

---
 environment.yml      | 1 +
 requirements-dev.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/environment.yml b/environment.yml
index 5f1184e921119..cbdaf8e6c4217 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,6 +26,7 @@ dependencies:

   # documentation
   - gitpython  # obtain contributors from git for whatsnew
+  - gitdb2=2.0.6  # GH-32060
   - sphinx

   # documentation (jupyter notebooks)
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 08cbef2c7fc6b..a469cbdd93ceb 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -15,6 +15,7 @@ isort
 mypy==0.730
 pycodestyle
 gitpython
+gitdb2==2.0.6
 sphinx
 nbconvert>=5.4.1
 nbsphinx
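[Annotation] The test cleanup in PATCH 4/5 follows a standard pytest recipe: hand-rolled for-loops become @pytest.mark.parametrize decorators and the Index-vs-Series switch becomes a fixture, so every combination runs and reports as its own test case instead of the whole loop stopping at the first failure. A standalone sketch of the pattern -- the local index_or_series fixture below is a stand-in for the one pandas defines in its shared conftest.py:

    import numpy as np
    import pandas as pd
    import pytest

    @pytest.fixture(params=[pd.Index, pd.Series])
    def index_or_series(request):
        # stand-in for pandas' conftest fixture of the same name
        return request.param

    @pytest.mark.parametrize("opname", ["max", "min"])
    def test_nan_is_skipped(opname, index_or_series):
        # expands to four independent tests: {max,min} x {Index,Series}
        obj = index_or_series([np.nan, 2.0])
        assert getattr(obj, opname)() == 2.0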