From cde3cab62839c7bc23a1ab7578ea1ab9b7ab2945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Heikkil=C3=A4?= Date: Fri, 20 Apr 2018 13:39:03 +0300 Subject: [PATCH] BUG: fixes indexing with monotonic decreasing DTI (#19362) (#20677) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/algorithms.py | 2 + pandas/core/indexes/datetimelike.py | 3 +- pandas/core/reshape/merge.py | 3 +- .../indexes/datetimes/test_partial_slicing.py | 21 ++++ pandas/tests/reshape/merge/test_merge.py | 102 ++++++++++++++---- 6 files changed, 106 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e9a4ec9328a9b8..bcc442189bf11b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1113,6 +1113,7 @@ Indexing - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). - Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) - Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) +- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 065a5782aced16..c6a2586f0e93a2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1490,6 +1490,8 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, if is_sparse(arr): arr = arr.get_values() + elif isinstance(arr, (ABCIndexClass, ABCSeries)): + arr = arr.values if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 95e1f8438c704f..95186b2e79a167 100644 --- 
a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -342,7 +342,8 @@ def _format_with_header(self, header, **kwargs): def __contains__(self, key): try: res = self.get_loc(key) - return is_scalar(res) or type(res) == slice or np.any(res) + return (is_scalar(res) or isinstance(res, slice) or + (is_list_like(res) and len(res))) except (KeyError, TypeError, ValueError): return False diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 7b1a0875bba590..0204e655bfa2c3 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -705,8 +705,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): take_right = self.right[name]._values elif left_indexer is not None \ - and isinstance(self.left_join_keys[i], np.ndarray): - + and is_array_like(self.left_join_keys[i]): take_left = self.left_join_keys[i] take_right = self.right_join_keys[i] diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index f263ac78cd3438..4580d9fff31d59 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -91,6 +91,27 @@ def test_slice_duplicate_monotonic(self): expected = Timestamp('2017-01-01') assert result == expected + def test_monotone_DTI_indexing_bug(self): + # GH 19362 + # Testing accessing the first element in a monotonic descending + # partial string indexing. 
+ + df = pd.DataFrame(list(range(5))) + date_list = ['2018-01-02', '2017-02-10', '2016-03-10', + '2015-03-15', '2014-03-16'] + date_index = pd.to_datetime(date_list) + df['date'] = date_index + expected = pd.DataFrame({0: list(range(5)), 'date': date_index}) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame({'A': [1, 2, 3]}, + index=pd.date_range('20170101', + periods=3)[::-1]) + expected = pd.DataFrame({'A': 1}, + index=pd.date_range('20170103', + periods=1)) + tm.assert_frame_equal(df.loc['2017-01-03'], expected) + def test_slice_year(self): dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index dbf7c7f100b0e1..3164367e6861f6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,26 +1,27 @@ # pylint: disable=E1103 -import pytest -from datetime import datetime, date -from numpy.random import randn -from numpy import nan -import numpy as np import random import re +from collections import OrderedDict +from datetime import date, datetime + +import numpy as np +import pytest +from numpy import nan +from numpy.random import randn import pandas as pd +import pandas.util.testing as tm +from pandas import (Categorical, CategoricalIndex, DataFrame, DatetimeIndex, + Float64Index, Index, Int64Index, MultiIndex, RangeIndex, + Series, UInt64Index) +from pandas.api.types import CategoricalDtype as CDT from pandas.compat import lrange, lzip +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat -from pandas.core.reshape.merge import merge, MergeError +from pandas.core.reshape.merge import MergeError, merge from pandas.util.testing import assert_frame_equal, assert_series_equal -from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.common import ( - 
is_categorical_dtype, - is_object_dtype, -) -from pandas import DataFrame, Index, MultiIndex, Series, Categorical -import pandas.util.testing as tm -from pandas.api.types import CategoricalDtype as CDT N = 50 NGROUPS = 8 @@ -813,7 +814,7 @@ def test_validation(self): # Dups on right right_w_dups = right.append(pd.DataFrame({'a': ['e'], 'c': ['moo']}, - index=[4])) + index=[4])) merge(left, right_w_dups, left_index=True, right_index=True, validate='one_to_many') @@ -1388,17 +1389,24 @@ def test_merge_datetime_index(self, klass): if klass is not None: on_vector = klass(on_vector) - expected = DataFrame({"a": [1, 2, 3]}) - - if klass == np.asarray: - # The join key is added for ndarray. - expected["key_1"] = [2016, 2017, 2018] + expected = DataFrame( + OrderedDict([ + ("a", [1, 2, 3]), + ("key_1", [2016, 2017, 2018]), + ]) + ) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame({"a_x": [1, 2, 3], - "a_y": [1, 2, 3]}) + expected = DataFrame( + OrderedDict([ + ("key_0", [2016, 2017, 2018]), + ("a_x", [1, 2, 3]), + ("a_y", [1, 2, 3]), + ]) + ) + result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) @@ -1427,7 +1435,7 @@ def test_different(self, right_vals): # We allow merging on object and categorical cols and cast # categorical cols to object if (is_categorical_dtype(right['A'].dtype) or - is_object_dtype(right['A'].dtype)): + is_object_dtype(right['A'].dtype)): result = pd.merge(left, right, on='A') assert is_object_dtype(result.A.dtype) @@ -1826,3 +1834,51 @@ def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): how=how, sort=sort) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('index', + [ + CategoricalIndex( + data=['A', 'B'], + categories=['A', 'B'], + name='index_col', + dtype='category'), + Float64Index( + data=[1.0, 2.0], + name='index_col'), + Int64Index( + data=[1, 2], + name='index_col'), + UInt64Index( + 
data=[1, 2], + name='index_col'), + RangeIndex( + start=0, + stop=2, + name='index_col'), + DatetimeIndex( + data=["2018-01-01", "2018-01-02"], + name='index_col'), + ]) +def test_merge_index_types(index): + left = DataFrame( + {"left_data": [1, 2]}, + index=index + ) + right = DataFrame( + {"right_data": [1.0, 2.0]}, + index=index + ) + + result = left.merge(right, on=['index_col']) + + expected = DataFrame( + OrderedDict( + [ + ('left_data', [1, 2]), + ('right_data', [1.0, 2.0]), + ] + ), + index=index + ) + assert_frame_equal(result, expected)