From 350b0ecdf11c18c40dbc7fed0082240add8ae047 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 20 Dec 2018 20:44:46 -0800 Subject: [PATCH 01/13] implement _index_data parts of #24024 --- pandas/_libs/reduction.pyx | 13 ++++++++++--- pandas/core/indexes/base.py | 6 ++++++ pandas/core/indexes/datetimes.py | 2 ++ pandas/core/indexes/period.py | 2 ++ pandas/core/indexes/timedeltas.py | 2 ++ 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 6f892c928805e..a61295f781901 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -265,7 +265,10 @@ cdef class SeriesBinGrouper: cached_typ = self.typ(vslider.buf, index=cached_ityp, name=name) else: - object.__setattr__(cached_ityp, '_data', islider.buf) + # See the comment in indexes/base.py about _index_data. + # We need this for EA-backed indexes that have a reference + # to a 1-d ndarray like datetime / timedelta / period. + object.__setattr__(cached_ityp, '_index_data', islider.buf) cached_ityp._engine.clear_mapping() object.__setattr__( cached_typ._data._block, 'values', vslider.buf) @@ -569,8 +572,11 @@ cdef class BlockSlider: util.set_array_not_contiguous(x) self.nblocks = len(self.blocks) + # See the comment in indexes/base.py about _index_data. + # We need this for EA-backed indexes that have a reference to a 1-d + # ndarray like datetime / timedelta / period. self.idx_slider = Slider( - self.frame.index.values, self.dummy.index.values) + self.frame.index._index_data, self.dummy.index._index_data) self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) for i, block in enumerate(self.blocks): @@ -594,7 +600,8 @@ cdef class BlockSlider: # move and set the index self.idx_slider.move(start, end) - object.__setattr__(self.index, '_data', self.idx_slider.buf) + + object.__setattr__(self.index, '_index_data', self.idx_slider.buf) self.index._engine.clear_mapping() cdef reset(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cc6f182fadce6..a2cf88fa9cb1a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -519,6 +519,12 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): result = object.__new__(cls) result._data = values + # _index_data is a (temporary?) fix to ensure that the direct data + # manipulation we do in `_libs/reduction.pyx` continues to work. + # We need access to the actual ndarray, since we're messing with + # data buffers and strides. We don't re-use `_ndarray_values`, since + # we actually set this value too. + result._index_data = values result.name = name for k, v in compat.iteritems(kwargs): setattr(result, k, v) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1c966ab58e8c4..0e4132524045c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -269,6 +269,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): result = super(DatetimeIndex, cls)._simple_new(values, freq, tz) result.name = name + # For groupby perf. See note in indexes/base about _index_data + result._index_data = result._data result._reset_identity() return result diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7ece1eaf547c8..17666cd651a50 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -235,6 +235,8 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): raise TypeError("PeriodIndex._simple_new only accepts PeriodArray") result = object.__new__(cls) result._data = values + # For groupby perf. See note in indexes/base about _index_data + result._index_data = values._data result.name = name result._reset_identity() return result diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5d52696992c30..e6c714683979f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -199,6 +199,8 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result = super(TimedeltaIndex, cls)._simple_new(values, freq) result.name = name + # For groupby perf. See note in indexes/base about _index_data + result._index_data = result._data result._reset_identity() return result From 837c16ad9c34d51f8281c7b9f8aae86d6f575935 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 20 Dec 2018 21:16:44 -0800 Subject: [PATCH 02/13] implement _eadata, dispatch arithmetic methods to it --- pandas/core/indexes/datetimelike.py | 29 ++++++++++++++--- pandas/core/indexes/datetimes.py | 5 +++ pandas/core/indexes/period.py | 37 +++------------------ pandas/core/indexes/timedeltas.py | 38 +++++++--------------- pandas/tests/arithmetic/test_datetime64.py | 15 +++++---- 5 files changed, 55 insertions(+), 69 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index dd2537c11a94c..b1812bb83f913 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -40,7 +40,6 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): # override DatetimeLikeArrayMixin method copy = Index.copy unique = Index.unique - take = Index.take # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index @@ -420,7 +419,7 @@ def _add_datetimelike_methods(cls): def __add__(self, other): # dispatch to ExtensionArray implementation - result = super(cls, self).__add__(other) + result = self._eadata.__add__(maybe_unwrap_index(other)) return wrap_arithmetic_op(self, other, result) cls.__add__ = __add__ @@ -432,13 +431,13 @@ def __radd__(self, other): def __sub__(self, other): # dispatch to ExtensionArray implementation - result = super(cls, self).__sub__(other) + result = self._eadata.__sub__(maybe_unwrap_index(other)) return wrap_arithmetic_op(self, other, result) cls.__sub__ = __sub__ def __rsub__(self, other): - result = super(cls, self).__rsub__(other) + result = self._eadata.__rsub__(maybe_unwrap_index(other)) return wrap_arithmetic_op(self, other, result) cls.__rsub__ = __rsub__ @@ -573,6 +572,28 @@ def _time_shift(self, periods, freq=None): return result +def maybe_unwrap_index(obj): + """ + If operating against another Index object, we need to unwrap the underlying + data before deferring to the DatetimeArray/TimedeltaArray/PeriodArray + implementation, otherwise we will incorrectly return NotImplemented. + + Parameters + ---------- + obj : object + + Returns + ------- + unwrapped object + """ + if isinstance(obj, ABCIndexClass): + if isinstance(obj, DatetimeIndexOpsMixin): + # i.e. PeriodIndex/DatetimeIndex/TimedeltaIndex + return obj._eadata + return obj._data + return obj + + def wrap_arithmetic_op(self, other, result): if result is NotImplemented: return NotImplemented diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 0e4132524045c..22ca6e67735cf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -276,6 +276,11 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): # -------------------------------------------------------------------- + @property + def _eadata(self): + return DatetimeArray._simple_new(self._data, tz=self.tz, + freq=self.freq) + @property def _values(self): # tz-naive -> ndarray diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 17666cd651a50..daca160d956d0 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -247,6 +247,10 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): # ------------------------------------------------------------------------ # Data + @property + def _eadata(self): + return self._data # PeriodArray + @property def _ndarray_values(self): return self._data._ndarray_values @@ -878,39 +882,6 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ - @classmethod - def _add_datetimelike_methods(cls): - """ - add in the datetimelike methods (as we may have to override the - superclass) - """ - # TODO(DatetimeArray): move this up to DatetimeArrayMixin - - def __add__(self, other): - # dispatch to ExtensionArray implementation - result = self._data.__add__(other) - return wrap_arithmetic_op(self, other, result) - - cls.__add__ = __add__ - - def __radd__(self, other): - # alias for __add__ - return self.__add__(other) - cls.__radd__ = __radd__ - - def __sub__(self, other): - # dispatch to ExtensionArray implementation - result = self._data.__sub__(other) - return wrap_arithmetic_op(self, other, result) - - cls.__sub__ = __sub__ - - def __rsub__(self, other): - result = self._data.__rsub__(other) - return wrap_arithmetic_op(self, other, result) - - cls.__rsub__ = __rsub__ - @classmethod def _create_comparison_method(cls, op): """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e6c714683979f..69c9cd70a4172 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -23,7 +23,7 @@ from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, wrap_arithmetic_op, wrap_array_method, - wrap_field_accessor) + wrap_field_accessor, maybe_unwrap_index) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type @@ -36,11 +36,7 @@ def _make_wrapped_arith_op(opname): meth = getattr(TimedeltaArray, opname) def method(self, other): - oth = other - if isinstance(other, Index): - oth = other._data - - result = meth(self, oth) + result = meth(self._eadata, maybe_unwrap_index(other)) return wrap_arithmetic_op(self, other, result) method.__name__ = opname @@ -206,6 +202,11 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): # ------------------------------------------------------------------- + @property + def _eadata(self): + return TimedeltaArray._simple_new(self._data, + freq=self.freq) + def __setstate__(self, state): """Necessary for making this object picklable""" if isinstance(state, dict): @@ -247,6 +248,11 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): __rmod__ = _make_wrapped_arith_op("__rmod__") __divmod__ = _make_wrapped_arith_op("__divmod__") __rdivmod__ = _make_wrapped_arith_op("__rdivmod__") + __truediv__ = _make_wrapped_arith_op("__truediv__") + __rtruediv__ = _make_wrapped_arith_op("__rtruediv__") + if compat.PY2: + __div__ = __truediv__ + __rdiv__ = __rtruediv__ days = wrap_field_accessor(TimedeltaArray.days) seconds = wrap_field_accessor(TimedeltaArray.seconds) @@ -255,26 +261,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True) - def __truediv__(self, other): - oth = other - if isinstance(other, Index): - # TimedeltaArray defers, so we need to unwrap - oth = other._values - result = TimedeltaArray.__truediv__(self, oth) - return wrap_arithmetic_op(self, other, result) - - def __rtruediv__(self, other): - oth = other - if isinstance(other, Index): - # TimedeltaArray defers, so we need to unwrap - oth = other._values - result = TimedeltaArray.__rtruediv__(self, oth) - return wrap_arithmetic_op(self, other, result) - - if compat.PY2: - __div__ = __truediv__ - __rdiv__ = __rtruediv__ - # Compat for frequency inference, see GH#23789 _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 02e9c212b56ef..dbd9e4f265dbe 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1471,7 +1471,8 @@ def check(get_ser, test_ser): # with 'operate' (from core/ops.py) for the ops that are not # defined op = getattr(get_ser, op_str, None) - with pytest.raises(TypeError, match='operate|[cC]annot'): + with pytest.raises(TypeError, + match='operate|[cC]annot|unsupported operand'): op(test_ser) # ## timedelta64 ### @@ -1853,7 +1854,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): result = dti - tdi tm.assert_index_equal(result, expected) - msg = 'cannot subtract .*TimedeltaIndex' + msg = 'cannot subtract .*TimedeltaArrayMixin' with pytest.raises(TypeError, match=msg): tdi - dti @@ -1861,7 +1862,7 @@ def test_dti_sub_tdi(self, tz_naive_fixture): result = dti - tdi.values tm.assert_index_equal(result, expected) - msg = 'cannot subtract DatetimeIndex from' + msg = 'cannot subtract DatetimeArrayMixin from' with pytest.raises(TypeError, match=msg): tdi.values - dti @@ -1877,7 +1878,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi tm.assert_index_equal(result, expected) - msg = 'cannot subtract .*TimedeltaIndex' + msg = 'cannot subtract .* from a TimedeltaArrayMixin' with pytest.raises(TypeError, match=msg): tdi -= dti @@ -1888,7 +1889,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): msg = '|'.join(['cannot perform __neg__ with this index type:', 'ufunc subtract cannot use operands with types', - 'cannot subtract DatetimeIndex from']) + 'cannot subtract DatetimeArrayMixin from']) with pytest.raises(TypeError, match=msg): tdi.values -= dti @@ -1908,7 +1909,9 @@ def test_dti_isub_tdi(self, tz_naive_fixture): def test_add_datetimelike_and_dti(self, addend, tz): # GH#9631 dti = DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize(tz) - msg = 'cannot add DatetimeIndex and {0}'.format(type(addend).__name__) + msg = ('cannot add DatetimeArrayMixin and {0}' + .format(type(addend).__name__)).replace('DatetimeIndex', + 'DatetimeArrayMixin') with pytest.raises(TypeError, match=msg): dti + addend with pytest.raises(TypeError, match=msg): From e28ff51b1527133deca6cd29e26442f541c46d96 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 21 Dec 2018 08:43:40 -0800 Subject: [PATCH 03/13] dont mix DatetimeLikeArrayMixin into DatetimeIndexOpsMixin --- pandas/core/arrays/datetimes.py | 5 ++- pandas/core/indexes/datetimelike.py | 52 +++++++++++++++++++++++------ pandas/core/indexes/datetimes.py | 17 ++++------ pandas/core/indexes/period.py | 18 ++-------- pandas/core/indexes/timedeltas.py | 15 +++++++-- pandas/tests/indexes/common.py | 8 +++-- 6 files changed, 73 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c197d6d6e634b..959396a79d82a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -113,7 +113,7 @@ def wrapper(self, other): elif lib.is_scalar(other): return ops.invalid_comparison(self, other, op) else: - if isinstance(other, list): + if isinstance(other, list) or is_object_dtype(other): try: other = type(self)(other) except ValueError: @@ -124,6 +124,9 @@ def wrapper(self, other): # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) + if len(other) != len(self): + raise ValueError("Lengths must match") + if is_object_dtype(other): result = op(self.astype('O'), np.array(other)) o_mask = isna(other) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index b1812bb83f913..ff3040f73a60d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -10,7 +10,7 @@ from pandas._libs import NaT, iNaT, lib from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg from pandas.core.dtypes.common import ( ensure_int64, is_bool_dtype, is_categorical_dtype, @@ -32,14 +32,13 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) -class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): +class DatetimeIndexOpsMixin(object): """ common ops mixin to support a unified interface datetimelike Index """ # override DatetimeLikeArrayMixin method copy = Index.copy - unique = Index.unique # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index @@ -50,6 +49,34 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) + @property + def freqstr(self): + return self._eadata.freqstr + + def unique(self, level=None): + if level is not None: + self._validate_index_level(level) + + result = self._eadata.unique() + + # Note: if `self` is already unique, then self.unique() should share + # a `freq` with self. If not already unique, then self.freq must be + # None, so again sharing freq is correct. + return self._shallow_copy(result._data) + + @classmethod + def _create_comparison_method(cls, op): + """ + Create a comparison method that dispatches to ``cls.values``. + """ + def wrapper(self, other): + result = op(self._eadata, maybe_unwrap_index(other)) + return result + + wrapper.__doc__ = op.__doc__ + wrapper.__name__ = '__{}__'.format(op.__name__) + return wrapper + def equals(self, other): """ Determines if two Index objects contain the same elements. @@ -100,7 +127,7 @@ def wrapper(left, right): @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__) def _evaluate_compare(self, other, op): - result = DatetimeLikeArrayMixin._evaluate_compare(self, other, op) + result = self._eadata._evaluate_compare(other, op) if is_bool_dtype(result): return result try: @@ -563,13 +590,18 @@ def astype(self, dtype, copy=True): # and conversions for any datetimelike to float msg = 'Cannot cast {name} to dtype {dtype}' raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) - return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + return Index.astype(self, dtype, copy=copy) @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) def _time_shift(self, periods, freq=None): - result = DatetimeLikeArrayMixin._time_shift(self, periods, freq=freq) - result.name = self.name - return result + result = self._eadata._time_shift(periods, freq=freq) + return type(self)(result, name=self.name) + + @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') + @Appender(DatetimeLikeArrayMixin.shift.__doc__) + def shift(self, periods, freq=None): + result = self._eadata.shift(periods, freq=freq) + return type(self)(result, name=self.name) def maybe_unwrap_index(obj): @@ -630,7 +662,7 @@ def wrap_array_method(method, pin_name=False): method """ def index_method(self, *args, **kwargs): - result = method(self, *args, **kwargs) + result = method(self._eadata, *args, **kwargs) # Index.__new__ will choose the appropriate subclass to return result = Index(result) @@ -659,7 +691,7 @@ def wrap_field_accessor(prop): fget = prop.fget def f(self): - result = fget(self) + result = fget(self._eadata) if is_bool_dtype(result): # return numpy array b/c there is no BoolIndex return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 22ca6e67735cf..3279971eda4f6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -620,7 +620,7 @@ def astype(self, dtype, copy=True): return self.tz_convert(new_tz) elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) - return super(DatetimeIndex, self).astype(dtype, copy=copy) + return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) def _get_time_micros(self): values = self.asi8 @@ -718,15 +718,6 @@ def snap(self, freq='S'): return DatetimeIndex._simple_new(snapped, freq=freq) # TODO: what about self.name? tz? if so, use shallow_copy? - def unique(self, level=None): - if level is not None: - self._validate_index_level(level) - - # TODO(DatetimeArray): change dispatch once inheritance is removed - # call DatetimeArray method - result = DatetimeArray.unique(self) - return self._shallow_copy(result._data) - def join(self, other, how='left', level=None, return_indexers=False, sort=False): """ @@ -1094,6 +1085,12 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray + copy = DatetimeIndexOpsMixin.copy # i.e. Index.copy + unique = DatetimeIndexOpsMixin.unique # i.e. Index.unique + take = DatetimeIndexOpsMixin.take + shift = DatetimeIndexOpsMixin.shift + _create_comparison_method = DatetimeIndexOpsMixin._create_comparison_method + # Compat for frequency inference, see GH#23789 _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index daca160d956d0..96fe927e0f74c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -19,6 +19,7 @@ from pandas.core import common as com from pandas.core.accessor import delegate_names from pandas.core.algorithms import unique1d +from pandas.core.arrays import ExtensionOpsMixin from pandas.core.arrays.datetimelike import DatelikeOps from pandas.core.arrays.period import ( PeriodArray, period_array, validate_dtype_freq) @@ -26,7 +27,7 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import _index_shared_docs, ensure_index from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, wrap_arithmetic_op) + DatetimeIndexOpsMixin, DatetimelikeDelegateMixin) from pandas.core.indexes.datetimes import DatetimeIndex, Index, Int64Index from pandas.core.missing import isna from pandas.core.ops import get_op_result_name @@ -73,7 +74,7 @@ class PeriodDelegateMixin(DatetimelikeDelegateMixin): PeriodDelegateMixin._delegated_methods, typ="method") class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, - PeriodDelegateMixin): + PeriodDelegateMixin, ExtensionOpsMixin): """ Immutable ndarray holding ordinal values indicating regular periods in time such as particular years, quarters, months, etc. @@ -882,19 +883,6 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ - @classmethod - def _create_comparison_method(cls, op): - """ - Create a comparison method that dispatches to ``cls.values``. - """ - # TODO(DatetimeArray): move to base class. - def wrapper(self, other): - return op(self._data, other) - - wrapper.__doc__ = op.__doc__ - wrapper.__name__ = '__{}__'.format(op.__name__) - return wrapper - def repeat(self, repeats, *args, **kwargs): # TODO(DatetimeArray): Just use Index.repeat return Index.repeat(self, repeats, *args, **kwargs) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 69c9cd70a4172..862148ac5b7ec 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -22,8 +22,8 @@ import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin, wrap_arithmetic_op, wrap_array_method, - wrap_field_accessor, maybe_unwrap_index) + DatetimeIndexOpsMixin, maybe_unwrap_index, wrap_arithmetic_op, + wrap_array_method, wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type @@ -266,6 +266,15 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + copy = DatetimeIndexOpsMixin.copy # i.e. Index.copy + unique = DatetimeIndexOpsMixin.unique # i.e. Index.unique + take = DatetimeIndexOpsMixin.take + shift = DatetimeIndexOpsMixin.shift + _create_comparison_method = DatetimeIndexOpsMixin._create_comparison_method + # TODO: make sure we have a test for name retention analogous + # to series.test_arithmetic.test_ser_cmp_result_names; + # also for PeriodIndex which I think may be missing one + # ------------------------------------------------------------------- @Appender(_index_shared_docs['astype']) @@ -279,7 +288,7 @@ def astype(self, dtype, copy=True): convert='float64') return Index(values, name=self.name) return Index(result.astype('i8'), name=self.name) - return super(TimedeltaIndex, self).astype(dtype, copy=copy) + return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) def union(self, other): """ diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 0c886b9fd3c4b..499f01f0e7f7b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -610,7 +610,9 @@ def test_equals_op(self): index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with pytest.raises(ValueError, match="Lengths must match"): + + msg = "Lengths must match|could not be broadcast" + with pytest.raises(ValueError, match=msg): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -622,7 +624,7 @@ def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with pytest.raises(ValueError, match="Lengths must match"): + with pytest.raises(ValueError, match=msg): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -632,7 +634,7 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with pytest.raises(ValueError, match="Lengths must match"): + with pytest.raises(ValueError, match=msg): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) From fdf17707daabe416cb5f8f7b694221b881747050 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 21 Dec 2018 09:42:16 -0800 Subject: [PATCH 04/13] dont inherit TimedeltaIndex from TimedeltaArray --- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/datetimelike.py | 9 ++++- pandas/core/indexes/timedeltas.py | 62 +++++++++++++++++++++-------- 3 files changed, 55 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a5d074df338ee..50cbd396ab0bd 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -151,7 +151,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result = object.__new__(cls) result._data = values - result._freq = freq + result._freq = to_offset(freq) return result def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f9bd010ade981..f6faf4e3219ce 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -49,6 +49,13 @@ class DatetimeIndexOpsMixin(object): _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) + def _box_values(self, values): + return self._eadata._box_values(values) + + def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): + return self._eadata._maybe_mask_results( + result, fill_value=fill_value, convert=convert) + @property def freqstr(self): return self._eadata.freqstr @@ -540,7 +547,7 @@ def _concat_same_dtype(self, to_concat, name): # - remove the .asi8 here # - remove the _maybe_box_as_values # - combine with the `else` block - new_data = self._concat_same_type(to_concat).asi8 + new_data = self._eadata._concat_same_type(to_concat).asi8 else: new_data = type(self._values)._concat_same_type(to_concat) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 862148ac5b7ec..7e2aef778b74b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -15,7 +15,7 @@ import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna -from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays import ExtensionOpsMixin, datetimelike as dtl from pandas.core.arrays.timedeltas import ( TimedeltaArrayMixin as TimedeltaArray, _is_convertible_to_td, _to_m8) from pandas.core.base import _shared_docs @@ -43,8 +43,8 @@ def method(self, other): return method -class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, - dtl.TimelikeOps, Int64Index): +class TimedeltaIndex(DatetimeIndexOpsMixin, + dtl.TimelikeOps, Int64Index, ExtensionOpsMixin): """ Immutable ndarray of timedelta64 data, represented internally as int64, and which can be boxed to timedelta objects @@ -159,10 +159,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, "endpoints is deprecated. Use " "`pandas.timedelta_range` instead.", FutureWarning, stacklevel=2) - result = cls._generate_range(start, end, periods, freq, - closed=closed) - result.name = name - return result + result = TimedeltaArray._generate_range(start, end, periods, freq, + closed=closed) + return cls._simple_new(result._data, freq=freq, name=name) if is_scalar(data): raise TypeError('{cls}() must be called with a ' @@ -177,10 +176,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, # - Cases checked above all return/raise before reaching here - # - result = cls._from_sequence(data, freq=freq, unit=unit, - dtype=dtype, copy=copy) - result.name = name - return result + tdarr = TimedeltaArray._from_sequence(data, freq=freq, unit=unit, + dtype=dtype, copy=copy) + return cls._simple_new(tdarr._data, freq=tdarr.freq, name=name) @classmethod def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): @@ -193,7 +191,11 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): values = values.view('m8[ns]') assert values.dtype == 'm8[ns]', values.dtype - result = super(TimedeltaIndex, cls)._simple_new(values, freq) + freq = to_offset(freq) + tdarr = TimedeltaArray._simple_new(values, freq=freq) + result = object.__new__(cls) + result._data = tdarr._data + result._freq = tdarr._freq result.name = name # For groupby perf. See note in indexes/base about _index_data result._index_data = result._data @@ -275,6 +277,35 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # to series.test_arithmetic.test_ser_cmp_result_names; # also for PeriodIndex which I think may be missing one + @property + def _box_func(self): + return lambda x: Timedelta(x, unit='ns') + + def __getitem__(self, key): + result = self._eadata.__getitem__(key) + if is_scalar(result): + return result + return type(self)(result, name=self.name) + + @property + def freq(self): # TODO: get via eadata + return self._freq + + @freq.setter + def freq(self, value): # TODO: get via eadata + if value is not None: + # dispatch to TimedeltaArray to validate frequency + self._eadata.freq = value + + self._freq = to_offset(value) + + def to_pytimedelta(self): + return self._eadata.to_pytimedelta() + + @property + def components(self): + return self._eadata.components + # ------------------------------------------------------------------- @Appender(_index_shared_docs['astype']) @@ -761,7 +792,6 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, freq = 'D' freq, freq_infer = dtl.maybe_infer_freq(freq) - result = TimedeltaIndex._generate_range(start, end, periods, freq, - closed=closed) - result.name = name - return result + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, + closed=closed) + return TimedeltaIndex._simple_new(tdarr._data, freq=tdarr.freq, name=name) From ea3965d6406757aae714baf2d35c0d048635eed0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 21 Dec 2018 17:59:47 -0800 Subject: [PATCH 05/13] dont inherit from DatetimeArray --- pandas/core/arrays/datetimelike.py | 6 +- pandas/core/arrays/datetimes.py | 16 +++- pandas/core/arrays/timedeltas.py | 1 + pandas/core/indexes/datetimes.py | 127 ++++++++++++++++++++++++++--- 4 files changed, 134 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c01b04991e52b..6a96a3afc6abb 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -88,8 +88,7 @@ class DatelikeOps(object): def strftime(self, date_format): from pandas import Index - return Index(self.format(date_format=date_format), - dtype=compat.text_type) + return Index(self._format_native_types(date_format=date_format)) strftime.__doc__ = """ Convert to Index using specified date_format. @@ -1263,7 +1262,8 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): if lib.is_scalar(other) and isna(other): return iNaT - elif isinstance(other, (PeriodArray, ABCIndexClass)): + elif isinstance(other, (PeriodArray, ABCIndexClass, + DatetimeLikeArrayMixin)): # convert tz if needed if getattr(other, 'tz', None) is not None: if to_utc: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 959396a79d82a..9e0c92c33eef0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -18,7 +18,7 @@ is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCDataFrame from pandas.core.dtypes.missing import isna from pandas.core import ops @@ -113,6 +113,8 @@ def wrapper(self, other): elif lib.is_scalar(other): return ops.invalid_comparison(self, other, op) else: + # TODO: figure out why the is_object_dtpye check is needed, + # without we fail to raise on tzawareness_compat if isinstance(other, list) or is_object_dtype(other): try: other = type(self)(other) @@ -471,6 +473,18 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value + # ----------------------------------------------------------------- + # Rendering Methods + + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + from pandas.io.formats.format import _get_format_datetime64_from_values + fmt = _get_format_datetime64_from_values(self, date_format) + + return tslib.format_array_from_datetime(self.asi8, + tz=self.tz, + format=fmt, + na_rep=na_rep) + # ----------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 50cbd396ab0bd..4692303b5ac91 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -75,6 +75,7 @@ def _td_array_cmp(cls, op): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) def wrapper(self, other): + if _is_convertible_to_td(other) or other is NaT: try: other = _to_m8(other) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3279971eda4f6..b0304b28bfb0c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -20,6 +20,7 @@ import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna +from pandas.core.arrays import ExtensionOpsMixin from pandas.core.arrays.datetimes import ( DatetimeArrayMixin as DatetimeArray, _to_m8) from pandas.core.base import _shared_docs @@ -61,7 +62,7 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index): +class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, ExtensionOpsMixin): """ Immutable ndarray of datetime64 data, represented internally as int64, and which can be boxed to Timestamp objects that are subclasses of datetime and @@ -205,6 +206,7 @@ def _join_i8_wrapper(joinf, **kwargs): _object_ops = DatetimeArray._object_ops _field_ops = DatetimeArray._field_ops _datetimelike_ops = DatetimeArray._datetimelike_ops + _datetimelike_methods = DatetimeArray._datetimelike_methods # -------------------------------------------------------------------- # Constructors @@ -227,11 +229,12 @@ def __new__(cls, data=None, "endpoints is deprecated. Use " "`pandas.date_range` instead.", FutureWarning, stacklevel=2) - result = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) - result.name = name - return result + dtarr = DatetimeArray._generate_range( + start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + return cls._simple_new( + dtarr._data, freq=dtarr.freq, tz=dtarr.tz, name=name) if is_scalar(data): raise TypeError("{cls}() must be called with a " @@ -267,7 +270,11 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes assert isinstance(values, np.ndarray), type(values) - result = super(DatetimeIndex, cls)._simple_new(values, freq, tz) + dtarr = DatetimeArray._simple_new(values, freq=freq, tz=tz) + result = object.__new__(cls) + result._data = dtarr._data + result._freq = dtarr.freq + result._tz = dtarr.tz result.name = name # For groupby perf. See note in indexes/base about _index_data result._index_data = result._data @@ -281,6 +288,10 @@ def _eadata(self): return DatetimeArray._simple_new(self._data, tz=self.tz, freq=self.freq) + @property + def dtype(self): + return self._eadata.dtype + @property def _values(self): # tz-naive -> ndarray @@ -301,6 +312,8 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + tzinfo = tz + @property def size(self): # TODO: Remove this when we have a DatetimeTZArray @@ -625,7 +638,7 @@ def astype(self, dtype, copy=True): def _get_time_micros(self): values = self.asi8 if self.tz is not None and not timezones.is_utc(self.tz): - values = self._local_timestamps() + values = self._eadata._local_timestamps() return fields.get_time_micros(values) def to_series(self, keep_tz=None, index=None, name=None): @@ -1138,6 +1151,97 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): month_name = wrap_array_method(DatetimeArray.month_name, True) day_name = wrap_array_method(DatetimeArray.day_name, True) + @property + def date(self): + return self._eadata.date + + @property + def time(self): + return self._eadata.time + + @property + def timetz(self): + return self._eadata.timetz + + def strftime(self, date_format): + return self._eadata.strftime(date_format) + + def round(self, freq, ambiguous='raise', nonexistent='raise'): + result = self._eadata.round( + freq, ambiguous=ambiguous, nonexistent=nonexistent) + return type(self)._simple_new( + result._data, freq=result.freq, tz=result.tz) + + def floor(self, freq, ambiguous='raise', nonexistent='raise'): + result = self._eadata.floor( + freq, ambiguous=ambiguous, nonexistent=nonexistent) + return type(self)._simple_new( + result._data, freq=result.freq, tz=result.tz) + + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + result = self._eadata.ceil( + freq, ambiguous=ambiguous, nonexistent=nonexistent) + return type(self)._simple_new( + result._data, freq=result.freq, tz=result.tz) + + @property + def offset(self): + """ + get/set the frequency of the instance + """ + msg = ('{cls}.offset has been deprecated and will be removed ' + 'in a future version; use {cls}.freq instead.' + .format(cls=type(self).__name__)) + warnings.warn(msg, FutureWarning, stacklevel=2) + return self.freq + + @offset.setter + def offset(self, value): + """ + get/set the frequency of the instance + """ + msg = ('{cls}.offset has been deprecated and will be removed ' + 'in a future version; use {cls}.freq instead.' + .format(cls=type(self).__name__)) + warnings.warn(msg, FutureWarning, stacklevel=2) + self.freq = value + + @property + def freq(self): + return self._freq + + @freq.setter + def freq(self, value): + if value is not None: + # let DatetimeArray to validation + self._eadata.freq = value + + self._freq = to_offset(value) + + def __getitem__(self, key): + result = self._eadata.__getitem__(key) + if is_scalar(result): + return result + elif result.ndim > 1: + # To support MPL which performs slicing with 2 dim + # even though it only has 1 dim by definition + assert isinstance(result, np.ndarray), result + return result + return type(self)(result, name=self.name) + + def _has_same_tz(self, other): + return self._eadata._has_same_tz(other) + + @property + def _box_func(self): + return lambda x: Timestamp(x, tz=self.tz) + + def __array__(self, dtype=None): + return self._eadata.__array__(dtype=dtype) + + def to_pydatetime(self): + return self._eadata.to_pydatetime() + # -------------------------------------------------------------------- @Substitution(klass='DatetimeIndex') @@ -1475,13 +1579,12 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, if freq is None and com._any_none(periods, start, end): freq = 'D' - result = DatetimeIndex._generate_range( + dtarr = DatetimeArray._generate_range( start=start, end=end, periods=periods, freq=freq, tz=tz, normalize=normalize, closed=closed, **kwargs) - - result.name = name - return result + return DatetimeIndex._simple_new( + dtarr._data, tz=dtarr.tz, freq=dtarr.freq, name=name) def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, From a5e5d65e8aa08c8e104466facbd59d3a8795b325 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 13:15:13 -0800 Subject: [PATCH 06/13] use ea_passthrough --- pandas/core/indexes/datetimelike.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9cdf68567bac0..10b7fb4ed8127 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -31,6 +31,17 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) +def ea_passthrough(name): + meth = getattr(DatetimeLikeArrayMixin, name) + + def method(self, *args, **kwargs): + return meth(self._eadata, *args, **kwargs) + + method.__name__ = name + method.__doc__ = meth.__doc__ + return method + + class DatetimeIndexOpsMixin(ExtensionOpsMixin): """ common ops mixin to support a unified interface datetimelike Index @@ -50,15 +61,9 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) - def _box_values(self, values): - return self._eadata._box_values(values) - - def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): - return self._eadata._maybe_mask_results( - result, fill_value=fill_value, convert=convert) - - def __iter__(self): - return self._eadata.__iter__() + _box_values = ea_passthrough("_box_values") + _maybe_mask_results = ea_passthrough("_maybe_mask_results") + __iter__ = ea_passthrough("__iter__") @property def freqstr(self): From 1ff0c4dfa49c32e55e79116d6c0d0101102e06c7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 13:49:00 -0800 Subject: [PATCH 07/13] remove previously-overriden overridings --- pandas/core/arrays/timedeltas.py | 1 - pandas/core/indexes/datetimelike.py | 10 ++-------- pandas/core/indexes/datetimes.py | 29 ++++------------------------- pandas/core/indexes/timedeltas.py | 11 ----------- 4 files changed, 6 insertions(+), 45 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e0aa79892626f..1dc73feb84390 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -76,7 +76,6 @@ def _td_array_cmp(cls, op): meth = getattr(dtl.DatetimeLikeArrayMixin, opname) def wrapper(self, other): - if _is_convertible_to_td(other) or other is NaT: try: other = _to_m8(other) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 10b7fb4ed8127..ac6e4760751a3 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -32,13 +32,11 @@ def ea_passthrough(name): - meth = getattr(DatetimeLikeArrayMixin, name) - def method(self, *args, **kwargs): - return meth(self._eadata, *args, **kwargs) + return getattr(self._eadata, name)(*args, **kwargs) method.__name__ = name - method.__doc__ = meth.__doc__ + # TODO: docstrings return method @@ -47,10 +45,6 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin): common ops mixin to support a unified interface datetimelike Index """ - # override DatetimeLikeArrayMixin method - copy = Index.copy - view = Index.view - # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index # subclasses bc they are immutable diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 73751f6555216..5b85f0e50b2be 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -26,7 +26,7 @@ import pandas.core.common as com from pandas.core.indexes.base import Index from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin, DatetimelikeDelegateMixin) + DatetimeIndexOpsMixin, DatetimelikeDelegateMixin, ea_passthrough) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools @@ -1149,26 +1149,14 @@ def _eadata(self): _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique - astype = DatetimeIndexOpsMixin.astype _timezone = cache_readonly(DatetimeArray._timezone.fget) is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) _resolution = cache_readonly(DatetimeArray._resolution.fget) - @property - def date(self): - return self._eadata.date - - @property - def time(self): - return self._eadata.time - - @property - def timetz(self): - return self._eadata.timetz - - def strftime(self, date_format): - return self._eadata.strftime(date_format) + strftime = ea_passthrough("strftime") + _has_same_tz = ea_passthrough("_has_same_tz") + __array__ = ea_passthrough("__array__") def round(self, freq, ambiguous='raise', nonexistent='raise'): result = self._eadata.round( @@ -1233,19 +1221,10 @@ def __getitem__(self, key): return result return type(self)(result, name=self.name) - def _has_same_tz(self, other): - return self._eadata._has_same_tz(other) - @property def _box_func(self): return lambda x: Timestamp(x, tz=self.tz) - def __array__(self, dtype=None): - return self._eadata.__array__(dtype=dtype) - - def to_pydatetime(self): - return self._eadata.to_pydatetime() - # -------------------------------------------------------------------- @Substitution(klass='DatetimeIndex') diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b6cb8fe9d1401..166d23e5ac57f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -312,10 +312,6 @@ def _eadata(self): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique - copy = DatetimeIndexOpsMixin.copy # i.e. Index.copy - unique = DatetimeIndexOpsMixin.unique # i.e. Index.unique - take = DatetimeIndexOpsMixin.take - shift = DatetimeIndexOpsMixin.shift _create_comparison_method = DatetimeIndexOpsMixin._create_comparison_method # TODO: make sure we have a test for name retention analogous # to series.test_arithmetic.test_ser_cmp_result_names; @@ -343,13 +339,6 @@ def freq(self, value): # TODO: get via eadata self._freq = to_offset(value) - def to_pytimedelta(self): - return self._eadata.to_pytimedelta() - - @property - def components(self): - return self._eadata.components - # ------------------------------------------------------------------- @Appender(_index_shared_docs['astype']) From 3faed22e93909052eec1d358da8a2e992de08d52 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 14:27:02 -0800 Subject: [PATCH 08/13] stop double-mixing --- pandas/core/indexes/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index df594b44cbb5a..5e70246419031 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -74,7 +74,7 @@ class PeriodDelegateMixin(DatetimelikeDelegateMixin): PeriodDelegateMixin._delegated_methods, typ="method") class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, - PeriodDelegateMixin, ExtensionOpsMixin): + PeriodDelegateMixin): """ Immutable ndarray holding ordinal values indicating regular periods in time such as particular years, quarters, months, etc. From e607edd85cc47aca234da9cdbf9483d57e1c211c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 15:27:02 -0800 Subject: [PATCH 09/13] stop over-writing --- pandas/core/indexes/datetimes.py | 5 ----- pandas/core/indexes/timedeltas.py | 6 ------ 2 files changed, 11 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5b85f0e50b2be..2260d27edd847 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -96,11 +96,6 @@ class DatetimeDelegateMixin(DatetimelikeDelegateMixin): _delegate_class = DatetimeArray -@delegate_names(DatetimeArray, ["to_period", "tz_localize", "tz_convert", - "day_name", "month_name"], - typ="method", overwrite=True) -@delegate_names(DatetimeArray, - DatetimeArray._field_ops, typ="property", overwrite=True) @delegate_names(DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property") diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 166d23e5ac57f..40b64436953d9 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -64,12 +64,6 @@ class TimedeltaDelegateMixin(DatetimelikeDelegateMixin): } -@delegate_names(TimedeltaArray, - ["to_pytimedelta", "total_seconds"], - typ="method", overwrite=True) -@delegate_names(TimedeltaArray, - ["days", "seconds", "microseconds", "nanoseconds"], - typ="property", overwrite=True) @delegate_names(TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property") From 2afd6abed7abce21c9dba9490781ccb6679eed33 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 17:07:50 -0800 Subject: [PATCH 10/13] handle+test object arrays --- pandas/core/arrays/datetimes.py | 37 +++++---------------- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/datetimelike.py | 38 +++++++++++----------- pandas/tests/arithmetic/test_datetime64.py | 5 +++ 4 files changed, 33 insertions(+), 49 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0e020df6a9610..ece61d71cf212 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -119,9 +119,7 @@ def wrapper(self, other): elif len(other) != len(self): raise ValueError("Lengths must match") else: - # TODO: figure out why the is_object_dtpye check is needed, - # without we fail to raise on tzawareness_compat - if isinstance(other, list) or is_object_dtype(other): + if isinstance(other, list): try: other = type(self)._from_sequence(other) except ValueError: @@ -132,11 +130,14 @@ def wrapper(self, other): # and __ne__ is all True, others raise TypeError. return ops.invalid_comparison(self, other, op) - if len(other) != len(self): - raise ValueError("Lengths must match") - if is_object_dtype(other): - result = op(self.astype('O'), np.array(other)) + # We have to use _comp_method_OBJECT_ARRAY instead of numpy + # comparison otherwise it would fail to raise when + # comparing tz-aware and tz-naive + with np.errstate(all='ignore'): + result = ops._comp_method_OBJECT_ARRAY(op, + self.astype(object), + other) o_mask = isna(other) elif not (is_datetime64_dtype(other) or is_datetime64tz_dtype(other)): @@ -432,28 +433,6 @@ def _timezone(self): """ return timezones.get_timezone(self.tzinfo) - @property - def offset(self): - """ - get/set the frequency of the instance - """ - msg = ('{cls}.offset has been deprecated and will be removed ' - 'in a future version; use {cls}.freq instead.' - .format(cls=type(self).__name__)) - warnings.warn(msg, FutureWarning, stacklevel=2) - return self.freq - - @offset.setter - def offset(self, value): - """ - get/set the frequency of the instance - """ - msg = ('{cls}.offset has been deprecated and will be removed ' - 'in a future version; use {cls}.freq instead.' - .format(cls=type(self).__name__)) - warnings.warn(msg, FutureWarning, stacklevel=2) - self.freq = value - @property # NB: override with cache_readonly in immutable subclasses def is_normalized(self): """ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d37109b768a7a..376c99df080d8 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -156,7 +156,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result = object.__new__(cls) result._data = values - result._freq = to_offset(freq) + result._freq = freq return result def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ff77b1b165c10..376fe94890873 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -586,6 +586,25 @@ def shift(self, periods, freq=None): return type(self)(result, name=self.name) +def wrap_arithmetic_op(self, other, result): + if result is NotImplemented: + return NotImplemented + + if isinstance(result, tuple): + # divmod, rdivmod + assert len(result) == 2 + return (wrap_arithmetic_op(self, other, result[0]), + wrap_arithmetic_op(self, other, result[1])) + + if not isinstance(result, Index): + # Index.__new__ will choose appropriate subclass for dtype + result = Index(result) + + res_name = ops.get_op_result_name(self, other) + result.name = res_name + return result + + def maybe_unwrap_index(obj): """ If operating against another Index object, we need to unwrap the underlying @@ -608,25 +627,6 @@ def maybe_unwrap_index(obj): return obj -def wrap_arithmetic_op(self, other, result): - if result is NotImplemented: - return NotImplemented - - if isinstance(result, tuple): - # divmod, rdivmod - assert len(result) == 2 - return (wrap_arithmetic_op(self, other, result[0]), - wrap_arithmetic_op(self, other, result[1])) - - if not isinstance(result, Index): - # Index.__new__ will choose appropriate subclass for dtype - result = Index(result) - - res_name = ops.get_op_result_name(self, other) - result.name = res_name - return result - - class DatetimelikeDelegateMixin(PandasDelegate): """ Delegation mechanism, specific for Datetime, Timedelta, and Period types. diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 925b954a1f9c6..44817467b4694 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -593,12 +593,17 @@ def test_comparison_tzawareness_compat(self, op, box_with_array): # DataFrame op is invalid until transpose bug is fixed with pytest.raises(TypeError): op(dr, list(dz)) + with pytest.raises(TypeError): + op(dr, np.array(list(dz), dtype=object)) + with pytest.raises(TypeError): op(dz, dr) if box_with_array is not pd.DataFrame: # DataFrame op is invalid until transpose bug is fixed with pytest.raises(TypeError): op(dz, list(dr)) + with pytest.raises(TypeError): + op(dz, np.array(list(dr), dtype=object)) # Check that there isn't a problem aware-aware and naive-naive do not # raise From 9ddf4bd076cb66726a5baca32f2f2a6f31e4f6b5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 17:10:39 -0800 Subject: [PATCH 11/13] Remove unused import --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ece61d71cf212..204a759cf96f3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -19,7 +19,7 @@ is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCDataFrame +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import ops From 11dcef059811caee5fbb2d5d83984bce873d90e4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Dec 2018 17:11:53 -0800 Subject: [PATCH 12/13] flake8 fixup --- pandas/core/indexes/period.py | 1 - pandas/core/indexes/timedeltas.py | 5 ----- 2 files changed, 6 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5e70246419031..051c5ef3262ef 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -19,7 +19,6 @@ from pandas.core import common as com from pandas.core.accessor import delegate_names from pandas.core.algorithms import unique1d -from pandas.core.arrays import ExtensionOpsMixin from pandas.core.arrays.datetimelike import DatelikeOps from pandas.core.arrays.period import ( PeriodArray, period_array, validate_dtype_freq) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 40b64436953d9..53cd358e2f906 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -245,11 +245,6 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): # ------------------------------------------------------------------- - @property - def _eadata(self): - return TimedeltaArray._simple_new(self._data, - freq=self.freq) - def __setstate__(self, state): """Necessary for making this object picklable""" if isinstance(state, dict): From f6a8951409a26093548da95ebbe41c25d74d4783 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 29 Dec 2018 07:55:29 -0800 Subject: [PATCH 13/13] edits per comments --- pandas/core/indexes/datetimelike.py | 11 +++++++++++ pandas/core/indexes/datetimes.py | 26 ++++---------------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f8c1368def05f..d090d0e7d9caa 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -32,6 +32,17 @@ def ea_passthrough(name): + """ + Make an alias for a method of the underlying ExtensionArray. + + Parameters + ---------- + name : str + + Returns + ------- + method + """ def method(self, *args, **kwargs): return getattr(self._eadata, name)(*args, **kwargs) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2260d27edd847..a8651a25eef6b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -281,14 +281,14 @@ def __new__(cls, data=None, verify_integrity = True if data is None: - warnings.warn("Creating a DatetimeIndex by passing range " - "endpoints is deprecated. Use " - "`pandas.date_range` instead.", - FutureWarning, stacklevel=2) dtarr = DatetimeArray._generate_range( start, end, periods, freq=freq, tz=tz, normalize=normalize, closed=closed, ambiguous=ambiguous) + warnings.warn("Creating a DatetimeIndex by passing range " + "endpoints is deprecated. Use " + "`pandas.date_range` instead.", + FutureWarning, stacklevel=2) return cls._simple_new( dtarr._data, freq=dtarr.freq, tz=dtarr.tz, name=name) @@ -1153,24 +1153,6 @@ def _eadata(self): _has_same_tz = ea_passthrough("_has_same_tz") __array__ = ea_passthrough("__array__") - def round(self, freq, ambiguous='raise', nonexistent='raise'): - result = self._eadata.round( - freq, ambiguous=ambiguous, nonexistent=nonexistent) - return type(self)._simple_new( - result._data, freq=result.freq, tz=result.tz) - - def floor(self, freq, ambiguous='raise', nonexistent='raise'): - result = self._eadata.floor( - freq, ambiguous=ambiguous, nonexistent=nonexistent) - return type(self)._simple_new( - result._data, freq=result.freq, tz=result.tz) - - def ceil(self, freq, ambiguous='raise', nonexistent='raise'): - result = self._eadata.ceil( - freq, ambiguous=ambiguous, nonexistent=nonexistent) - return type(self)._simple_new( - result._data, freq=result.freq, tz=result.tz) - @property def offset(self): """