From 40d6adab68ca51a777ee80d9d13185c5be409e84 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 28 Oct 2018 06:33:59 -0700 Subject: [PATCH] BUG/TST/REF: Datetimelike Arithmetic Methods (#23215) --- doc/source/whatsnew/v0.24.0.txt | 6 +- pandas/_libs/tslibs/offsets.pyx | 4 +- pandas/core/arrays/datetimelike.py | 91 ++++---- pandas/core/arrays/datetimes.py | 103 ++++----- pandas/core/arrays/period.py | 219 +++++++++----------- pandas/core/arrays/timedeltas.py | 78 ++++--- pandas/core/indexes/period.py | 47 ++++- pandas/core/indexes/timedeltas.py | 3 +- pandas/tests/arithmetic/test_period.py | 49 +++++ pandas/tests/arithmetic/test_timedelta64.py | 26 ++- pandas/tests/arrays/test_period.py | 14 -- pandas/tests/indexes/datetimes/test_ops.py | 13 +- pandas/tests/indexes/period/test_ops.py | 11 - pandas/tests/indexes/period/test_period.py | 11 + pandas/tests/indexes/timedeltas/test_ops.py | 12 -- 15 files changed, 360 insertions(+), 327 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 51c398518c1534..4b49bb3da13822 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1028,6 +1028,7 @@ Datetimelike - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) +- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) Timedelta ^^^^^^^^^ @@ -1039,7 +1040,8 @@ Timedelta - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped 
array would raise ``TypeError`` (:issue:`21980`) - Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - +- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) +- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`) Timezones ^^^^^^^^^ @@ -1069,7 +1071,7 @@ Offsets - Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) -- +- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) Numeric ^^^^^^^ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 393c2cdba85680..5baacfe5f725f3 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -344,8 +344,8 @@ class _BaseOffset(object): return {name: kwds[name] for name in kwds if kwds[name] is not None} def __add__(self, other): - if getattr(other, "_typ", None) in ["datetimeindex", - "series", "period"]: + if getattr(other, "_typ", None) in ["datetimeindex", "periodindex", + "series", "period", "dataframe"]: # defer to the other class's implementation return other + self try: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 28fe6471efb73e..0247ce8dc6ac4b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -221,11 +221,12 @@ def hasnans(self): """ return if I have any nans; enables 
various perf speedups """ return bool(self._isnan.any()) - def _maybe_mask_results(self, result, fill_value=None, convert=None): + def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): """ Parameters ---------- result : a ndarray + fill_value : object, default iNaT convert : string/dtype or None Returns @@ -246,27 +247,6 @@ def _maybe_mask_results(self, result, fill_value=None, convert=None): result[self._isnan] = fill_value return result - def _nat_new(self, box=True): - """ - Return Array/Index or ndarray filled with NaT which has the same - length as the caller. - - Parameters - ---------- - box : boolean, default True - - If True returns a Array/Index as the same as caller. - - If False returns ndarray of np.int64. - """ - result = np.zeros(len(self), dtype=np.int64) - result.fill(iNaT) - if not box: - return result - - attribs = self._get_attributes_dict() - if not is_period_dtype(self): - attribs['freq'] = None - return self._simple_new(result, **attribs) - # ------------------------------------------------------------------ # Frequency Properties/Methods @@ -346,24 +326,58 @@ def _validate_frequency(cls, index, freq, **kwargs): # ------------------------------------------------------------------ # Arithmetic Methods - def _add_datelike(self, other): + def _add_datetimelike_scalar(self, other): + # Overridden by TimedeltaArray raise TypeError("cannot add {cls} and {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) - def _sub_datelike(self, other): - raise com.AbstractMethodError(self) + _add_datetime_arraylike = _add_datetimelike_scalar + + def _sub_datetimelike_scalar(self, other): + # Overridden by DatetimeArray + assert other is not NaT + raise TypeError("cannot subtract a datelike from a {cls}" + .format(cls=type(self).__name__)) + + _sub_datetime_arraylike = _sub_datetimelike_scalar def _sub_period(self, other): - return NotImplemented + # Overridden by PeriodArray + raise TypeError("cannot subtract Period from a {cls}" + 
.format(cls=type(self).__name__)) def _add_offset(self, offset): raise com.AbstractMethodError(self) def _add_delta(self, other): - return NotImplemented + """ + Add a timedelta-like, Tick or TimedeltaIndex-like object + to self, yielding an int64 numpy array + + Parameters + ---------- + other : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : ndarray[int64] - def _add_delta_td(self, other): + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__), if necessary (i.e. for Indexes). + """ + if isinstance(other, (Tick, timedelta, np.timedelta64)): + new_values = self._add_timedeltalike_scalar(other) + elif is_timedelta64_dtype(other): + # ndarray[timedelta64] or TimedeltaArray/index + new_values = self._add_delta_tdi(other) + + return new_values + + def _add_timedeltalike_scalar(self, other): """ Add a delta of a timedeltalike return the i8 result view @@ -371,8 +385,7 @@ def _add_delta_td(self, other): inc = delta_to_nanoseconds(other) new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view('i8') - if self.hasnans: - new_values[self._isnan] = iNaT + new_values = self._maybe_mask_results(new_values) return new_values.view('i8') def _add_delta_tdi(self, other): @@ -380,7 +393,7 @@ def _add_delta_tdi(self, other): Add a delta of a TimedeltaIndex return the i8 result view """ - if not len(self) == len(other): + if len(self) != len(other): raise ValueError("cannot add indices of unequal length") if isinstance(other, np.ndarray): @@ -407,7 +420,9 @@ def _add_nat(self): # GH#19124 pd.NaT is treated like a timedelta for both timedelta # and datetime dtypes - return self._nat_new(box=True) + result = np.zeros(len(self), dtype=np.int64) + result.fill(iNaT) + return self._shallow_copy(result, freq=None) def _sub_nat(self): """Subtract pd.NaT from self""" @@ -441,7 +456,7 @@ def _sub_period_array(self, other): .format(dtype=other.dtype, 
cls=type(self).__name__)) - if not len(self) == len(other): + if len(self) != len(other): raise ValueError("cannot subtract arrays/indices of " "unequal length") if self.freq != other.freq: @@ -473,6 +488,8 @@ def _addsub_int_array(self, other, op): ------- result : same class as self """ + # _addsub_int_array is overridden by PeriodArray + assert not is_period_dtype(self) assert op in [operator.add, operator.sub] if self.freq is None: @@ -613,7 +630,7 @@ def __add__(self, other): # specifically _not_ a Tick result = self._add_offset(other) elif isinstance(other, (datetime, np.datetime64)): - result = self._add_datelike(other) + result = self._add_datetimelike_scalar(other) elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these @@ -628,7 +645,7 @@ def __add__(self, other): result = self._addsub_offset_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] - return self._add_datelike(other) + return self._add_datetime_arraylike(other) elif is_integer_dtype(other): result = self._addsub_int_array(other, operator.add) elif is_float_dtype(other): @@ -671,7 +688,7 @@ def __sub__(self, other): # specifically _not_ a Tick result = self._add_offset(-other) elif isinstance(other, (datetime, np.datetime64)): - result = self._sub_datelike(other) + result = self._sub_datetimelike_scalar(other) elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these @@ -688,7 +705,7 @@ def __sub__(self, other): result = self._addsub_offset_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] - result = self._sub_datelike(other) + result = self._sub_datetime_arraylike(other) elif is_period_dtype(other): # PeriodIndex result = self._sub_period_array(other) diff --git a/pandas/core/arrays/datetimes.py 
b/pandas/core/arrays/datetimes.py index 2392bbdd87f7a3..b656690b30e34c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta, time +from datetime import datetime, time import warnings import numpy as np @@ -21,7 +21,6 @@ is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, - is_timedelta64_dtype, ensure_int64) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna @@ -76,11 +75,12 @@ def f(self): if field in self._object_ops: result = fields.get_date_name_field(values, field) - result = self._maybe_mask_results(result) + result = self._maybe_mask_results(result, fill_value=None) else: result = fields.get_date_field(values, field) - result = self._maybe_mask_results(result, convert='float64') + result = self._maybe_mask_results(result, fill_value=None, + convert='float64') return result @@ -424,11 +424,21 @@ def _assert_tzawareness_compat(self, other): # ----------------------------------------------------------------- # Arithmetic Methods - def _sub_datelike_dti(self, other): - """subtraction of two DatetimeIndexes""" - if not len(self) == len(other): + def _sub_datetime_arraylike(self, other): + """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): raise ValueError("cannot add indices of unequal length") + if isinstance(other, np.ndarray): + assert is_datetime64_dtype(other) + other = type(self)(other) + + if not self._has_same_tz(other): + # require tz compat + raise TypeError("{cls} subtraction must have the same " + "timezones or no timezones" + .format(cls=type(self).__name__)) + self_i8 = self.asi8 other_i8 = other.asi8 new_values = checked_add_with_arr(self_i8, -other_i8, @@ -456,74 +466,41 @@ def _add_offset(self, offset): return type(self)(result, freq='infer') - def _sub_datelike(self, other): + def _sub_datetimelike_scalar(self, other): # subtract a datetime 
from myself, yielding a ndarray[timedelta64[ns]] - if isinstance(other, (DatetimeArrayMixin, np.ndarray)): - if isinstance(other, np.ndarray): - # if other is an ndarray, we assume it is datetime64-dtype - other = type(self)(other) - if not self._has_same_tz(other): - # require tz compat - raise TypeError("{cls} subtraction must have the same " - "timezones or no timezones" - .format(cls=type(self).__name__)) - result = self._sub_datelike_dti(other) - elif isinstance(other, (datetime, np.datetime64)): - assert other is not NaT - other = Timestamp(other) - if other is NaT: - return self - NaT + assert isinstance(other, (datetime, np.datetime64)) + assert other is not NaT + other = Timestamp(other) + if other is NaT: + return self - NaT + + if not self._has_same_tz(other): # require tz compat - elif not self._has_same_tz(other): - raise TypeError("Timestamp subtraction must have the same " - "timezones or no timezones") - else: - i8 = self.asi8 - result = checked_add_with_arr(i8, -other.value, - arr_mask=self._isnan) - result = self._maybe_mask_results(result, - fill_value=iNaT) - else: - raise TypeError("cannot subtract {cls} and {typ}" - .format(cls=type(self).__name__, - typ=type(other).__name__)) + raise TypeError("Timestamp subtraction must have the same " + "timezones or no timezones") + + i8 = self.asi8 + result = checked_add_with_arr(i8, -other.value, + arr_mask=self._isnan) + result = self._maybe_mask_results(result) return result.view('timedelta64[ns]') def _add_delta(self, delta): """ - Add a timedelta-like, DateOffset, or TimedeltaIndex-like object - to self. 
+ Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self, yielding a new DatetimeArray Parameters ---------- - delta : {timedelta, np.timedelta64, DateOffset, + delta : {timedelta, np.timedelta64, Tick, TimedeltaIndex, ndarray[timedelta64]} Returns ------- - result : same type as self - - Notes - ----- - The result's name is set outside of _add_delta by the calling - method (__add__ or __sub__) + result : DatetimeArray """ - from pandas.core.arrays import TimedeltaArrayMixin - - if isinstance(delta, (Tick, timedelta, np.timedelta64)): - new_values = self._add_delta_td(delta) - elif is_timedelta64_dtype(delta): - if not isinstance(delta, TimedeltaArrayMixin): - delta = TimedeltaArrayMixin(delta) - new_values = self._add_delta_tdi(delta) - else: - new_values = self.astype('O') + delta - - tz = 'UTC' if self.tz is not None else None - result = type(self)(new_values, tz=tz, freq='infer') - if self.tz is not None and self.tz is not utc: - result = result.tz_convert(self.tz) - return result + new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta) + return type(self)(new_values, tz=self.tz, freq='infer') # ----------------------------------------------------------------- # Timezone Conversion and Localization Methods @@ -904,7 +881,7 @@ def month_name(self, locale=None): result = fields.get_date_name_field(values, 'month_name', locale=locale) - result = self._maybe_mask_results(result) + result = self._maybe_mask_results(result, fill_value=None) return result def day_name(self, locale=None): @@ -940,7 +917,7 @@ def day_name(self, locale=None): result = fields.get_date_name_field(values, 'day_name', locale=locale) - result = self._maybe_mask_results(result) + result = self._maybe_mask_results(result, fill_value=None) return result @property diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 085298d8324c58..31bcac2f4f529b 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -6,7 +6,6 @@ from pandas 
import compat from pandas.compat.numpy import function as nv -from pandas._libs import lib from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, @@ -15,7 +14,7 @@ from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.util._validators import validate_fillna_kwargs import pandas.core.algorithms as algos from pandas.core.dtypes.common import ( @@ -23,7 +22,6 @@ pandas_dtype, is_datetime64_dtype, is_categorical_dtype, - is_timedelta64_dtype, is_list_like, is_array_like, is_object_dtype, @@ -33,8 +31,6 @@ ensure_object, _TD_DTYPE, ) - - from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCPeriodIndex @@ -45,7 +41,7 @@ import pandas.core.common as com from pandas.tseries import frequencies -from pandas.tseries.offsets import Tick, DateOffset +from pandas.tseries.offsets import Tick from pandas.core.arrays import ExtensionArray from pandas.core.arrays import datetimelike as dtl @@ -497,7 +493,11 @@ def _time_shift(self, n, freq=None): freq : pandas.DateOffset, pandas.Timedelta, or string Frequency increment to shift by. 
""" - values = self._data + n * self.freq.n + if freq is not None: + raise TypeError("`freq` argument is not supported for " + "{cls}._time_shift" + .format(cls=type(self).__name__)) + values = self.asi8 + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT return type(self)(values, freq=self.freq) @@ -606,52 +606,9 @@ def to_timestamp(self, freq=None, how='start'): new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) return DatetimeArrayMixin(new_data, freq='infer') - def _maybe_convert_timedelta(self, other): - """ - Convert timedelta-like input to an integer multiple of self.freq - - Parameters - ---------- - other : timedelta, np.timedelta64, DateOffset, int, np.ndarray - - Returns - ------- - converted : int, np.ndarray[int64] - - Raises - ------ - IncompatibleFrequency : if the input cannot be written as a multiple - of self.freq. Note IncompatibleFrequency subclasses ValueError. - """ - if isinstance( - other, (timedelta, np.timedelta64, Tick, np.ndarray)): - offset = frequencies.to_offset(self.freq.rule_code) - if isinstance(offset, Tick): - # _check_timedeltalike_freq_compat will raise if incompatible - delta = self._check_timedeltalike_freq_compat(other) - return delta - elif isinstance(other, DateOffset): - freqstr = other.rule_code - base = frequencies.get_base_alias(freqstr) - if base == self.freq.rule_code: - return other.n - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - elif lib.is_integer(other): - # integer is passed to .shift via - # _add_datetimelike_methods basically - # but ufunc may pass integer to _add_delta - return other - - # raise when input doesn't have freq - msg = "Input has different freq from {cls}(freq={freqstr})" - raise IncompatibleFrequency(msg.format(cls=type(self).__name__, - freqstr=self.freqstr)) - # ------------------------------------------------------------------ # Formatting - def _format_native_types(self, na_rep=u'NaT', date_format=None, - 
**kwargs): + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): """ actually format my specific types """ # TODO(DatetimeArray): remove values = self.astype(object) @@ -671,58 +628,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, values = np.array([formatter(dt) for dt in values]) return values - def _check_timedeltalike_freq_compat(self, other): - """ - Arithmetic operations with timedelta-like scalars or array `other` - are only valid if `other` is an integer multiple of `self.freq`. - If the operation is valid, find that integer multiple. Otherwise, - raise because the operation is invalid. - - Parameters - ---------- - other : timedelta, np.timedelta64, Tick, - ndarray[timedelta64], TimedeltaArray, TimedeltaIndex - - Returns - ------- - multiple : int or ndarray[int64] - - Raises - ------ - IncompatibleFrequency - """ - assert isinstance(self.freq, Tick) # checked by calling function - own_offset = frequencies.to_offset(self.freq.rule_code) - base_nanos = delta_to_nanoseconds(own_offset) - - if isinstance(other, (timedelta, np.timedelta64, Tick)): - nanos = delta_to_nanoseconds(other) - - elif isinstance(other, np.ndarray): - # numpy timedelta64 array; all entries must be compatible - assert other.dtype.kind == 'm' - if other.dtype != _TD_DTYPE: - # i.e. non-nano unit - # TODO: disallow unit-less timedelta64 - other = other.astype(_TD_DTYPE) - nanos = other.view('i8') - else: - # TimedeltaArray/Index - nanos = other.asi8 - - if np.all(nanos % base_nanos == 0): - # nanos being added is an integer multiple of the - # base-frequency to self.freq - delta = nanos // base_nanos - # delta is the integer (or integer-array) number of periods - # by which will be added to self. - return delta - - raise IncompatibleFrequency("Input has different freq from " - "{cls}(freq={freqstr})" - .format(cls=type(self).__name__, - freqstr=self.freqstr)) - def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Categorical. 
@@ -810,6 +715,7 @@ def _sub_period(self, other): return new_data + @Appender(dtl.DatetimeLikeArrayMixin._addsub_int_array.__doc__) def _addsub_int_array( self, other, # type: Union[Index, ExtensionArray, np.ndarray[int]] @@ -817,7 +723,6 @@ def _addsub_int_array( ): # type: (...) -> PeriodArray assert op in [operator.add, operator.sub] - # easy case for PeriodIndex if op is operator.sub: other = -other res_values = algos.checked_add_with_arr(self.asi8, other, @@ -832,30 +737,53 @@ def _add_offset(self, other): if base != self.freq.rule_code: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - return self._time_shift(other.n) - def _add_delta_td(self, other): + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. + result = super(PeriodArray, self)._add_timedeltalike_scalar(other.n) + return type(self)(result, freq=self.freq) + + def _add_timedeltalike_scalar(self, other): + """ + Parameters + ---------- + other : timedelta, Tick, np.timedelta64 + + Returns + ------- + result : ndarray[int64] + """ assert isinstance(self.freq, Tick) # checked by calling function assert isinstance(other, (timedelta, np.timedelta64, Tick)) delta = self._check_timedeltalike_freq_compat(other) - # Note: when calling parent class's _add_delta_td, it will call - # delta_to_nanoseconds(delta). Because delta here is an integer, - # delta_to_nanoseconds will return it unchanged. - ordinals = super(PeriodArray, self)._add_delta_td(delta) - return type(self)(ordinals, self.freq) + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. 
+ ordinals = super(PeriodArray, self)._add_timedeltalike_scalar(delta) + return ordinals def _add_delta_tdi(self, other): + """ + Parameters + ---------- + other : TimedeltaArray or ndarray[timedelta64] + + Returns + ------- + result : ndarray[int64] + """ assert isinstance(self.freq, Tick) # checked by calling function delta = self._check_timedeltalike_freq_compat(other) - return self._addsub_int_array(delta, operator.add) + return self._addsub_int_array(delta, operator.add).asi8 def _add_delta(self, other): """ Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self. + to self, yielding a new PeriodArray Parameters ---------- @@ -864,7 +792,7 @@ def _add_delta(self, other): Returns ------- - result : same type as self + result : PeriodArray """ if not isinstance(self.freq, Tick): # We cannot add timedelta-like to non-tick PeriodArray @@ -873,15 +801,60 @@ def _add_delta(self, other): .format(cls=type(self).__name__, freqstr=self.freqstr)) - # TODO: standardize across datetimelike subclasses whether to return - # i8 view or _shallow_copy - if isinstance(other, (Tick, timedelta, np.timedelta64)): - return self._add_delta_td(other) - elif is_timedelta64_dtype(other): - # ndarray[timedelta64] or TimedeltaArray/index - return self._add_delta_tdi(other) - else: # pragma: no cover - raise TypeError(type(other).__name__) + new_ordinals = super(PeriodArray, self)._add_delta(other) + return type(self)(new_ordinals, freq=self.freq) + + def _check_timedeltalike_freq_compat(self, other): + """ + Arithmetic operations with timedelta-like scalars or array `other` + are only valid if `other` is an integer multiple of `self.freq`. + If the operation is valid, find that integer multiple. Otherwise, + raise because the operation is invalid. 
+ + Parameters + ---------- + other : timedelta, np.timedelta64, Tick, + ndarray[timedelta64], TimedeltaArray, TimedeltaIndex + + Returns + ------- + multiple : int or ndarray[int64] + + Raises + ------ + IncompatibleFrequency + """ + assert isinstance(self.freq, Tick) # checked by calling function + own_offset = frequencies.to_offset(self.freq.rule_code) + base_nanos = delta_to_nanoseconds(own_offset) + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + nanos = delta_to_nanoseconds(other) + + elif isinstance(other, np.ndarray): + # numpy timedelta64 array; all entries must be compatible + assert other.dtype.kind == 'm' + if other.dtype != _TD_DTYPE: + # i.e. non-nano unit + # TODO: disallow unit-less timedelta64 + other = other.astype(_TD_DTYPE) + nanos = other.view('i8') + else: + # TimedeltaArray/Index + nanos = other.asi8 + + if np.all(nanos % base_nanos == 0): + # nanos being added is an integer multiple of the + # base-frequency to self.freq + delta = nanos // base_nanos + # delta is the integer (or integer-array) number of periods + # that will be added to self. 
+ return delta + + raise IncompatibleFrequency("Input has different freq from " + "{cls}(freq={freqstr})" + .format(cls=type(self).__name__, + freqstr=self.freqstr)) PeriodArray._add_comparison_ops() diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index eb7dabdc03b0b1..397297c1b88d0d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -4,7 +4,7 @@ import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT +from pandas._libs.tslibs import Timedelta, Timestamp, NaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -46,7 +46,8 @@ def f(self): values = self.asi8 result = get_timedelta_field(values, alias) if self.hasnans: - result = self._maybe_mask_results(result, convert='float64') + result = self._maybe_mask_results(result, fill_value=None, + convert='float64') return result @@ -188,61 +189,51 @@ def _add_offset(self, other): .format(typ=type(other).__name__, cls=type(self).__name__)) - def _sub_datelike(self, other): - assert other is not NaT - raise TypeError("cannot subtract a datelike from a {cls}" - .format(cls=type(self).__name__)) - def _add_delta(self, delta): """ Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self. 
+ to self, yielding a new TimedeltaArray Parameters ---------- - delta : timedelta, np.timedelta64, Tick, TimedeltaArray, TimedeltaIndex + delta : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} Returns ------- - result : same type as self - - Notes - ----- - The result's name is set outside of _add_delta by the calling - method (__add__ or __sub__) + result : TimedeltaArray """ - if isinstance(delta, (Tick, timedelta, np.timedelta64)): - new_values = self._add_delta_td(delta) - elif isinstance(delta, TimedeltaArrayMixin): - new_values = self._add_delta_tdi(delta) - elif is_timedelta64_dtype(delta): - # ndarray[timedelta64] --> wrap in TimedeltaArray/Index - delta = type(self)(delta) - new_values = self._add_delta_tdi(delta) - else: - raise TypeError("cannot add the type {0} to a TimedeltaIndex" - .format(type(delta))) - + new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta) return type(self)(new_values, freq='infer') - def _add_datelike(self, other): + def _add_datetime_arraylike(self, other): + """Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray""" + if isinstance(other, np.ndarray): + # At this point we have already checked that dtype is datetime64 + from pandas.core.arrays import DatetimeArrayMixin + other = DatetimeArrayMixin(other) + + # defer to implementation in DatetimeArray + return other + self + + def _add_datetimelike_scalar(self, other): # adding a timedeltaindex to a datetimelike from pandas.core.arrays import DatetimeArrayMixin - if isinstance(other, (DatetimeArrayMixin, np.ndarray)): - # if other is an ndarray, we assume it is datetime64-dtype - # defer to implementation in DatetimeIndex - if not isinstance(other, DatetimeArrayMixin): - other = DatetimeArrayMixin(other) - return other + self - else: - assert other is not NaT - other = Timestamp(other) - i8 = self.asi8 - result = checked_add_with_arr(i8, other.value, - arr_mask=self._isnan) - result = self._maybe_mask_results(result, 
fill_value=iNaT) + + assert other is not NaT + other = Timestamp(other) + if other is NaT: + # In this case we specifically interpret NaT as a datetime, not + # the timedelta interpretation we would get by returning self + NaT + result = self.asi8.view('m8[ms]') + NaT.to_datetime64() return DatetimeArrayMixin(result) + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, + arr_mask=self._isnan) + result = self._maybe_mask_results(result) + return DatetimeArrayMixin(result, tz=other.tz) + def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects try: @@ -276,7 +267,8 @@ def _evaluate_with_timedelta_like(self, other, op): result = op(left, right) else: result = op(left, np.float64(right)) - result = self._maybe_mask_results(result, convert='float64') + result = self._maybe_mask_results(result, fill_value=None, + convert='float64') return result return NotImplemented @@ -339,7 +331,7 @@ def total_seconds(self): Float64Index([0.0, 86400.0, 172800.0, 259200.00000000003, 345600.0], dtype='float64') """ - return self._maybe_mask_results(1e-9 * self.asi8) + return self._maybe_mask_results(1e-9 * self.asi8, fill_value=None) def to_pytimedelta(self): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e59e696e98e517..c3728d8d956de9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,5 +1,5 @@ # pylint: disable=E1101,E1103,W0232 -from datetime import datetime +from datetime import datetime, timedelta import numpy as np import operator import warnings @@ -39,6 +39,9 @@ Appender, Substitution, cache_readonly, deprecate_kwarg ) +from pandas.tseries.offsets import Tick, DateOffset +from pandas.tseries import frequencies + import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -406,8 +409,46 @@ def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs): **kwargs) def 
_maybe_convert_timedelta(self, other): - # just dispatch, return ndarray - return self._data._maybe_convert_timedelta(other) + """ + Convert timedelta-like input to an integer multiple of self.freq + + Parameters + ---------- + other : timedelta, np.timedelta64, DateOffset, int, np.ndarray + + Returns + ------- + converted : int, np.ndarray[int64] + + Raises + ------ + IncompatibleFrequency : if the input cannot be written as a multiple + of self.freq. Note IncompatibleFrequency subclasses ValueError. + """ + if isinstance( + other, (timedelta, np.timedelta64, Tick, np.ndarray)): + offset = frequencies.to_offset(self.freq.rule_code) + if isinstance(offset, Tick): + # _check_timedeltalike_freq_compat will raise if incompatible + delta = self._data._check_timedeltalike_freq_compat(other) + return delta + elif isinstance(other, DateOffset): + freqstr = other.rule_code + base = frequencies.get_base_alias(freqstr) + if base == self.freq.rule_code: + return other.n + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + elif is_integer(other): + # integer is passed to .shift via + # _add_datetimelike_methods basically + # but ufunc may pass integer to _add_delta + return other + + # raise when input doesn't have freq + msg = "Input has different freq from {cls}(freq={freqstr})" + raise IncompatibleFrequency(msg.format(cls=type(self).__name__, + freqstr=self.freqstr)) # ------------------------------------------------------------------------ # Indexing diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1efa0a15d34d79..e5da21478d0a41 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -232,7 +232,8 @@ def astype(self, dtype, copy=True): # return an index (essentially this is division) result = self.values.astype(dtype, copy=copy) if self.hasnans: - values = self._maybe_mask_results(result, convert='float64') + values = self._maybe_mask_results(result, 
fill_value=None, + convert='float64') return Index(values, name=self.name) return Index(result.astype('i8'), name=self.name) return super(TimedeltaIndex, self).astype(dtype, copy=copy) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index cff2c252312206..184e76cfa490f5 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -15,6 +15,7 @@ import pandas.core.indexes.period as period from pandas.core import ops from pandas import Period, PeriodIndex, period_range, Series +from pandas.tseries.frequencies import to_offset # ------------------------------------------------------------------ @@ -372,6 +373,22 @@ def test_pi_add_sub_float(self, op, other, box): with pytest.raises(TypeError): op(pi, other) + @pytest.mark.parametrize('other', [pd.Timestamp.now(), + pd.Timestamp.now().to_pydatetime(), + pd.Timestamp.now().to_datetime64()]) + def test_pi_add_sub_datetime(self, other): + # GH#23215 + rng = pd.period_range('1/1/2000', freq='D', periods=3) + + with pytest.raises(TypeError): + rng + other + with pytest.raises(TypeError): + other + rng + with pytest.raises(TypeError): + rng - other + with pytest.raises(TypeError): + other - rng + # ----------------------------------------------------------------- # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] @@ -536,6 +553,38 @@ def test_pi_sub_isub_offset(self): rng -= pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) + def test_pi_add_offset_n_gt1(self, box): + # GH#23215 + # add offset to PeriodIndex with freq.n > 1 + per = pd.Period('2016-01', freq='2M') + pi = pd.PeriodIndex([per]) + + expected = pd.PeriodIndex(['2016-03'], freq='2M') + pi = tm.box_expected(pi, box) + expected = tm.box_expected(expected, box) + + result = pi + per.freq + tm.assert_equal(result, expected) + + result = per.freq + pi + tm.assert_equal(result, expected) + + def test_pi_add_offset_n_gt1_not_divisible(self, box): + # GH#23215 + # 
PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 + + pi = pd.PeriodIndex(['2016-01'], freq='2M') + pi = tm.box_expected(pi, box) + + expected = pd.PeriodIndex(['2016-04'], freq='2M') + expected = tm.box_expected(expected, box) + + result = pi + to_offset('3M') + tm.assert_equal(result, expected) + + result = to_offset('3M') + pi + tm.assert_equal(result, expected) + # --------------------------------------------------------------- # __add__/__sub__ with integer arrays diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 56bef2fee2b417..9930297fd1a3c9 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -418,17 +418,22 @@ def test_td64arr_sub_timestamp_raises(self, box): with tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01') - def test_td64arr_add_timestamp(self, box): + def test_td64arr_add_timestamp(self, box, tz_naive_fixture): + # GH#23215 + # TODO: parametrize over scalar datetime types? 
+ tz = tz_naive_fixture + other = Timestamp('2011-01-01', tz=tz) + idx = TimedeltaIndex(['1 day', '2 day']) - expected = DatetimeIndex(['2011-01-02', '2011-01-03']) + expected = DatetimeIndex(['2011-01-02', '2011-01-03'], tz=tz) idx = tm.box_expected(idx, box) expected = tm.box_expected(expected, box) - result = idx + Timestamp('2011-01-01') + result = idx + other tm.assert_equal(result, expected) - result = Timestamp('2011-01-01') + idx + result = other + idx tm.assert_equal(result, expected) def test_td64arr_add_sub_timestamp(self, box): @@ -489,6 +494,19 @@ def test_tdi_add_dt64_array(self, box_df_broadcast_failure): result = dtarr + tdi tm.assert_equal(result, expected) + def test_td64arr_add_datetime64_nat(self, box): + # GH#23215 + other = np.datetime64('NaT') + + tdi = timedelta_range('1 day', periods=3) + expected = pd.DatetimeIndex(["NaT", "NaT", "NaT"]) + + tdser = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + tm.assert_equal(tdser + other, expected) + tm.assert_equal(other + tdser, expected) + # ------------------------------------------------------------------ # Operations with int-like others diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 780df579d27785..dcbb0d4048b0f8 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -190,17 +190,3 @@ def tet_sub_period(): other = pd.Period("2000", freq="M") with tm.assert_raises_regex(IncompatibleFrequency, "freq"): arr - other - - -# ---------------------------------------------------------------------------- -# other - -def test_maybe_convert_timedelta(): - arr = period_array(['2000', '2001'], freq='D') - offset = pd.tseries.offsets.Day(2) - assert arr._maybe_convert_timedelta(offset) == 2 - assert arr._maybe_convert_timedelta(2) == 2 - - offset = pd.tseries.offsets.BusinessDay() - with tm.assert_raises_regex(ValueError, 'freq'): - arr._maybe_convert_timedelta(offset) diff --git 
a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index f06291a7e4e9e4..086c687148292b 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -5,7 +5,6 @@ import pytest import pandas as pd -import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas import ( DatetimeIndex, Index, PeriodIndex, Series, Timestamp, bdate_range, @@ -36,7 +35,7 @@ def test_ops_properties(self): def test_ops_properties_basic(self): # sanity check that the behavior didn't change - # GH7206 + # GH#7206 for op in ['year', 'day', 'second', 'weekday']: pytest.raises(TypeError, lambda x: getattr(self.dt_series, op)) @@ -339,16 +338,6 @@ def test_infer_freq(self, freq): tm.assert_index_equal(idx, result) assert result.freq == freq - def test_nat_new(self): - idx = pd.date_range('2011-01-01', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.DatetimeIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture assert pd.DatetimeIndex._na_value is pd.NaT diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index f4ee5e88b77b23..80550292ed4f8f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -312,17 +312,6 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq == 'D' - def test_nat_new(self): - - idx = pd.period_range('2011-01', freq='M', periods=5, name='x') - result = idx._nat_new() - exp = pd.PeriodIndex([pd.NaT] * 5, freq='M', name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([tslib.iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - def test_shift(self): # 
This is tested in test_arithmetic pass diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e699a560cb2f69..300d5ef609b3ef 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -557,3 +557,14 @@ def test_insert(self): for na in (np.nan, pd.NaT, None): result = period_range('2017Q1', periods=4, freq='Q').insert(1, na) tm.assert_index_equal(result, expected) + + +def test_maybe_convert_timedelta(): + pi = PeriodIndex(['2000', '2001'], freq='D') + offset = offsets.Day(2) + assert pi._maybe_convert_timedelta(offset) == 2 + assert pi._maybe_convert_timedelta(2) == 2 + + offset = offsets.BusinessDay() + with tm.assert_raises_regex(ValueError, 'freq'): + pi._maybe_convert_timedelta(offset) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 9b3bcbef36805b..a8cfdd0add1786 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -9,7 +9,6 @@ Series, Timedelta, TimedeltaIndex, Timestamp, timedelta_range, to_timedelta ) -from pandas._libs.tslib import iNaT from pandas.core.dtypes.generic import ABCDateOffset from pandas.tests.test_base import Ops from pandas.tseries.offsets import Day, Hour @@ -238,17 +237,6 @@ def test_infer_freq(self, freq): tm.assert_index_equal(idx, result) assert result.freq == freq - def test_nat_new(self): - - idx = pd.timedelta_range('1', freq='D', periods=5, name='x') - result = idx._nat_new() - exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x') - tm.assert_index_equal(result, exp) - - result = idx._nat_new(box=False) - exp = np.array([iNaT] * 5, dtype=np.int64) - tm.assert_numpy_array_equal(result, exp) - def test_shift(self): pass # handled in test_arithmetic.py