-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Datetimelike add/sub catch cases more explicitly, tests #19912
Changes from all commits
92a2df0
92e2249
f6a3fdd
e42c0f2
7662897
f47dace
1dcee10
58457d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,7 +37,7 @@ | |
is_period_dtype, | ||
is_timedelta64_dtype) | ||
from pandas.core.dtypes.generic import ( | ||
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) | ||
ABCIndex, ABCSeries, ABCDataFrame, ABCPeriodIndex, ABCIndexClass) | ||
from pandas.core.dtypes.missing import isna | ||
from pandas.core import common as com, algorithms, ops | ||
from pandas.core.algorithms import checked_add_with_arr | ||
|
@@ -48,6 +48,7 @@ | |
from pandas.util._decorators import Appender, cache_readonly | ||
import pandas.core.dtypes.concat as _concat | ||
import pandas.tseries.frequencies as frequencies | ||
from pandas.tseries.offsets import Tick, DateOffset | ||
|
||
import pandas.core.indexes.base as ibase | ||
_index_doc_kwargs = dict(ibase._index_doc_kwargs) | ||
|
@@ -666,6 +667,9 @@ def _sub_nat(self): | |
def _sub_period(self, other): | ||
return NotImplemented | ||
|
||
def _add_offset(self, offset): | ||
raise com.AbstractMethodError(self) | ||
|
||
def _addsub_offset_array(self, other, op): | ||
""" | ||
Add or subtract array-like of DateOffset objects | ||
|
@@ -705,14 +709,17 @@ def __add__(self, other): | |
from pandas import DateOffset | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, ABCSeries): | ||
if isinstance(other, (ABCSeries, ABCDataFrame)): | ||
return NotImplemented | ||
|
||
# scalar others | ||
elif other is NaT: | ||
result = self._add_nat() | ||
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): | ||
elif isinstance(other, (Tick, timedelta, np.timedelta64)): | ||
result = self._add_delta(other) | ||
elif isinstance(other, DateOffset): | ||
# specifically _not_ a Tick | ||
result = self._add_offset(other) | ||
elif isinstance(other, (datetime, np.datetime64)): | ||
result = self._add_datelike(other) | ||
elif is_integer(other): | ||
|
@@ -733,6 +740,12 @@ def __add__(self, other): | |
elif is_integer_dtype(other) and self.freq is None: | ||
# GH#19123 | ||
raise NullFrequencyError("Cannot shift with no freq") | ||
elif is_float_dtype(other): | ||
# Explicitly catch invalid dtypes | ||
raise TypeError("cannot add {dtype}-dtype to {cls}" | ||
.format(dtype=other.dtype, | ||
cls=type(self).__name__)) | ||
|
||
else: # pragma: no cover | ||
return NotImplemented | ||
|
||
|
@@ -753,17 +766,20 @@ def __radd__(self, other): | |
cls.__radd__ = __radd__ | ||
|
||
def __sub__(self, other): | ||
from pandas import Index, DateOffset | ||
from pandas import Index | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, ABCSeries): | ||
if isinstance(other, (ABCSeries, ABCDataFrame)): | ||
return NotImplemented | ||
|
||
# scalar others | ||
elif other is NaT: | ||
result = self._sub_nat() | ||
elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): | ||
elif isinstance(other, (Tick, timedelta, np.timedelta64)): | ||
result = self._add_delta(-other) | ||
elif isinstance(other, DateOffset): | ||
# specifically _not_ a Tick | ||
result = self._add_offset(-other) | ||
elif isinstance(other, (datetime, np.datetime64)): | ||
result = self._sub_datelike(other) | ||
elif is_integer(other): | ||
|
@@ -790,6 +806,12 @@ def __sub__(self, other): | |
elif is_integer_dtype(other) and self.freq is None: | ||
# GH#19123 | ||
raise NullFrequencyError("Cannot shift with no freq") | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in next pass remove the extra line, want to be consistent on formatting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. |
||
elif is_float_dtype(other): | ||
# Explicitly catch invalid dtypes | ||
raise TypeError("cannot subtract {dtype}-dtype from {cls}" | ||
.format(dtype=other.dtype, | ||
cls=type(self).__name__)) | ||
else: # pragma: no cover | ||
return NotImplemented | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,10 +21,11 @@ | |
|
||
import pandas.tseries.frequencies as frequencies | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
from pandas.tseries.offsets import Tick, DateOffset | ||
|
||
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index | ||
from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin | ||
from pandas.core.tools.datetimes import parse_time_string | ||
import pandas.tseries.offsets as offsets | ||
|
||
from pandas._libs.lib import infer_dtype | ||
from pandas._libs import tslib, index as libindex | ||
|
@@ -680,9 +681,9 @@ def to_timestamp(self, freq=None, how='start'): | |
|
||
def _maybe_convert_timedelta(self, other): | ||
if isinstance( | ||
other, (timedelta, np.timedelta64, offsets.Tick, np.ndarray)): | ||
other, (timedelta, np.timedelta64, Tick, np.ndarray)): | ||
offset = frequencies.to_offset(self.freq.rule_code) | ||
if isinstance(offset, offsets.Tick): | ||
if isinstance(offset, Tick): | ||
if isinstance(other, np.ndarray): | ||
nanos = np.vectorize(delta_to_nanoseconds)(other) | ||
else: | ||
|
@@ -691,7 +692,7 @@ def _maybe_convert_timedelta(self, other): | |
check = np.all(nanos % offset_nanos == 0) | ||
if check: | ||
return nanos // offset_nanos | ||
elif isinstance(other, offsets.DateOffset): | ||
elif isinstance(other, DateOffset): | ||
freqstr = other.rule_code | ||
base = frequencies.get_base_alias(freqstr) | ||
if base == self.freq.rule_code: | ||
|
@@ -707,6 +708,30 @@ def _maybe_convert_timedelta(self, other): | |
msg = "Input has different freq from PeriodIndex(freq={0})" | ||
raise IncompatibleFrequency(msg.format(self.freqstr)) | ||
|
||
def _add_offset(self, other): | ||
assert not isinstance(other, Tick) | ||
base = frequencies.get_base_alias(other.rule_code) | ||
if base != self.freq.rule_code: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you added lots of things here, but don't appear to be tested There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are not new cases; they are currently handled in a jumble in maybe_convert_timedeltalike. This method already exists for DatetimeIndex. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would this be easier to push through if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well, you are adding code here, not removing code, so not sure what is changing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is a deal-breaker I can change this to just alias |
||
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) | ||
raise IncompatibleFrequency(msg) | ||
return self.shift(other.n) | ||
|
||
def _add_delta_td(self, other): | ||
assert isinstance(other, (timedelta, np.timedelta64, Tick)) | ||
nanos = delta_to_nanoseconds(other) | ||
own_offset = frequencies.to_offset(self.freq.rule_code) | ||
|
||
if isinstance(own_offset, Tick): | ||
offset_nanos = delta_to_nanoseconds(own_offset) | ||
if np.all(nanos % offset_nanos == 0): | ||
return self.shift(nanos // offset_nanos) | ||
|
||
# raise when input doesn't have freq | ||
raise IncompatibleFrequency("Input has different freq from " | ||
"{cls}(freq={freqstr})" | ||
.format(cls=type(self).__name__, | ||
freqstr=self.freqstr)) | ||
|
||
def _add_delta(self, other): | ||
ordinal_delta = self._maybe_convert_timedelta(other) | ||
return self.shift(ordinal_delta) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
from pandas import (Timestamp, Timedelta, Series, | ||
DatetimeIndex, TimedeltaIndex, | ||
date_range) | ||
from pandas.core import ops | ||
from pandas._libs import tslib | ||
from pandas._libs.tslibs.offsets import shift_months | ||
|
||
|
@@ -307,6 +308,17 @@ def test_dti_cmp_list(self): | |
|
||
class TestDatetimeIndexArithmetic(object): | ||
|
||
# ------------------------------------------------------------- | ||
# Invalid Operations | ||
|
||
@pytest.mark.parametrize('other', [3.14, np.array([2.0, 3.0])]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a reason to add these to each rather than using a common test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do slightly prefer the readability when they are in the sub-classes, but at the same time it doesn't fully guarantee that the tests are actually testing all subclasses, so we have a tradeoff here. ideally would systematically test common behaviors in test_datetimelike There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about this, put them here because ATM this is these are well-established as the correct place for arithmetic tests. But I agree that there are a bunch of arithmetic tests that could be shared between DTI/TDI/PI. Maybe tests.indexes.test_datetimelike_arithmetic? A bit verbose... BTW next time I complain about parametrization/fixtures, point out statsmodels' tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, let's think about this. maybe open an issue about it. |
||
@pytest.mark.parametrize('op', [operator.add, ops.radd, | ||
operator.sub, ops.rsub]) | ||
def test_dti_add_sub_float(self, op, other): | ||
dti = DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D') | ||
with pytest.raises(TypeError): | ||
op(dti, other) | ||
|
||
def test_dti_add_timestamp_raises(self): | ||
idx = DatetimeIndex(['2011-01-01', '2011-01-02']) | ||
msg = "cannot add DatetimeIndex and Timestamp" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what hits this then? You want to make this NOT propagate on the reversed ops yes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Many cases still hit this (objectdtype, categoricaldtype, miscellaneous scalars...) catching floatdype is the main obvious one. Handling object dtype correctly is the next PR.