Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Masking and overflow checks for datetimeindex and timedeltaindex ops #18020

Merged
merged 17 commits into from
Nov 4, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import pandas.core.dtypes.concat as _concat
from pandas.errors import PerformanceWarning
from pandas.core.common import _values_from_object, _maybe_box
from pandas.core.algorithms import checked_add_with_arr

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.numeric import Int64Index, Float64Index
Expand Down Expand Up @@ -777,7 +778,8 @@ def _sub_datelike(self, other):
"timezones or no timezones")
else:
i8 = self.asi8
result = i8 - other.value
result = checked_add_with_arr(i8, -other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result,
fill_value=libts.iNaT)
else:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,8 @@ def _add_datelike(self, other):
else:
other = Timestamp(other)
i8 = self.asi8
result = checked_add_with_arr(i8, other.value)
result = checked_add_with_arr(i8, other.value,
arr_mask=self._isnan)
result = self._maybe_mask_results(result, fill_value=iNaT)
return DatetimeIndex(result, name=self.name, copy=False)

Expand Down
191 changes: 191 additions & 0 deletions pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
""" generic datetimelike tests """

import pytest

import pandas as pd
from .common import Base
import pandas.util.testing as tm
from pandas import Timestamp, Timedelta, NaT


class DatetimeLike(Base):
Expand Down Expand Up @@ -38,3 +42,190 @@ def test_view(self, indices):
i_view = i.view(self._holder)
result = self._holder(i)
tm.assert_index_equal(result, i_view)


class TestDatetimeLikeIndexArithmetic(object):
# GH17991 checking for overflows and NaT masking on arithmetic ops

# TODO: Fill out the matrix of allowed arithmetic operations:
# - __rsub__, __radd__
# - ops with scalars boxed in Index/Series/DataFrame/np.array
# - ops with scalars:
# NaT, Timestamp.min/max, Timedelta.min/max
# datetime, timedelta, date(?),
# relativedelta,
# np.datetime64, np.timedelta64,
# DateOffset,
# Period
# - timezone-aware variants
# - object-dtype, categorical dtype
# - PeriodIndex
# - consistency with .map(...) ?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have a look through test_ops (or maybe test_base); there is already a lot of coverage for things like this. Don't create a giant matrix; rather, parametrize as much as possible.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take a look. After adding tests for #7996 (separate branch/PR), this class gets pretty huge, so yes, parametrization sounds nice.

(Also it looks like tests in this module don't get run, so that needs changing anyway).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Also it looks like tests in this module don't get run, so that needs changing anyway).

sure they do, classes inherit from this. Pls pls pls don't create a huge matrix of tests w/o looking thru the existing. we cover quite a bit of this already.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pls pls pls don't create a huge matrix of tests w/o looking thru the existing. we cover quite a bit of this already.

Message received. Worrying about correctness first, brevity later.


def test_timedeltaindex_add_timestamp_nat_masking(self):
tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])

# tsneg.value < 0, tspos.value > 0
tsneg = Timestamp('1950-01-01')
tspos = Timestamp('1980-01-01')

res1 = tdinat + tsneg
assert res1[1] is NaT
res2 = tdinat + tspos
assert res2[1] is NaT

def test_timedeltaindex_sub_timestamp_nat_masking(self):
tdinat = pd.to_timedelta(['24658 days 11:15:00', 'NaT'])

# tsneg.value < 0, tspos.value > 0
tsneg = Timestamp('1950-01-01')
tspos = Timestamp('1980-01-01')

res1 = tdinat - tsneg
assert res1[1] is NaT
res2 = tdinat - tspos
assert res2[1] is NaT

def test_timedeltaindex_add_timestamp_overflow(self):
    """Adding a Timestamp to a TimedeltaIndex is overflow-checked.

    GH17991.  An index holding ``Timedelta.max`` / ``Timedelta.min`` is
    shifted by a Timestamp of each sign: shifts toward zero must give
    the exact integer sum, shifts away from zero must raise
    OverflowError.
    """
    base = '24658 days 11:15:00'
    upper = pd.to_timedelta([base, Timedelta.max])
    lower = pd.to_timedelta([base, Timedelta.min])

    before_epoch = Timestamp('1950-01-01')  # .value < 0
    after_epoch = Timestamp('1980-01-01')   # .value > 0

    # Opposite signs: the sum moves toward zero and must be exact
    in_range = [
        (upper, before_epoch, Timedelta.max),
        (lower, after_epoch, Timedelta.min),
    ]
    for index, stamp, bound in in_range:
        shifted = index + stamp
        assert shifted[1].value == bound.value + stamp.value

    # Same signs: the sum moves away from zero and must be rejected
    for index, stamp in [(upper, after_epoch), (lower, before_epoch)]:
        with pytest.raises(OverflowError):
            index + stamp

def test_timedeltaindex_add_timedelta_overflow(self):
    """Adding a Timedelta to a TimedeltaIndex is overflow-checked.

    GH17991.  An index holding ``Timedelta.max`` / ``Timedelta.min`` is
    shifted by plus and minus one hour: stepping past the bound must
    raise OverflowError, stepping back from it must give the exact sum.
    """
    base = '24658 days 11:15:00'
    upper = pd.to_timedelta([base, Timedelta.max])
    lower = pd.to_timedelta([base, Timedelta.min])

    hour_fwd = Timedelta('1h')    # .value > 0
    hour_back = Timedelta('-1h')  # .value < 0

    # Upper bound: forward overflows, backward is exact
    with pytest.raises(OverflowError):
        upper + hour_fwd

    below_max = upper + hour_back
    assert below_max[1].value == Timedelta.max.value + hour_back.value

    # Lower bound: forward is exact, backward overflows
    above_min = lower + hour_fwd
    assert above_min[1].value == Timedelta.min.value + hour_fwd.value

    with pytest.raises(OverflowError):
        lower + hour_back

def test_timedeltaindex_sub_timedelta_overflow(self):
    """Subtracting a Timedelta from a TimedeltaIndex is overflow-checked.

    GH17991.  An index holding ``Timedelta.max`` / ``Timedelta.min`` has
    plus and minus one hour subtracted: stepping past the bound must
    raise OverflowError, stepping back from it must give the exact
    difference.
    """
    base = '24658 days 11:15:00'
    upper = pd.to_timedelta([base, Timedelta.max])
    lower = pd.to_timedelta([base, Timedelta.min])

    hour_fwd = Timedelta('1h')    # .value > 0
    hour_back = Timedelta('-1h')  # .value < 0

    # Upper bound: removing a positive hour is exact, removing a
    # negative hour pushes past the maximum
    below_max = upper - hour_fwd
    assert below_max[1].value == Timedelta.max.value - hour_fwd.value

    with pytest.raises(OverflowError):
        upper - hour_back

    # Lower bound: removing a positive hour pushes past the minimum,
    # removing a negative hour is exact
    with pytest.raises(OverflowError):
        lower - hour_fwd

    above_min = lower - hour_back
    assert above_min[1].value == Timedelta.min.value - hour_back.value

def test_datetimeindex_add_nat_masking(self):
# Checking for NaTs and checking that we don't get an OverflowError
dtinat = pd.to_datetime(['now', 'NaT'])

# tdpos.value > 0, tdneg.value < 0
tdpos = Timedelta('1h')
tdneg = Timedelta('-1h')

res1 = dtinat + tdpos
assert res1[1] is NaT
res2 = dtinat + tdneg
assert res2[1] is NaT

def test_datetimeindex_sub_nat_masking(self):
# Checking for NaTs and checking that we don't get an OverflowError
dtinat = pd.to_datetime(['now', 'NaT'])

# tdpos.value > 0, tdneg.value < 0
tdpos = Timedelta('1h')
tdneg = Timedelta('-1h')

res1 = dtinat - tdpos
assert res1[1] is NaT
res2 = dtinat - tdneg
assert res2[1] is NaT

def test_datetimeindex_add_timedelta_overflow(self):
    """Adding a Timedelta to a DatetimeIndex is overflow-checked.

    GH17991.  An index holding ``Timestamp.max`` / ``Timestamp.min`` is
    shifted by plus and minus one hour: stepping past the bound must
    raise OverflowError, stepping back from it must give the exact sum.
    """
    latest = pd.to_datetime(['now', Timestamp.max])
    earliest = pd.to_datetime(['now', Timestamp.min])

    hour_fwd = Timedelta('1h')    # .value > 0
    hour_back = Timedelta('-1h')  # .value < 0

    # Upper bound: forward overflows, backward is exact
    with pytest.raises(OverflowError):
        latest + hour_fwd

    below_max = latest + hour_back
    assert below_max[1].value == Timestamp.max.value + hour_back.value

    # Lower bound: forward is exact, backward overflows
    above_min = earliest + hour_fwd
    assert above_min[1].value == Timestamp.min.value + hour_fwd.value

    with pytest.raises(OverflowError):
        earliest + hour_back

def test_datetimeindex_sub_timedelta_overflow(self):
    """Subtracting a Timedelta from a DatetimeIndex is overflow-checked.

    GH17991.  An index holding ``Timestamp.max`` / ``Timestamp.min`` has
    plus and minus one hour subtracted: stepping past the bound must
    raise OverflowError, stepping back from it must give the exact
    difference.
    """
    latest = pd.to_datetime(['now', Timestamp.max])
    earliest = pd.to_datetime(['now', Timestamp.min])

    hour_fwd = Timedelta('1h')    # .value > 0
    hour_back = Timedelta('-1h')  # .value < 0

    # Upper bound: removing a positive hour is exact, removing a
    # negative hour pushes past the maximum
    below_max = latest - hour_fwd
    assert below_max[1].value == Timestamp.max.value - hour_fwd.value

    with pytest.raises(OverflowError):
        latest - hour_back

    # Lower bound: removing a positive hour pushes past the minimum,
    # removing a negative hour is exact
    with pytest.raises(OverflowError):
        earliest - hour_fwd

    above_min = earliest - hour_back
    assert above_min[1].value == Timestamp.min.value - hour_back.value

def test_datetimeindex_sub_timestamp_overflow(self):
    """Subtracting a Timestamp from a DatetimeIndex is overflow-checked.

    GH17991.  An index holding ``Timestamp.max`` / ``Timestamp.min`` has
    a Timestamp of each sign subtracted: differences that move away
    from zero must raise OverflowError, the others must be exact.
    """
    latest = pd.to_datetime(['now', Timestamp.max])
    earliest = pd.to_datetime(['now', Timestamp.min])

    before_epoch = Timestamp('1950-01-01')  # .value < 0
    after_epoch = Timestamp('1980-01-01')   # .value > 0

    # Upper bound: removing a negative value overflows, removing a
    # positive value is exact
    with pytest.raises(OverflowError):
        latest - before_epoch

    below_max = latest - after_epoch
    assert below_max[1].value == Timestamp.max.value - after_epoch.value

    # Lower bound: removing a negative value is exact, removing a
    # positive value overflows
    above_min = earliest - before_epoch
    assert above_min[1].value == Timestamp.min.value - before_epoch.value

    with pytest.raises(OverflowError):
        earliest - after_epoch