Skip to content

Commit

Permalink
BUG: Prevent addition overflow with TimedeltaIndex
Browse files Browse the repository at this point in the history
Expands checked-add array addition introduced in
pandas-devgh-14237 to include all other addition cases (i.e.
TimedeltaIndex and Timedelta). Follow-up to pandas-devgh-14453.
  • Loading branch information
gfyoung committed Dec 7, 2016
1 parent 2466ecb commit 46909f0
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 8 deletions.
15 changes: 15 additions & 0 deletions asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def setup(self):
self.arrneg = np.arange(-1000000, 0)
self.arrmixed = np.array([1, -1]).repeat(500000)

self.arr_nan = np.random.choice([True, False], size=1000000)
self.arrpos_nan = np.random.choice([True, False], size=1000000)
self.arrneg_nan = np.random.choice([True, False], size=1000000)
self.arrmixed_nan = np.random.choice([True, False], size=1000000)

def time_int_factorize(self):
self.int.factorize()

Expand Down Expand Up @@ -57,6 +62,16 @@ def time_add_overflow_neg_arr(self):
def time_add_overflow_mixed_arr(self):
self.checked_add(self.arr, self.arrmixed)

def time_add_overflow_first_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, arr_nans=self.arr_nan)

def time_add_overflow_second_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, b_nans=self.arrmixed_arr_nan)

def time_add_overflow_both_arg_nan(self):
self.checked_add(self.arr, self.arrmixed, arr_nans=self.arr_nan,
b_nans=self.arrmixed_arr_nan)


class hashing(object):
goal_time = 0.2
Expand Down
31 changes: 25 additions & 6 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,15 +812,21 @@ def unique1d(values):
return uniques


def _checked_add_with_arr(arr, b):
def _checked_add_with_arr(arr, b, arr_nans=None, b_nans=None):
"""
Perform array addition that checks for underflow and overflow.
Performs the addition of an int64 array and an int64 integer (or array)
but checks that they do not result in overflow first.
but checks that they do not result in overflow first. For elements that
are indicated to be NaN, whether or not there is overflow for that element
is automatically ignored.
Parameters
----------
arr : array addend.
b : array or scalar addend.
arr_nans : array indicating which elements are NaN
b_nans : array or scalar indicating which elements are NaN
Returns
-------
Expand All @@ -843,6 +849,17 @@ def _checked_add_with_arr(arr, b):
else:
b2 = np.broadcast_to(b, arr.shape)

# For elements that are NaN, regardless of their value, we should
# ignore whether they overflow or not when doing the checked add.
if arr_nans is not None and b_nans is not None:
not_nan = np.logical_not(arr_nans | b_nans)
elif arr_nans is not None:
not_nan = np.logical_not(arr_nans)
elif b_nans is not None:
not_nan = np.logical_not(b_nans)
else:
not_nan = np.array([True])

# gh-14324: For each element in 'arr' and its corresponding element
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
# we then check whether its sum with the element in 'arr' exceeds
Expand All @@ -854,12 +871,14 @@ def _checked_add_with_arr(arr, b):
mask2 = b2 < 0

if not mask1.any():
to_raise = (np.iinfo(np.int64).min - b2 > arr).any()
to_raise = ((np.iinfo(np.int64).min - b2 > arr) & not_nan).any()
elif not mask2.any():
to_raise = (np.iinfo(np.int64).max - b2 < arr).any()
to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any()
else:
to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any())
to_raise = (((np.iinfo(np.int64).max -
b2[mask1] < arr[mask1]) & not_nan[mask1]).any() or
((np.iinfo(np.int64).min -
b2[mask2] > arr[mask2]) & not_nan[mask2]).any())

if to_raise:
raise OverflowError("Overflow in int64 addition")
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,11 +1018,38 @@ def test_int64_add_overflow():
nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
arr_nans=np.array([False, True]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
b_nans=np.array([False, True]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
arr_nans=np.array([False, True]),
b_nans=np.array([False, True]))
with tm.assertRaisesRegexp(OverflowError, msg):
with tm.assert_produces_warning(RuntimeWarning):
nanops._checked_add_with_arr(np.array([m, m]),
np.array([np.nan, m]))

# Check that the nan boolean arrays override whether or not
# the addition overflows. We don't check the result but just
# the fact that an OverflowError is not raised.
with tm.assertRaises(AssertionError):
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
arr_nans=np.array([True, True]))
with tm.assertRaises(AssertionError):
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
b_nans=np.array([True, True]))
with tm.assertRaises(AssertionError):
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]),
arr_nans=np.array([True, False]),
b_nans=np.array([False, True]))


if __name__ == '__main__':
import nose
Expand Down
8 changes: 6 additions & 2 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.types.missing import isnull
from pandas.core import common as com, algorithms
from pandas.core.common import AbstractMethodError
from pandas.core.nanops import _checked_add_with_arr

import pandas.formats.printing as printing
import pandas.tslib as tslib
Expand Down Expand Up @@ -684,7 +685,8 @@ def _add_delta_td(self, other):
# return the i8 result view

inc = tslib._delta_to_nanoseconds(other)
new_values = (self.asi8 + inc).view('i8')
new_values = _checked_add_with_arr(self.asi8, inc,
arr_nans=self._isnan).view('i8')
if self.hasnans:
new_values[self._isnan] = tslib.iNaT
return new_values.view('i8')
Expand All @@ -699,7 +701,9 @@ def _add_delta_tdi(self, other):

self_i8 = self.asi8
other_i8 = other.asi8
new_values = self_i8 + other_i8
new_values = _checked_add_with_arr(self_i8, other_i8,
arr_nans=self._isnan,
b_nans=other._isnan)
if self.hasnans or other.hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = tslib.iNaT
Expand Down
14 changes: 14 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,20 @@ def test_add_overflow(self):
with tm.assertRaisesRegexp(OverflowError, msg):
Timestamp('2000') + to_timedelta([106580], 'D')

# These should not overflow!
exp = TimedeltaIndex([pd.NaT])
result = to_timedelta([pd.NaT]) - Timedelta('1 days')
tm.assert_index_equal(result, exp)

exp = TimedeltaIndex(['4 days', pd.NaT])
result = to_timedelta(['5 days', pd.NaT]) - Timedelta('1 days')
tm.assert_index_equal(result, exp)

exp = TimedeltaIndex([pd.NaT, pd.NaT, '5 hours'])
result = (to_timedelta([pd.NaT, '5 days', '1 hours']) +
to_timedelta(['7 seconds', pd.NaT, '4 hours']))
tm.assert_index_equal(result, exp)

if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)

0 comments on commit 46909f0

Please sign in to comment.