Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Check for overflow in TimedeltaIndex addition. #14237

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1425,6 +1425,8 @@ Bug Fixes
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)
- Bug in ``Categorical.from_codes()`` where an unhelpful error was raised when an invalid ``ordered`` parameter was passed in (:issue:`14058`)
- Bug in ``Series`` construction from a tuple of integers on windows not returning default dtype (int64) (:issue:`13646`)
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow was not being caught (:issue:`14068`)

- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)
- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`)

Expand Down
26 changes: 26 additions & 0 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,3 +809,29 @@ def unique1d(values):
table = _hash.PyObjectHashTable(len(values))
uniques = table.unique(_ensure_object(values))
return uniques


def _checked_add_with_arr(arr, b):
"""
Performs the addition of an int64 array and an int64 integer (or array)
but checks that they do not result in overflow first.

Parameters
----------
arr : array addend.
b : array or scalar addend.

Returns
-------
sum : An array for elements x + b for each element x in arr if b is
a scalar or an array for elements x + y for each element pair
(x, y) in (arr, b).

Raises
------
OverflowError if any x + y exceeds the maximum int64 value.
"""
if (np.iinfo(np.int64).max - b < arr).any():
raise OverflowError("Python int too large to "
"convert to C long")
return arr + b
15 changes: 15 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,21 @@ def prng(self):
return np.random.RandomState(1234)


def test_int64_add_overflow():
# see gh-14068
msg = "too (big|large) to convert"
m = np.iinfo(np.int64).max

with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), m)
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]))
with tm.assertRaisesRegexp(OverflowError, msg):
with tm.assert_produces_warning(RuntimeWarning):
nanops._checked_add_with_arr(np.array([m, m]),
np.array([np.nan, m]))


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'
Expand Down
3 changes: 2 additions & 1 deletion pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pandas.compat import u
from pandas.tseries.frequencies import to_offset
from pandas.core.base import _shared_docs
from pandas.core.nanops import _checked_add_with_arr
from pandas.indexes.base import _index_shared_docs
import pandas.core.common as com
import pandas.types.concat as _concat
Expand Down Expand Up @@ -343,7 +344,7 @@ def _add_datelike(self, other):
else:
other = Timestamp(other)
i8 = self.asi8
result = i8 + other.value
result = _checked_add_with_arr(i8, other.value)
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
return DatetimeIndex(result, name=self.name, copy=False)

Expand Down
11 changes: 11 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,17 @@ def test_tdi_ops_attributes(self):
tm.assert_index_equal(result, exp)
self.assertEqual(result.freq, None)

def test_add_overflow(self):
# see gh-14068
msg = "too (big|large) to convert"
with tm.assertRaisesRegexp(OverflowError, msg):
to_timedelta(106580, 'D') + Timestamp('2000')
with tm.assertRaisesRegexp(OverflowError, msg):
Timestamp('2000') + to_timedelta(106580, 'D')
with tm.assertRaisesRegexp(OverflowError, msg):
to_timedelta([106580], 'D') + Timestamp('2000')
with tm.assertRaisesRegexp(OverflowError, msg):
Timestamp('2000') + to_timedelta([106580], 'D')

if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down