From 77effde12797cd11642ed7e70b30b15164e18076 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Fri, 16 Sep 2016 14:39:23 -0400
Subject: [PATCH] BUG: Check for overflow in TimedeltaIndex addition.

Closes gh-14068.
---
Review notes: _checked_add_with_arr also guards the lower int64 bound and
accepts an optional arr_mask; the blob ids on the "index" lines below were
not recomputed for this revision.

 doc/source/whatsnew/v0.19.0.txt         |  2 ++
 pandas/core/nanops.py                   | 43 ++++++++++++++++++++++++++
 pandas/tests/test_nanops.py             | 28 +++++++++++++++++
 pandas/tseries/tdi.py                   |  6 +++-
 pandas/tseries/tests/test_timedeltas.py | 11 +++++++
 5 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 454ffc5e5c685..7e62384f4b789 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1425,6 +1425,8 @@ Bug Fixes
 - Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)
 - Bug in ``Categorical.from_codes()`` where an unhelpful error was raised when an invalid ``ordered`` parameter was passed in (:issue:`14058`)
 - Bug in ``Series`` construction from a tuple of integers on windows not returning default dtype (int64) (:issue:`13646`)
+- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow was not being caught (:issue:`14068`)
+
 - Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`)
 - Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`)
 
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index a76e348b7dee2..564586eec5a8e 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -809,3 +809,46 @@ def unique1d(values):
         table = _hash.PyObjectHashTable(len(values))
         uniques = table.unique(_ensure_object(values))
     return uniques
+
+
+def _checked_add_with_arr(arr, b, arr_mask=None):
+    """
+    Add an int64 array and an int64 scalar (or array), raising if any
+    element-wise sum would fall outside the int64 range.
+
+    Parameters
+    ----------
+    arr : array addend.
+    b : array or scalar addend.
+    arr_mask : boolean array or None, default None
+        Positions of arr to leave out of the range check (e.g. missing
+        value sentinels that the caller masks afterwards).
+
+    Returns
+    -------
+    sum : An array for elements x + b for each element x in arr if b is
+          a scalar or an array for elements x + y for each element pair
+          (x, y) in (arr, b).
+
+    Raises
+    ------
+    OverflowError if any unmasked x + y is greater than the maximum or
+    less than the minimum int64 value.
+    """
+    int64 = np.iinfo(np.int64)
+    b_arr = np.asarray(b)
+    positive = b_arr > 0
+    negative = b_arr < 0
+
+    # Substitute 0 for addends of the other sign so that computing the
+    # bound cannot itself wrap around (e.g. int64.max - (-1)).
+    upper = int64.max - np.where(positive, b_arr, 0)
+    lower = int64.min - np.where(negative, b_arr, 0)
+    out_of_range = (positive & (arr > upper)) | (negative & (arr < lower))
+    if arr_mask is not None:
+        out_of_range = out_of_range & ~np.asarray(arr_mask, dtype=bool)
+
+    if out_of_range.any():
+        raise OverflowError("Python int too large to "
+                            "convert to C long")
+    return arr + b
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index dd3a49de55d73..f00fdd196abea 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -1002,6 +1002,34 @@ def prng(self):
         return np.random.RandomState(1234)
 
 
+def test_int64_add_overflow():
+    # see gh-14068
+    msg = "too (big|large) to convert"
+    m = np.iinfo(np.int64).max
+    n = np.iinfo(np.int64).min
+
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([m, m]), m)
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]))
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([n, n]), n)
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
+    with tm.assertRaisesRegexp(OverflowError, msg):
+        with tm.assert_produces_warning(RuntimeWarning):
+            nanops._checked_add_with_arr(np.array([m, m]),
+                                         np.array([np.nan, m]))
+
+    # mixed-sign addends that stay in range must not raise, and masked
+    # positions are exempt from the check
+    result = nanops._checked_add_with_arr(np.array([m, n]),
+                                          np.array([-1, 1]))
+    tm.assert_numpy_array_equal(result, np.array([m - 1, n + 1]))
+    nanops._checked_add_with_arr(np.array([n, 0]), -1,
+                                 arr_mask=np.array([True, False]))
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'
diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py
index c527bbad555f9..f1e199adeebfc 100644
--- a/pandas/tseries/tdi.py
+++ b/pandas/tseries/tdi.py
@@ -21,6 +21,7 @@
 from pandas.compat import u
 from pandas.tseries.frequencies import to_offset
 from pandas.core.base import _shared_docs
+from pandas.core.nanops import _checked_add_with_arr
 from pandas.indexes.base import _index_shared_docs
 import pandas.core.common as com
 import pandas.types.concat as _concat
@@ -343,7 +344,10 @@ def _add_datelike(self, other):
         else:
             other = Timestamp(other)
             i8 = self.asi8
-            result = i8 + other.value
+            # entries equal to iNaT are left out of the range check; the
+            # _maybe_mask_results call below is expected to reset them
+            result = _checked_add_with_arr(i8, other.value,
+                                           arr_mask=(i8 == tslib.iNaT))
             result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
         return DatetimeIndex(result, name=self.name, copy=False)
 
diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py
index ab413af897215..38e210d698035 100644
--- a/pandas/tseries/tests/test_timedeltas.py
+++ b/pandas/tseries/tests/test_timedeltas.py
@@ -1950,6 +1950,17 @@ def test_tdi_ops_attributes(self):
         tm.assert_index_equal(result, exp)
         self.assertEqual(result.freq, None)
 
+    def test_add_overflow(self):
+        # see gh-14068
+        msg = "too (big|large) to convert"
+        with tm.assertRaisesRegexp(OverflowError, msg):
+            to_timedelta(106580, 'D') + Timestamp('2000')
+        with tm.assertRaisesRegexp(OverflowError, msg):
+            Timestamp('2000') + to_timedelta(106580, 'D')
+        with tm.assertRaisesRegexp(OverflowError, msg):
+            to_timedelta([106580], 'D') + Timestamp('2000')
+        with tm.assertRaisesRegexp(OverflowError, msg):
+            Timestamp('2000') + to_timedelta([106580], 'D')
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],