Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix date_range overflow #23345

Merged
merged 9 commits into from
Oct 28, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,7 @@ Datetimelike
- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`)
- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)
- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`)

Timedelta
^^^^^^^^^
Expand Down
42 changes: 40 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1355,11 +1355,11 @@ def _generate_regular_range(cls, start, end, periods, freq):
tz = start.tz
elif start is not None:
b = Timestamp(start).value
e = b + np.int64(periods) * stride
e = _generate_range_overflow_safe(b, periods, stride, side='start')
tz = start.tz
elif end is not None:
e = Timestamp(end).value + stride
b = e - np.int64(periods) * stride
b = _generate_range_overflow_safe(e, periods, stride, side='end')
tz = end.tz
else:
raise ValueError("at least 'start' or 'end' should be specified "
Expand All @@ -1384,6 +1384,44 @@ def _generate_regular_range(cls, start, end, periods, freq):
return data


def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
    """
    Calculate the second endpoint for passing to np.arange, checking
    to avoid an integer overflow.  Catch OverflowError and re-raise
    as OutOfBoundsDatetime.

    Parameters
    ----------
    endpoint : int
        Integer value of the known end of the range.
    periods : int
        Number of strides separating the two endpoints.
    stride : int
        Step between consecutive values, in the same units as `endpoint`.
    side : {'start', 'end'}
        Which end of the range `endpoint` is.  With 'start' the result is
        ``endpoint + periods * stride``; with 'end' it is
        ``endpoint - periods * stride``.

    Returns
    -------
    other_end : int

    Raises
    ------
    OutOfBoundsDatetime
        If either the ``periods * stride`` offset or the resulting
        endpoint does not fit in an int64.
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ['start', 'end']
    if side == 'end':
        stride *= -1

    try:
        # Multiply as Python ints (arbitrary precision): an int64 product
        # would wrap around silently before the checked addition could
        # notice.  Converting an oversized product to np.int64 raises
        # OverflowError, which is handled below like an overflowing add.
        addend = np.int64(int(periods) * int(stride))
        other_end = checked_add_with_arr(np.int64(endpoint), addend)
    except OverflowError:
        raise tslib.OutOfBoundsDatetime('Cannot generate range with '
                                        '{side}={endpoint} and '
                                        'periods={periods}'
                                        .format(side=side, endpoint=endpoint,
                                                periods=periods))
    return other_end


def _infer_tz_from_endpoints(start, end, tz):
"""
If a timezone is not explicitly given via `tz`, see if one can
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas import (
DatetimeIndex, Timestamp, bdate_range, compat, date_range, offsets
)
from pandas.errors import OutOfBoundsDatetime
from pandas.tests.series.common import TestData
from pandas.tseries.offsets import (
BDay, CDay, DateOffset, MonthEnd, generate_range, prefix_mapping
Expand Down Expand Up @@ -79,6 +80,12 @@ def test_date_range_timestamp_equiv_preserve_frequency(self):


class TestDateRanges(TestData):
def test_date_range_out_of_bounds(self):
# GH#14187
# Counting 100000 daily periods forward from the given start must raise
# OutOfBoundsDatetime instead of returning wrapped-around dates.
with pytest.raises(OutOfBoundsDatetime):
date_range('2016-01-01', periods=100000, freq='D')
# Same check when counting 100000 daily periods backward from `end`.
with pytest.raises(OutOfBoundsDatetime):
date_range(end='1763-10-12', periods=100000, freq='D')

def test_date_range_gen_error(self):
rng = date_range('1/1/2000 00:00', '1/1/2000 00:18', freq='5min')
Expand Down