Skip to content

Commit

Permalink
Fix parsing corner case closes pandas-dev#19382 (pandas-dev#19529)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Feb 6, 2018
1 parent a22acc2 commit 84522a0
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ Datetimelike
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
-

Timezones
Expand Down
30 changes: 24 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -609,20 +609,38 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382: for a just-barely-out-of-bounds value, falling back
# to the dateutil parser would return an incorrect result
# because it ignores nanoseconds
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
elif is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values
elif is_coerce:
iresult[i] = NPY_NAT
continue
raise
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
if is_coerce:
elif is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError(
"time data %r doesn't match format "
"specified" % (val,))
else:
return values
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values

try:
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
dt64_to_dtstruct, dtstruct_to_dt64,
get_datetime64_unit, get_datetime64_value,
pydatetime_to_dt64)
from np_datetime import OutOfBoundsDatetime

from util cimport (is_string_object,
is_datetime64_object,
Expand Down Expand Up @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise',
errors='raise')[0]

except OutOfBoundsDatetime:
# GH#19382: for a just-barely-out-of-bounds value, falling back to the
# dateutil parser would return an incorrect result because it ignores
# nanoseconds
raise

except ValueError:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas._libs.tslibs import parsing
from pandas.core.tools import datetimes as tools

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import lmap
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.core.dtypes.common import is_datetime64_ns_dtype
Expand Down Expand Up @@ -783,7 +784,6 @@ def test_dataframe_dtypes(self, cache):


class TestToDatetimeMisc(object):

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_iso8601(self, cache):
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
Expand Down Expand Up @@ -1596,6 +1596,20 @@ def test_coerce_of_invalid_datetimes(self):
)
)

def test_to_datetime_barely_out_of_bounds(self):
    """Barely out-of-bounds strings must raise, not be silently truncated.

    GH#19529, GH#19382: the value is close enough to the bounds that
    dropping its nanoseconds would produce an in-bounds datetime.
    """
    values = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

    # Check the public entry point first, then essentially the same thing
    # again by calling the relevant function more directly.
    for parse in (to_datetime, tslib.array_to_datetime):
        with pytest.raises(OutOfBoundsDatetime):
            parse(values)


def test_normalize_date():
value = date(2012, 9, 7)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import long, PY3
from pandas.compat.numpy import np_datetime64_compat
from pandas import Timestamp, Period, Timedelta
Expand Down Expand Up @@ -410,6 +411,13 @@ def test_out_of_bounds_string(self):
with pytest.raises(ValueError):
Timestamp('2263-01-01')

def test_barely_out_of_bounds(self):
    """A barely out-of-bounds timestamp string must raise.

    GH#19529, GH#19382: the value is close enough to the bounds that
    dropping its nanoseconds would produce an in-bounds datetime.
    """
    barely_out_of_bounds = '2262-04-11 23:47:16.854775808'
    with pytest.raises(OutOfBoundsDatetime):
        Timestamp(barely_out_of_bounds)

def test_bounds_with_different_units(self):
out_of_bounds_dates = ('1677-09-21', '2262-04-12')

Expand Down

0 comments on commit 84522a0

Please sign in to comment.