Skip to content

Commit

Permalink
Fix parsing corner case closes pandas-dev#19382
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Feb 4, 2018
1 parent 3f3b4e0 commit f2cf915
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 11 deletions.
41 changes: 31 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -609,20 +609,26 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382: for a just-barely-OutOfBounds value, falling back to
# the dateutil parser would return an incorrect result, because
# it ignores nanoseconds.
if require_iso8601:
if _handle_error_require_iso8601(val, &iresult[i],
is_coerce, is_raise):
continue
return values
elif is_coerce:
iresult[i] = NPY_NAT
continue
raise
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
if _handle_error_require_iso8601(val, &iresult[i],
is_coerce, is_raise):
continue
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError(
"time data %r doesn't match format "
"specified" % (val,))
else:
return values
return values

try:
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
Expand Down Expand Up @@ -725,6 +731,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
return oresult


cdef bint _handle_error_require_iso8601(object val, int64_t* iresult,
                                        bint is_coerce,
                                        bint is_raise) except? -1:
    # Decide how the caller proceeds after `val` failed to convert while
    # ISO 8601 input is required.
    #
    # Parameters
    # ----------
    # val : the value that could not be converted
    # iresult : pointer to the int64 output slot belonging to `val`
    # is_coerce, is_raise : error-handling flags passed in by the caller
    #
    # Returns
    # -------
    # True when the caller should `continue` with the next value,
    # False when the caller should return `values`.
    #
    # Raises
    # ------
    # ValueError when `is_raise` is set and neither of the earlier
    # branches applied.

    # _parse_today_now is handed the output slot; a truthy result means
    # nothing more needs doing here (presumably it filled the slot --
    # confirm against its definition).
    if _parse_today_now(val, iresult):
        return True

    # Coercion: record NaT for this value and keep going.
    if is_coerce:
        iresult[0] = NPY_NAT
        return True

    if is_raise:
        msg = ("time data {val} doesn't match format "
               "specified")
        raise ValueError(msg.format(val=val))

    # Neither coercing nor raising.
    return False


cdef inline bint _parse_today_now(str val, int64_t* iresult):
# We delay this check for as long as possible
# because it catches relatively rare cases
Expand Down
8 changes: 8 additions & 0 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from np_datetime cimport (check_dts_bounds,
dt64_to_dtstruct, dtstruct_to_dt64,
get_datetime64_unit, get_datetime64_value,
pydatetime_to_dt64)
from np_datetime import OutOfBoundsDatetime

from util cimport (is_string_object,
is_datetime64_object,
Expand Down Expand Up @@ -472,6 +473,13 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise',
errors='raise')[0]

except OutOfBoundsDatetime:
# GH#19382: for a just-barely-OutOfBounds value, falling back to the
# dateutil parser would return an incorrect result, because it ignores
# nanoseconds.
raise

except ValueError:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
Expand Down
14 changes: 13 additions & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,6 @@ def test_dataframe_dtypes(self, cache):


class TestToDatetimeMisc(object):

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_iso8601(self, cache):
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
Expand Down Expand Up @@ -1596,6 +1595,19 @@ def test_coerce_of_invalid_datetimes(self):
)
)

def test_to_datetime_barely_out_of_bounds(self):
    # GH#19382: the value sits close enough to the bounds that dropping
    # its nanoseconds would produce an in-bounds datetime, so it must be
    # rejected rather than silently truncated.
    barely_oob = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

    # First the public entry point, then essentially the same check made
    # by calling the relevant function more directly.
    for parse in (to_datetime, tslib.array_to_datetime):
        with pytest.raises(tslib.OutOfBoundsDatetime):
            parse(barely_oob)


def test_normalize_date():
value = date(2012, 9, 7)
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/scalar/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pandas.tseries import offsets

from pandas._libs.tslib import OutOfBoundsDatetime
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz

Expand Down Expand Up @@ -410,6 +411,12 @@ def test_out_of_bounds_string(self):
with pytest.raises(ValueError):
Timestamp('2263-01-01')

def test_barely_out_of_bounds(self):
    # GH#19382: this string is close enough to the bounds that dropping
    # its nanoseconds would give an in-bounds datetime; constructing a
    # Timestamp from it must raise instead.
    barely_oob = '2262-04-11 23:47:16.854775808'
    with pytest.raises(OutOfBoundsDatetime):
        Timestamp(barely_oob)

def test_bounds_with_different_units(self):
out_of_bounds_dates = ('1677-09-21', '2262-04-12')

Expand Down

0 comments on commit f2cf915

Please sign in to comment.