Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

order of exceptions in array_to_datetime #19621

Merged
merged 5 commits into from
Feb 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 44 additions & 40 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import numpy as np
cnp.import_array()


from cpython cimport PyFloat_Check
from cpython cimport PyFloat_Check, PyUnicode_Check

from util cimport (is_integer_object, is_float_object, is_string_object,
is_datetime64_object)
Expand Down Expand Up @@ -56,6 +56,8 @@ from tslibs.timestamps cimport (create_timestamp_from_ts,
_NS_UPPER_BOUND, _NS_LOWER_BOUND)
from tslibs.timestamps import Timestamp

cdef bint PY2 = str == bytes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to util.pxd!



cdef inline object create_datetime_from_ts(
int64_t value, pandas_datetimestruct dts,
Expand Down Expand Up @@ -549,23 +551,23 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
raise

elif PyDate_Check(val):
seen_datetime = 1
iresult[i] = pydate_to_dt64(val, &dts)
try:
check_dts_bounds(&dts)
seen_datetime = 1
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise

elif is_datetime64_object(val):
seen_datetime = 1
if get_datetime64_value(val) == NPY_NAT:
iresult[i] = NPY_NAT
else:
try:
iresult[i] = get_datetime64_nanos(val)
seen_datetime = 1
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
Expand All @@ -574,66 +576,44 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',

elif is_integer_object(val) or is_float_object(val):
# these must be ns unit by-definition
seen_integer = 1

if val != val or val == NPY_NAT:
iresult[i] = NPY_NAT
elif is_raise or is_ignore:
iresult[i] = val
seen_integer = 1
else:
# coerce
# we now need to parse this as if unit='ns'
# we can ONLY accept integers at this point
# if we have previously accepted (or will in future accept)
# datetimes/strings, then we must coerce
seen_integer = 1
try:
iresult[i] = cast_from_unit(val, 'ns')
except:
iresult[i] = NPY_NAT

elif is_string_object(val):
# string
seen_string = 1

if len(val) == 0 or val in nat_strings:
iresult[i] = NPY_NAT
continue

seen_string = 1
if PyUnicode_Check(val) and PY2:
val = val.encode('utf-8')

try:
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
value = dtstruct_to_dt64(&dts)
if out_local == 1:
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382: for just-barely-OutOfBounds values, falling back to
# the dateutil parser would return an incorrect result because
# it ignores nanoseconds
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
elif is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
raise ValueError("time data {val} doesn't match "
"format specified"
.format(val=val))
return values
elif is_coerce:
iresult[i] = NPY_NAT
continue
raise
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
if _parse_today_now(val, &iresult[i]):
continue
elif is_coerce:
# A ValueError at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if _parse_today_now(val, &iresult[i]):
continue
elif require_iso8601:
# if requiring iso8601 strings, skip trying
# other formats
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_raise:
Expand All @@ -646,8 +626,6 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
py_dt = parse_datetime_string(val, dayfirst=dayfirst,
yearfirst=yearfirst)
except Exception:
if _parse_today_now(val, &iresult[i]):
continue
if is_coerce:
iresult[i] = NPY_NAT
continue
Expand All @@ -656,16 +634,42 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
try:
_ts = convert_datetime_to_tsobject(py_dt, None)
iresult[i] = _ts.value
except ValueError:
except OutOfBoundsDatetime:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
except:
# TODO: What exception are we concerned with here?
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
else:
# No error raised by string_to_dts, pick back up
# where we left off
value = dtstruct_to_dt64(&dts)
if out_local == 1:
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
try:
check_dts_bounds(&dts)
except OutOfBoundsDatetime:
# GH#19382: for just-barely-OutOfBounds values, falling back to
# the dateutil parser would return an incorrect result because
# it ignores nanoseconds
if is_coerce:
iresult[i] = NPY_NAT
continue
elif require_iso8601:
if is_raise:
raise ValueError("time data {val} doesn't "
"match format specified"
.format(val=val))
return values
raise

else:
if is_coerce:
iresult[i] = NPY_NAT
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pandas.core.tools import datetimes as tools

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import lmap
from pandas.compat import lmap, PY3
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.core.dtypes.common import is_datetime64_ns_dtype
from pandas.util import testing as tm
Expand Down Expand Up @@ -238,6 +238,13 @@ def test_to_datetime_today(self):
assert pdtoday.tzinfo is None
assert pdtoday2.tzinfo is None

def test_to_datetime_today_now_unicode_bytes(self):
    # Smoke test: converting the special strings "now" and "today" must
    # not raise, whether they are written as unicode literals or as
    # native str literals.
    for keyword in (u'now', u'today'):
        to_datetime([keyword])
    if PY3:
        # The native-str variants are only exercised when PY3 is false.
        return
    for keyword in ('now', 'today'):
        to_datetime([keyword])

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_dt64s(self, cache):
in_bound_dts = [
Expand Down