Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERF: added no exception versions of '_string_to_dts' and 'parse_iso_8601_datetime' functions #26220

Merged
merged 14 commits into from
May 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _test_parse_iso8601(object ts):
elif ts == 'today':
return Timestamp.now().normalize()

_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
Expand Down Expand Up @@ -511,6 +511,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
int out_local=0, out_tzoffset=0
float offset_seconds, tz_offset
set out_tzoffset_vals = set()
bint string_to_dts_failed

# specify error conditions
assert is_raise or is_ignore or is_coerce
Expand Down Expand Up @@ -578,10 +579,12 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
iresult[i] = NPY_NAT
continue

try:
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
except ValueError:
# A ValueError at this point is a _parsing_ error
string_to_dts_failed = _string_to_dts(
val, &dts, &out_local,
&out_tzoffset, False
)
if string_to_dts_failed:
# An error at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if _parse_today_now(val, &iresult[i]):
continue
Expand Down Expand Up @@ -623,14 +626,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',

_ts = convert_datetime_to_tsobject(py_dt, None)
iresult[i] = _ts.value
except:
# TODO: What exception are we concerned with here?
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
else:
# No error raised by string_to_dts, pick back up
if not string_to_dts_failed:
# No error reported by string_to_dts, pick back up
# where we left off
value = dtstruct_to_dt64(&dts)
if out_local == 1:
Expand Down
89 changes: 59 additions & 30 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,44 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
return obj


cdef _TSObject create_tsobject_tz_using_offset(int64_t value,
                                               int tzoffset, object tz=None):
    """
    Build a _TSObject from a numpy datetime64 `value` that carries a fixed
    timezone offset `tzoffset`, optionally expressing the result in the
    timezone `tz`.

    Parameters
    ----------
    value : int64_t
        numpy dt64
    tzoffset : int
        Fixed offset from UTC, in minutes (handed to ``pytz.FixedOffset``).
    tz : tzinfo or None
        timezone for the timezone-aware output.

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj
        datetime dt

    # Interpret `value` as wall time in the fixed-offset zone and shift it
    # to UTC before building the intermediate object.
    fixed_offset = pytz.FixedOffset(tzoffset)
    value = tz_convert_single(value, fixed_offset, UTC)
    obj = convert_to_tsobject(value, fixed_offset, None, 0, 0)

    if tz is not None:
        # Keep the converter same as PyDateTime's
        dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
                      obj.dts.hour, obj.dts.min, obj.dts.sec,
                      obj.dts.us, obj.tzinfo)
        # The datetime constructor above only carries microseconds, so the
        # nanosecond remainder is forwarded separately.
        obj = convert_datetime_to_tsobject(
            dt, tz, nanos=obj.dts.ps // 1000)
        return obj

    # No target timezone requested: validate and return the fixed-offset
    # object as is.
    check_overflows(obj)
    return obj


cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
bint dayfirst=False,
bint yearfirst=False):
Expand Down Expand Up @@ -420,15 +458,14 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
obj : _TSObject
"""
cdef:
_TSObject obj
npy_datetimestruct dts
int64_t value # numpy dt64
int out_local = 0, out_tzoffset = 0
datetime dt
bint do_parse_datetime_string = False

if tz is not None:
tz = maybe_get_tz(tz)

obj = _TSObject()

assert isinstance(ts, str)

if len(ts) == 0 or ts in nat_strings:
Expand All @@ -443,34 +480,23 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = datetime.now(tz)
# equiv: datetime.today().replace(tzinfo=tz)
else:
string_to_dts_failed = _string_to_dts(
ts, &dts, &out_local,
&out_tzoffset, False
)
try:
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
if tz is None:
check_dts_bounds(&obj.dts)
check_overflows(obj)
return obj
if not string_to_dts_failed:
check_dts_bounds(&dts)
value = dtstruct_to_dt64(&dts)
if out_local == 1:
return create_tsobject_tz_using_offset(value,
out_tzoffset, tz)
else:
# Keep the converter same as PyDateTime's
obj = convert_to_tsobject(obj.value, obj.tzinfo,
None, 0, 0)
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
obj.dts.hour, obj.dts.min, obj.dts.sec,
obj.dts.us, obj.tzinfo)
obj = convert_datetime_to_tsobject(
dt, tz, nanos=obj.dts.ps // 1000)
return obj

else:
ts = obj.value
if tz is not None:
# shift for localize_tso
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise')[0]
ts = value
if tz is not None:
# shift for localize_tso
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise')[0]

except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
Expand All @@ -479,6 +505,9 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
raise

except ValueError:
do_parse_datetime_string = True

if string_to_dts_failed or do_parse_datetime_string:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
yearfirst=yearfirst)
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@ cdef npy_timedelta get_timedelta64_value(object obj) nogil
cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil

cdef int _string_to_dts(object val, npy_datetimestruct* dts,
int* out_local, int* out_tzoffset) except? -1
int* out_local, int* out_tzoffset,
bint want_exc) except? -1
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cdef extern from "src/datetime/np_datetime.h":
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

cdef extern from "src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(const char *str, int len,
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local, int *out_tzoffset)

Expand Down Expand Up @@ -170,11 +170,12 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):


cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
int* out_local, int* out_tzoffset) except? -1:
int* out_local, int* out_tzoffset,
bint want_exc) except? -1:
cdef:
Py_ssize_t length
const char* buf

buf = get_c_string_buf_and_size(val, &length)
return parse_iso_8601_datetime(buf, length,
return parse_iso_8601_datetime(buf, length, want_exc,
dts, out_local, out_tzoffset)
63 changes: 41 additions & 22 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ This file implements string parsing and creation for NumPy datetime.
*
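* Returns 0 on success, -1 on failure. When 'want_exc' is non-zero a
* Python ValueError describing the problem is set on failure; when it is
* zero the failure is reported through the return value only.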
*/
int parse_iso_8601_datetime(const char *str, int len,
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local, int *out_tzoffset) {
int year_leap = 0;
Expand Down Expand Up @@ -173,8 +173,10 @@ int parse_iso_8601_datetime(const char *str, int len,
goto parse_error;
}
if (out->month < 1 || out->month > 12) {
PyErr_Format(PyExc_ValueError,
"Month out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Month out of range in datetime string \"%s\"", str);
}
goto error;
}

Expand Down Expand Up @@ -217,8 +219,10 @@ int parse_iso_8601_datetime(const char *str, int len,
}
if (out->day < 1 ||
out->day > days_per_month_table[year_leap][out->month - 1]) {
PyErr_Format(PyExc_ValueError,
"Day out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Day out of range in datetime string \"%s\"", str);
}
goto error;
}

Expand Down Expand Up @@ -251,8 +255,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->hour >= 24) {
PyErr_Format(PyExc_ValueError,
"Hours out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Hours out of range in datetime string \"%s\"",
str);
}
goto error;
}
}
Expand Down Expand Up @@ -291,8 +298,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->min >= 60) {
PyErr_Format(PyExc_ValueError,
"Minutes out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Minutes out of range in datetime string \"%s\"",
str);
}
goto error;
}
} else if (!has_hms_sep) {
Expand Down Expand Up @@ -328,8 +338,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->sec >= 60) {
PyErr_Format(PyExc_ValueError,
"Seconds out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Seconds out of range in datetime string \"%s\"",
str);
}
goto error;
}
} else if (!has_hms_sep) {
Expand Down Expand Up @@ -438,10 +451,12 @@ int parse_iso_8601_datetime(const char *str, int len,
substr += 2;
sublen -= 2;
if (offset_hour >= 24) {
PyErr_Format(PyExc_ValueError,
"Timezone hours offset out of range "
"in datetime string \"%s\"",
str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Timezone hours offset out of range "
"in datetime string \"%s\"",
str);
}
goto error;
}
} else if (sublen >= 1 && isdigit(substr[0])) {
Expand All @@ -466,10 +481,12 @@ int parse_iso_8601_datetime(const char *str, int len,
substr += 2;
sublen -= 2;
if (offset_minute >= 60) {
PyErr_Format(PyExc_ValueError,
"Timezone minutes offset out of range "
"in datetime string \"%s\"",
str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Timezone minutes offset out of range "
"in datetime string \"%s\"",
str);
}
goto error;
}
} else if (sublen >= 1 && isdigit(substr[0])) {
Expand Down Expand Up @@ -507,9 +524,11 @@ int parse_iso_8601_datetime(const char *str, int len,
return 0;

parse_error:
PyErr_Format(PyExc_ValueError,
"Error parsing datetime string \"%s\" at position %d", str,
(int)(substr - str));
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Error parsing datetime string \"%s\" at position %d", str,
(int)(substr - str));
}
return -1;

error:
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ This file implements string parsing and creation for NumPy datetime.
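* Returns 0 on success, -1 on failure. When 'want_exc' is non-zero a
* Python ValueError describing the problem is set on failure; when it is
* zero the failure is reported through the return value only.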
*/
int
parse_iso_8601_datetime(const char *str, int len,
parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local,
int *out_tzoffset);
Expand All @@ -79,5 +79,4 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
int
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base);

#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_