Skip to content

Commit

Permalink
PERF: added no exception versions of '_string_to_dts' and 'parse_iso_8601_datetime' functions (#26220)
Browse files Browse the repository at this point in the history
  • Loading branch information
anmyachev authored and jreback committed May 7, 2019
1 parent a2686c6 commit 2e4e0b9
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 71 deletions.
23 changes: 10 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _test_parse_iso8601(object ts):
elif ts == 'today':
return Timestamp.now().normalize()

_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
Expand Down Expand Up @@ -511,6 +511,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
int out_local=0, out_tzoffset=0
float offset_seconds, tz_offset
set out_tzoffset_vals = set()
bint string_to_dts_failed

# specify error conditions
assert is_raise or is_ignore or is_coerce
Expand Down Expand Up @@ -578,10 +579,12 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
iresult[i] = NPY_NAT
continue

try:
_string_to_dts(val, &dts, &out_local, &out_tzoffset)
except ValueError:
# A ValueError at this point is a _parsing_ error
string_to_dts_failed = _string_to_dts(
val, &dts, &out_local,
&out_tzoffset, False
)
if string_to_dts_failed:
# An error at this point is a _parsing_ error
# specifically _not_ OutOfBoundsDatetime
if _parse_today_now(val, &iresult[i]):
continue
Expand Down Expand Up @@ -623,14 +626,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',

_ts = convert_datetime_to_tsobject(py_dt, None)
iresult[i] = _ts.value
except:
# TODO: What exception are we concerned with here?
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
else:
# No error raised by string_to_dts, pick back up
if not string_to_dts_failed:
# No error reported by string_to_dts, pick back up
# where we left off
value = dtstruct_to_dt64(&dts)
if out_local == 1:
Expand Down
89 changes: 59 additions & 30 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,44 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
return obj


cdef _TSObject create_tsobject_tz_using_offset(int64_t value,
                                               int tzoffset, object tz=None):
    """
    Build a _TSObject from a numpy datetime64 `value` that was parsed
    together with an explicit timezone offset `tzoffset`, optionally
    producing the result in timezone `tz`.

    Parameters
    ----------
    value : int64_t
        numpy dt64
    tzoffset : int
        timezone offset handed to ``pytz.FixedOffset``
    tz : tzinfo or None
        timezone for the timezone-aware output.

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject tso
        datetime py_dt

    # Fixed-offset timezone built from the offset found in the string
    fixed_tz = pytz.FixedOffset(tzoffset)
    value = tz_convert_single(value, fixed_tz, UTC)
    tso = convert_to_tsobject(value, fixed_tz, None, 0, 0)

    if tz is not None:
        # Keep the converter same as PyDateTime's: go through a
        # datetime object and pass the sub-microsecond part as `nanos`
        py_dt = datetime(tso.dts.year, tso.dts.month, tso.dts.day,
                         tso.dts.hour, tso.dts.min, tso.dts.sec,
                         tso.dts.us, tso.tzinfo)
        tso = convert_datetime_to_tsobject(
            py_dt, tz, nanos=tso.dts.ps // 1000)
        return tso

    # No target timezone requested: only validate the result
    check_overflows(tso)
    return tso


cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
bint dayfirst=False,
bint yearfirst=False):
Expand Down Expand Up @@ -420,15 +458,14 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
obj : _TSObject
"""
cdef:
_TSObject obj
npy_datetimestruct dts
int64_t value # numpy dt64
int out_local = 0, out_tzoffset = 0
datetime dt
bint do_parse_datetime_string = False

if tz is not None:
tz = maybe_get_tz(tz)

obj = _TSObject()

assert isinstance(ts, str)

if len(ts) == 0 or ts in nat_strings:
Expand All @@ -443,34 +480,23 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
ts = datetime.now(tz)
# equiv: datetime.today().replace(tzinfo=tz)
else:
string_to_dts_failed = _string_to_dts(
ts, &dts, &out_local,
&out_tzoffset, False
)
try:
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
if tz is None:
check_dts_bounds(&obj.dts)
check_overflows(obj)
return obj
if not string_to_dts_failed:
check_dts_bounds(&dts)
value = dtstruct_to_dt64(&dts)
if out_local == 1:
return create_tsobject_tz_using_offset(value,
out_tzoffset, tz)
else:
# Keep the converter same as PyDateTime's
obj = convert_to_tsobject(obj.value, obj.tzinfo,
None, 0, 0)
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
obj.dts.hour, obj.dts.min, obj.dts.sec,
obj.dts.us, obj.tzinfo)
obj = convert_datetime_to_tsobject(
dt, tz, nanos=obj.dts.ps // 1000)
return obj

else:
ts = obj.value
if tz is not None:
# shift for localize_tso
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise')[0]
ts = value
if tz is not None:
# shift for localize_tso
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise')[0]

except OutOfBoundsDatetime:
# GH#19382 for just-barely-OutOfBounds falling back to dateutil
Expand All @@ -479,6 +505,9 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
raise

except ValueError:
do_parse_datetime_string = True

if string_to_dts_failed or do_parse_datetime_string:
try:
ts = parse_datetime_string(ts, dayfirst=dayfirst,
yearfirst=yearfirst)
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@ cdef npy_timedelta get_timedelta64_value(object obj) nogil
cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil

cdef int _string_to_dts(object val, npy_datetimestruct* dts,
int* out_local, int* out_tzoffset) except? -1
int* out_local, int* out_tzoffset,
bint want_exc) except? -1
7 changes: 4 additions & 3 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ cdef extern from "src/datetime/np_datetime.h":
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

cdef extern from "src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(const char *str, int len,
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local, int *out_tzoffset)

Expand Down Expand Up @@ -170,11 +170,12 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):


cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
int* out_local, int* out_tzoffset) except? -1:
int* out_local, int* out_tzoffset,
bint want_exc) except? -1:
cdef:
Py_ssize_t length
const char* buf

buf = get_c_string_buf_and_size(val, &length)
return parse_iso_8601_datetime(buf, length,
return parse_iso_8601_datetime(buf, length, want_exc,
dts, out_local, out_tzoffset)
63 changes: 41 additions & 22 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ This file implements string parsing and creation for NumPy datetime.
*
* Returns 0 on success, -1 on failure.
*/
int parse_iso_8601_datetime(const char *str, int len,
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local, int *out_tzoffset) {
int year_leap = 0;
Expand Down Expand Up @@ -173,8 +173,10 @@ int parse_iso_8601_datetime(const char *str, int len,
goto parse_error;
}
if (out->month < 1 || out->month > 12) {
PyErr_Format(PyExc_ValueError,
"Month out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Month out of range in datetime string \"%s\"", str);
}
goto error;
}

Expand Down Expand Up @@ -217,8 +219,10 @@ int parse_iso_8601_datetime(const char *str, int len,
}
if (out->day < 1 ||
out->day > days_per_month_table[year_leap][out->month - 1]) {
PyErr_Format(PyExc_ValueError,
"Day out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Day out of range in datetime string \"%s\"", str);
}
goto error;
}

Expand Down Expand Up @@ -251,8 +255,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->hour >= 24) {
PyErr_Format(PyExc_ValueError,
"Hours out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Hours out of range in datetime string \"%s\"",
str);
}
goto error;
}
}
Expand Down Expand Up @@ -291,8 +298,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->min >= 60) {
PyErr_Format(PyExc_ValueError,
"Minutes out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Minutes out of range in datetime string \"%s\"",
str);
}
goto error;
}
} else if (!has_hms_sep) {
Expand Down Expand Up @@ -328,8 +338,11 @@ int parse_iso_8601_datetime(const char *str, int len,
++substr;
--sublen;
if (out->sec >= 60) {
PyErr_Format(PyExc_ValueError,
"Seconds out of range in datetime string \"%s\"", str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Seconds out of range in datetime string \"%s\"",
str);
}
goto error;
}
} else if (!has_hms_sep) {
Expand Down Expand Up @@ -438,10 +451,12 @@ int parse_iso_8601_datetime(const char *str, int len,
substr += 2;
sublen -= 2;
if (offset_hour >= 24) {
PyErr_Format(PyExc_ValueError,
"Timezone hours offset out of range "
"in datetime string \"%s\"",
str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Timezone hours offset out of range "
"in datetime string \"%s\"",
str);
}
goto error;
}
} else if (sublen >= 1 && isdigit(substr[0])) {
Expand All @@ -466,10 +481,12 @@ int parse_iso_8601_datetime(const char *str, int len,
substr += 2;
sublen -= 2;
if (offset_minute >= 60) {
PyErr_Format(PyExc_ValueError,
"Timezone minutes offset out of range "
"in datetime string \"%s\"",
str);
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Timezone minutes offset out of range "
"in datetime string \"%s\"",
str);
}
goto error;
}
} else if (sublen >= 1 && isdigit(substr[0])) {
Expand Down Expand Up @@ -507,9 +524,11 @@ int parse_iso_8601_datetime(const char *str, int len,
return 0;

parse_error:
PyErr_Format(PyExc_ValueError,
"Error parsing datetime string \"%s\" at position %d", str,
(int)(substr - str));
if (want_exc) {
PyErr_Format(PyExc_ValueError,
"Error parsing datetime string \"%s\" at position %d", str,
(int)(substr - str));
}
return -1;

error:
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ This file implements string parsing and creation for NumPy datetime.
* Returns 0 on success, -1 on failure.
*/
int
parse_iso_8601_datetime(const char *str, int len,
parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
int *out_local,
int *out_tzoffset);
Expand All @@ -79,5 +79,4 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base);
int
make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
NPY_DATETIMEUNIT base);

#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_

0 comments on commit 2e4e0b9

Please sign in to comment.