diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e3fb1c38cc7..68ad38dc37165 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -204,7 +204,7 @@ def _test_parse_iso8601(object ts): elif ts == 'today': return Timestamp.now().normalize() - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True) obj.value = dtstruct_to_dt64(&obj.dts) check_dts_bounds(&obj.dts) if out_local == 1: @@ -511,6 +511,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', int out_local=0, out_tzoffset=0 float offset_seconds, tz_offset set out_tzoffset_vals = set() + bint string_to_dts_failed # specify error conditions assert is_raise or is_ignore or is_coerce @@ -578,10 +579,12 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', iresult[i] = NPY_NAT continue - try: - _string_to_dts(val, &dts, &out_local, &out_tzoffset) - except ValueError: - # A ValueError at this point is a _parsing_ error + string_to_dts_failed = _string_to_dts( + val, &dts, &out_local, + &out_tzoffset, False + ) + if string_to_dts_failed: + # An error at this point is a _parsing_ error # specifically _not_ OutOfBoundsDatetime if _parse_today_now(val, &iresult[i]): continue @@ -623,14 +626,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value - except: - # TODO: What exception are we concerned with here? - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - # No error raised by string_to_dts, pick back up + if not string_to_dts_failed: + # No error reported by string_to_dts, pick back up # where we left off value = dtstruct_to_dt64(&dts) if out_local == 1: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 17237a557443b..bee3e28874a05 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -392,6 +392,44 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, return obj +cdef _TSObject create_tsobject_tz_using_offset(int64_t value, + int tzoffset, object tz=None): + """ + Convert a numpy datetime64 `value`, along with initial timezone offset + `tzoffset` to a _TSObject (with timezone object `tz` - optional). + + Parameters + ---------- + value: int64_t + numpy dt64 + tzoffset: int + tz : tzinfo or None + timezone for the timezone-aware output. + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj + datetime dt + + tzinfo = pytz.FixedOffset(tzoffset) + value = tz_convert_single(value, tzinfo, UTC) + obj = convert_to_tsobject(value, tzinfo, None, 0, 0) + if tz is None: + check_overflows(obj) + return obj + + # Keep the converter same as PyDateTime's + dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject( + dt, tz, nanos=obj.dts.ps // 1000) + return obj + + cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, bint dayfirst=False, bint yearfirst=False): @@ -420,15 +458,14 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, obj : _TSObject """ cdef: - _TSObject obj + npy_datetimestruct dts + int64_t value # numpy dt64 int out_local = 0, out_tzoffset = 0 - datetime dt + bint do_parse_datetime_string = False if tz is not None: tz = maybe_get_tz(tz) - obj = _TSObject() - assert isinstance(ts, str) if len(ts) == 0 or ts in nat_strings: @@ -443,34 +480,23 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, ts = datetime.now(tz) # equiv: datetime.today().replace(tzinfo=tz) else: + string_to_dts_failed = _string_to_dts( + ts, &dts, &out_local, + &out_tzoffset, False + ) try: - _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) - obj.value = dtstruct_to_dt64(&obj.dts) - check_dts_bounds(&obj.dts) - if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) - if tz is None: - check_dts_bounds(&obj.dts) - check_overflows(obj) - return obj + if not string_to_dts_failed: + check_dts_bounds(&dts) + value = dtstruct_to_dt64(&dts) + if out_local == 1: + return create_tsobject_tz_using_offset(value, + out_tzoffset, tz) else: - # Keep the converter same as PyDateTime's - obj = convert_to_tsobject(obj.value, obj.tzinfo, - None, 0, 0) - dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, - obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) - obj = convert_datetime_to_tsobject( - dt, tz, nanos=obj.dts.ps // 1000) - return obj - - else: - ts = obj.value - if tz is not None: - # shift for localize_tso - ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise')[0] + ts = value + if tz is not None: + # shift for localize_tso + ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, + ambiguous='raise')[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -479,6 +505,9 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, raise except ValueError: + do_parse_datetime_string = True + + if string_to_dts_failed or do_parse_datetime_string: try: ts = parse_datetime_string(ts, dayfirst=dayfirst, yearfirst=yearfirst) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 803c8cb18e3d5..020bcdf0a7b15 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -73,4 +73,5 @@ cdef npy_timedelta get_timedelta64_value(object obj) nogil cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil cdef int _string_to_dts(object val, npy_datetimestruct* dts, - int* out_local, int* out_tzoffset) except? -1 + int* out_local, int* out_tzoffset, + bint want_exc) except? -1 diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 016206b0b69f0..7d362708015ce 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -31,7 +31,7 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS cdef extern from "src/datetime/np_datetime_strings.h": - int parse_iso_8601_datetime(const char *str, int len, + int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, int *out_local, int *out_tzoffset) @@ -170,11 +170,12 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): cdef inline int _string_to_dts(object val, npy_datetimestruct* dts, - int* out_local, int* out_tzoffset) except? -1: + int* out_local, int* out_tzoffset, + bint want_exc) except? -1: cdef: Py_ssize_t length const char* buf buf = get_c_string_buf_and_size(val, &length) - return parse_iso_8601_datetime(buf, length, + return parse_iso_8601_datetime(buf, length, want_exc, dts, out_local, out_tzoffset) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index abeeaba1d1198..54ed6ecff21e2 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -66,7 +66,7 @@ This file implements string parsing and creation for NumPy datetime. * * Returns 0 on success, -1 on failure. */ -int parse_iso_8601_datetime(const char *str, int len, +int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, int *out_local, int *out_tzoffset) { int year_leap = 0; @@ -173,8 +173,10 @@ int parse_iso_8601_datetime(const char *str, int len, goto parse_error; } if (out->month < 1 || out->month > 12) { - PyErr_Format(PyExc_ValueError, - "Month out of range in datetime string \"%s\"", str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } goto error; } @@ -217,8 +219,10 @@ int parse_iso_8601_datetime(const char *str, int len, } if (out->day < 1 || out->day > days_per_month_table[year_leap][out->month - 1]) { - PyErr_Format(PyExc_ValueError, - "Day out of range in datetime string \"%s\"", str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } goto error; } @@ -251,8 +255,11 @@ int parse_iso_8601_datetime(const char *str, int len, ++substr; --sublen; if (out->hour >= 24) { - PyErr_Format(PyExc_ValueError, - "Hours out of range in datetime string \"%s\"", str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } goto error; } } @@ -291,8 +298,11 @@ int parse_iso_8601_datetime(const char *str, int len, ++substr; --sublen; if (out->min >= 60) { - PyErr_Format(PyExc_ValueError, - "Minutes out of range in datetime string \"%s\"", str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } goto error; } } else if (!has_hms_sep) { @@ -328,8 +338,11 @@ int parse_iso_8601_datetime(const char *str, int len, ++substr; --sublen; if (out->sec >= 60) { - PyErr_Format(PyExc_ValueError, - "Seconds out of range in datetime string \"%s\"", str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } goto error; } } else if (!has_hms_sep) { @@ -438,10 +451,12 @@ int parse_iso_8601_datetime(const char *str, int len, substr += 2; sublen -= 2; if (offset_hour >= 24) { - PyErr_Format(PyExc_ValueError, - "Timezone hours offset out of range " - "in datetime string \"%s\"", - str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } goto error; } } else if (sublen >= 1 && isdigit(substr[0])) { @@ -466,10 +481,12 @@ int parse_iso_8601_datetime(const char *str, int len, substr += 2; sublen -= 2; if (offset_minute >= 60) { - PyErr_Format(PyExc_ValueError, - "Timezone minutes offset out of range " - "in datetime string \"%s\"", - str); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } goto error; } } else if (sublen >= 1 && isdigit(substr[0])) { @@ -507,9 +524,11 @@ int parse_iso_8601_datetime(const char *str, int len, return 0; parse_error: - PyErr_Format(PyExc_ValueError, - "Error parsing datetime string \"%s\" at position %d", str, - (int)(substr - str)); + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } return -1; error: diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 86ebe890810d6..880c34ea77638 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -54,7 +54,7 @@ This file implements string parsing and creation for NumPy datetime. * Returns 0 on success, -1 on failure. */ int -parse_iso_8601_datetime(const char *str, int len, +parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, int *out_local, int *out_tzoffset); @@ -79,5 +79,4 @@ get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, NPY_DATETIMEUNIT base); - #endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_