Skip to content

Commit

Permalink
BUG: Fix to_datetime not respecting dayfirst (pandas-dev#58876)
Browse files Browse the repository at this point in the history
* ENH: Warn when to_datetime falls back to dateutil when dayfirst is passed

* Assert warnings

* Remove warnings and fix functionality

* Add whatsnew, write test
  • Loading branch information
Aloqeely authored Jul 17, 2024
1 parent dec86b3 commit 288af5f
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 54 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
- Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`)
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow-backed :class:`Series`. (:issue:`59154`)
- Bug in :func:`to_datetime` not respecting ``dayfirst`` if an uncommon date string was passed, e.g. ``"20201012"`` (:issue:`58859`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
Expand Down
65 changes: 35 additions & 30 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -606,37 +606,42 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
# equiv: datetime.today().replace(tzinfo=tz)
return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=NPY_FR_us)
else:
string_to_dts_failed = string_to_dts(
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
obj.creso = reso
ival = npy_datetimestruct_to_datetime(reso, &dts)

if out_local == 1:
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(
ival, obj.tzinfo, ambiguous="raise", nonexistent=None, creso=reso
)
if tz is None:
check_overflows(obj, reso)
return obj
_adjust_tsobject_tz_using_offset(obj, tz)
return obj
else:
if tz is not None:
# shift for _localize_tso
ival = tz_localize_to_utc_single(
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
if not dayfirst: # GH 58859
string_to_dts_failed = string_to_dts(
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
obj.creso = reso
ival = npy_datetimestruct_to_datetime(reso, &dts)

if out_local == 1:
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(
ival,
obj.tzinfo,
ambiguous="raise",
nonexistent=None,
creso=reso,
)
obj.value = ival
maybe_localize_tso(obj, tz, obj.creso)
return obj
if tz is None:
check_overflows(obj, reso)
return obj
_adjust_tsobject_tz_using_offset(obj, tz)
return obj
else:
if tz is not None:
# shift for _localize_tso
ival = tz_localize_to_utc_single(
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
)
obj.value = ival
maybe_localize_tso(obj, tz, obj.creso)
return obj

dt = parse_datetime_string(
ts,
Expand Down
49 changes: 25 additions & 24 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -377,32 +377,33 @@ def parse_datetime_string_with_reso(
raise ValueError(f'Given date string "{date_string}" not likely a datetime')

# Try iso8601 first, as it handles nanoseconds
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
# The new resolution will just be nano
# GH#50417
if out_bestunit in _timestamp_units:
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns

if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
if out_local:
tz = timezone(timedelta(minutes=out_tzoffset))
if not dayfirst: # GH 58859
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
# The new resolution will just be nano
# GH#50417
if out_bestunit in _timestamp_units:
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns

if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
tz = None
parsed = datetime_new(
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
)
if out_local:
tz = timezone(timedelta(minutes=out_tzoffset))
else:
tz = None
parsed = datetime_new(
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
)

reso = npy_unit_to_attrname[out_bestunit]
return parsed, reso
reso = npy_unit_to_attrname[out_bestunit]
return parsed, reso

parsed = _parse_delimited_date(date_string, dayfirst, &out_bestunit)
if parsed is not None:
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2988,6 +2988,8 @@ def test_parsers_nat(self):
("20/12/21", True, False, datetime(2021, 12, 20)),
("20/12/21", False, True, datetime(2020, 12, 21)),
("20/12/21", True, True, datetime(2020, 12, 21)),
# GH 58859
("20201012", True, False, datetime(2020, 12, 10)),
],
)
def test_parsers_dayfirst_yearfirst(
Expand Down

0 comments on commit 288af5f

Please sign in to comment.