Skip to content

Commit

Permalink
BUG: OutOfBoundsDatetime with non-nano dt64tz dtype (#55768)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Oct 31, 2023
1 parent 4b24974 commit 386a1eb
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 19 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ Datetimelike
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
) -> npt.NDArray[np.int64]: ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -671,7 +671,7 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)
Expand Down Expand Up @@ -707,7 +707,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = ts.as_unit("ns")
ts = (<_Timestamp>ts)._as_creso(creso)
ival = ts._value

# Analogous to: result[i] = ival
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def _from_sequence_not_strict(
# DatetimeTZDtype
unit = dtype.unit

subarr, tz, inferred_freq = _sequence_to_dt64ns(
subarr, tz, inferred_freq = _sequence_to_dt64(
data,
copy=copy,
tz=tz,
Expand Down Expand Up @@ -2179,7 +2179,7 @@ def std(
# Constructor Helpers


def _sequence_to_dt64ns(
def _sequence_to_dt64(
data,
*,
copy: bool = False,
Expand All @@ -2205,7 +2205,8 @@ def _sequence_to_dt64ns(
Returns
-------
result : numpy.ndarray
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
The sequence converted to a numpy array with dtype ``datetime64[unit]``,
where `unit` is "ns" unless specified otherwise by `out_unit`.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
Expand All @@ -2228,9 +2229,9 @@ def _sequence_to_dt64ns(
data, copy = maybe_convert_dtype(data, copy, tz=tz)
data_dtype = getattr(data, "dtype", None)

out_dtype = DT64NS_DTYPE
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")
if out_unit is None:
out_unit = "ns"
out_dtype = np.dtype(f"M8[{out_unit}]")

if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
Expand All @@ -2241,8 +2242,10 @@ def _sequence_to_dt64ns(
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
return i8data.view(DT64NS_DTYPE), tz, None
i8data = tslib.array_to_datetime_with_tz(
obj_data, tz, abbrev_to_npy_unit(out_unit)
)
return i8data.view(out_dtype), tz, None
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,16 +1013,19 @@ def test_dti_convert_datetime_list(self, tzstr):
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
tm.assert_index_equal(dr, dr2)

def test_dti_constructor_with_non_nano_dtype(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_dti_constructor_with_non_nano_dtype(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
result = DatetimeIndex(vals, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = DatetimeIndex(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
tm.assert_index_equal(result, expected)

result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,21 @@ def test_astype_dict_like(self, dtype_class):


class TestAstype:
def test_astype_object_to_dt64_non_nano(self):
# GH#55756
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
def test_astype_object_to_dt64_non_nano(self, tz):
# GH#55756, GH#54620
ts = Timestamp("2999-01-01")
dtype = "M8[us]"
if tz is not None:
dtype = f"M8[us, {tz}]"
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
# to 2 microseconds
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
ser = Series(vals, dtype=object)
result = ser.astype(dtype)

exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
expected = Series(exp_arr, dtype=dtype)
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
tm.assert_series_equal(result, expected)

def test_astype_mixed_object_to_dt64tz(self):
Expand Down

0 comments on commit 386a1eb

Please sign in to comment.