From 386a1eb1d2e9d6109fcaf305752d2e4bb29cc3ee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 31 Oct 2023 09:31:43 -0700 Subject: [PATCH] BUG: OutOfBoundsDatetime with non-nano dt64tz dtype (#55768) --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/_libs/tslib.pyi | 2 +- pandas/_libs/tslib.pyx | 4 ++-- pandas/core/arrays/datetimes.py | 19 +++++++++++-------- .../indexes/datetimes/test_constructors.py | 11 +++++++---- pandas/tests/series/methods/test_astype.py | 11 +++++++---- 6 files changed, 29 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index d9909b0dbfad8..99b6310a80f83 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -330,6 +330,7 @@ Datetimelike - Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`) - Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`) - Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`) +- Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`) - Bug in creating a :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`) - diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 7f95bfc717633..a803ea0692c74 100644 --- 
a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -29,5 +29,5 @@ def array_to_datetime( # returned ndarray may be object dtype or datetime64[ns] def array_to_datetime_with_tz( - values: npt.NDArray[np.object_], tz: tzinfo + values: npt.NDArray[np.object_], tz: tzinfo, creso: int ) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d2eeea78ee7e8..bb96a89f7d1fe 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -671,7 +671,7 @@ cdef _array_to_datetime_object( return oresult_nd, None -def array_to_datetime_with_tz(ndarray values, tzinfo tz): +def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso): """ Vectorized analogue to pd.Timestamp(value, tz=tz) @@ -707,7 +707,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz): else: # datetime64, tznaive pydatetime, int, float ts = ts.tz_localize(tz) - ts = ts.as_unit("ns") + ts = (<_Timestamp>ts)._as_creso(creso) ival = ts._value # Analogous to: result[i] = ival diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3fff5cb2aa0c7..968b64e2c3de3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -355,7 +355,7 @@ def _from_sequence_not_strict( # DatetimeTZDtype unit = dtype.unit - subarr, tz, inferred_freq = _sequence_to_dt64ns( + subarr, tz, inferred_freq = _sequence_to_dt64( data, copy=copy, tz=tz, @@ -2179,7 +2179,7 @@ def std( # Constructor Helpers -def _sequence_to_dt64ns( +def _sequence_to_dt64( data, *, copy: bool = False, @@ -2205,7 +2205,8 @@ def _sequence_to_dt64ns( Returns ------- result : numpy.ndarray - The sequence converted to a numpy array with dtype ``datetime64[ns]``. + The sequence converted to a numpy array with dtype ``datetime64[unit]``, + where `unit` is "ns" unless specified otherwise by `out_unit`. tz : tzinfo or None Either the user-provided tzinfo or one inferred from the data. 
inferred_freq : Tick or None @@ -2228,9 +2229,9 @@ def _sequence_to_dt64ns( data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) - out_dtype = DT64NS_DTYPE - if out_unit is not None: - out_dtype = np.dtype(f"M8[{out_unit}]") + if out_unit is None: + out_unit = "ns" + out_dtype = np.dtype(f"M8[{out_unit}]") if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, @@ -2241,8 +2242,10 @@ def _sequence_to_dt64ns( elif tz is not None and ambiguous == "raise": # TODO: yearfirst/dayfirst/etc? obj_data = np.asarray(data, dtype=object) - i8data = tslib.array_to_datetime_with_tz(obj_data, tz) - return i8data.view(DT64NS_DTYPE), tz, None + i8data = tslib.array_to_datetime_with_tz( + obj_data, tz, abbrev_to_npy_unit(out_unit) + ) + return i8data.view(out_dtype), tz, None else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ef86e7800dbb7..90ddc9b5f618a 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1013,16 +1013,19 @@ def test_dti_convert_datetime_list(self, tzstr): dr2 = DatetimeIndex(list(dr), name="foo", freq="D") tm.assert_index_equal(dr, dr2) - def test_dti_constructor_with_non_nano_dtype(self): - # GH#55756 + @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) + def test_dti_constructor_with_non_nano_dtype(self, tz): + # GH#55756, GH#54620 ts = Timestamp("2999-01-01") dtype = "M8[us]" + if tz is not None: + dtype = f"M8[us, {tz}]" # NB: the 2500 is interpreted as nanoseconds and rounded *down* # to 2 microseconds vals = [ts, "2999-01-02 03:04:05.678910", 2500] result = DatetimeIndex(vals, dtype=dtype) - exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype) - expected = DatetimeIndex(exp_arr, dtype=dtype) + exp_arr = 
np.array([ts.asm8, vals[1], 2], dtype="M8[us]") + expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz) tm.assert_index_equal(result, expected) result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index edd3062f7d472..2434290616618 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -107,18 +107,21 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: - def test_astype_object_to_dt64_non_nano(self): - # GH#55756 + @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) + def test_astype_object_to_dt64_non_nano(self, tz): + # GH#55756, GH#54620 ts = Timestamp("2999-01-01") dtype = "M8[us]" + if tz is not None: + dtype = f"M8[us, {tz}]" # NB: the 2500 is interpreted as nanoseconds and rounded *down* # to 2 microseconds vals = [ts, "2999-01-02 03:04:05.678910", 2500] ser = Series(vals, dtype=object) result = ser.astype(dtype) - exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype) - expected = Series(exp_arr, dtype=dtype) + exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]") + expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz) tm.assert_series_equal(result, expected) def test_astype_mixed_object_to_dt64tz(self):