BUG: DateOffset addition with non-nano #55595

Merged
3 commits merged on Oct 19, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -302,6 +302,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
- Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
-

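The new entry above covers the case this PR fixes: a :class:`DateOffset` carrying a microsecond component applied to non-nanosecond ``datetime64`` data. A minimal sketch of the intended behaviour after the fix (illustrative values, assuming a pandas build that includes this patch):

```python
import pandas as pd

# Second-resolution data plus an offset with a microsecond component.
ser = pd.Series(
    [pd.Timestamp("2000-01-01"), pd.Timestamp("2000-02-01")]
).dt.as_unit("s")
off = pd.DateOffset(microseconds=5)

# With the fix, the result is widened to microsecond resolution so the
# 5us component is actually applied instead of being lost or misscaled.
result = ser + off
print(result.dtype)  # expected: datetime64[us]
```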
70 changes: 43 additions & 27 deletions pandas/_libs/tslibs/offsets.pyx
@@ -1368,10 +1368,10 @@ cdef class RelativeDeltaOffset(BaseOffset):
else:
return other + timedelta(self.n)

@apply_array_wraps
def _apply_array(self, dtarr):
reso = get_unit_from_dtype(dtarr.dtype)
dt64other = np.asarray(dtarr)
@cache_readonly
def _pd_timedelta(self) -> Timedelta:
# components of _offset that can be cast to pd.Timedelta

kwds = self.kwds
relativedelta_fast = {
"years",
@@ -1385,28 +1385,26 @@
}
# relativedelta/_offset path only valid for base DateOffset
if self._use_relativedelta and set(kwds).issubset(relativedelta_fast):

months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
if months:
shifted = shift_months(dt64other.view("i8"), months, reso=reso)
dt64other = shifted.view(dtarr.dtype)

weeks = kwds.get("weeks", 0) * self.n
if weeks:
delta = Timedelta(days=7 * weeks)
td = (<_Timedelta>delta)._as_creso(reso)
dt64other = dt64other + td

timedelta_kwds = {
k: v
for k, v in kwds.items()
if k in ["days", "hours", "minutes", "seconds", "microseconds"]
td_kwds = {
key: val
for key, val in kwds.items()
if key in ["days", "hours", "minutes", "seconds", "microseconds"]
}
if timedelta_kwds:
delta = Timedelta(**timedelta_kwds)
td = (<_Timedelta>delta)._as_creso(reso)
dt64other = dt64other + (self.n * td)
return dt64other
if "weeks" in kwds:
days = td_kwds.get("days", 0)
td_kwds["days"] = days + 7 * kwds["weeks"]

if td_kwds:
delta = Timedelta(**td_kwds)
if "microseconds" in kwds:
delta = delta.as_unit("us")
else:
delta = delta.as_unit("s")
else:
delta = Timedelta(0).as_unit("s")

return delta * self.n

elif not self._use_relativedelta and hasattr(self, "_offset"):
# timedelta
num_nano = getattr(self, "nanoseconds", 0)
@@ -1415,8 +1413,12 @@
delta = Timedelta((self._offset + rem_nano) * self.n)
else:
delta = Timedelta(self._offset * self.n)
td = (<_Timedelta>delta)._as_creso(reso)
return dt64other + td
if "microseconds" in kwds:
delta = delta.as_unit("us")
else:
delta = delta.as_unit("s")
return delta

else:
# relativedelta with other keywords
kwd = set(kwds) - relativedelta_fast
@@ -1426,6 +1428,20 @@
"applied vectorized"
)

@apply_array_wraps
def _apply_array(self, dtarr):
reso = get_unit_from_dtype(dtarr.dtype)
dt64other = np.asarray(dtarr)

delta = self._pd_timedelta # may raise NotImplementedError

kwds = self.kwds
months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n
if months:
shifted = shift_months(dt64other.view("i8"), months, reso=reso)
dt64other = shifted.view(dtarr.dtype)
return dt64other + delta

def is_on_offset(self, dt: datetime) -> bool:
if self.normalize and not _is_normalized(dt):
return False
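Read as a whole, the refactor above splits `RelativeDeltaOffset` application into two pieces: a months component handled by `shift_months`, and everything day-sized or smaller folded into a single cached `Timedelta` whose unit is "us" when the offset has a microsecond component and "s" otherwise. A rough pure-Python sketch of that rule (hypothetical helper name, not the actual Cython implementation):

```python
from pandas import Timedelta

def relativedelta_offset_timedelta(n: int, kwds: dict) -> Timedelta:
    # Hypothetical stand-in for RelativeDeltaOffset._pd_timedelta: collect the
    # keywords that map cleanly onto a Timedelta (years/months are handled
    # separately via shift_months).
    td_kwds = {
        key: val
        for key, val in kwds.items()
        if key in ("days", "hours", "minutes", "seconds", "microseconds")
    }
    # weeks fold into days so one Timedelta covers them as well
    if "weeks" in kwds:
        td_kwds["days"] = td_kwds.get("days", 0) + 7 * kwds["weeks"]

    if td_kwds:
        delta = Timedelta(**td_kwds)
        # keep second resolution unless a microsecond component forces "us"
        delta = delta.as_unit("us" if "microseconds" in kwds else "s")
    else:
        delta = Timedelta(0).as_unit("s")
    return delta * n

# e.g. relativedelta_offset_timedelta(2, {"days": 1, "microseconds": 5})
# -> two days plus 10 microseconds, stored at "us" resolution
```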
5 changes: 4 additions & 1 deletion pandas/core/arrays/datetimes.py
@@ -799,14 +799,17 @@ def _add_offset(self, offset) -> Self:
values = self

try:
result = offset._apply_array(values).view(values.dtype)
result = offset._apply_array(values)
if result.dtype.kind == "i":
result = result.view(values.dtype)
except NotImplementedError:
warnings.warn(
"Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
result = self.astype("O") + offset
# TODO(GH#55564): as_unit will be unnecessary
result = type(self)._from_sequence(result).as_unit(self.unit)
if not len(self):
# GH#30336 _from_sequence won't be able to infer self.tz
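The dtype-kind check above exists because `_apply_array` can now return a ``datetime64`` array at a finer resolution than the input, so the previous unconditional ``.view(values.dtype)`` would silently reinterpret the underlying integers at the wrong unit. An illustrative numpy-only sketch of the failure mode being avoided (made-up values):

```python
import numpy as np

values_dtype = np.dtype("M8[s]")  # original second-resolution data
# _apply_array may now legitimately hand back microsecond-resolution data
result = np.array(["2000-01-01T00:00:00.000005"], dtype="M8[us]")

# Blind re-view: microsecond counts get read as seconds -> wrong dates.
wrong = result.view(values_dtype)

# Patched behaviour: only re-view when the payload is still raw int64.
if result.dtype.kind == "i":
    result = result.view(values_dtype)
print(result.dtype)  # stays datetime64[us]
```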
31 changes: 20 additions & 11 deletions pandas/tests/arithmetic/test_datetime64.py
@@ -1223,13 +1223,16 @@ class TestDatetime64DateOffsetArithmetic:
# Tick DateOffsets

# TODO: parametrize over timezone?
def test_dt64arr_series_add_tick_DateOffset(self, box_with_array):
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_dt64arr_series_add_tick_DateOffset(self, box_with_array, unit):
# GH#4532
# operate with pd.offsets
ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")])
ser = Series(
[Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]
).dt.as_unit(unit)
expected = Series(
[Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")]
)
).dt.as_unit(unit)

ser = tm.box_expected(ser, box_with_array)
expected = tm.box_expected(expected, box_with_array)
@@ -1310,7 +1313,8 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array):
# -------------------------------------------------------------
# RelativeDelta DateOffsets

def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array, unit):
# GH#10699
vec = DatetimeIndex(
[
@@ -1323,7 +1327,7 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
Timestamp("2000-05-15"),
Timestamp("2001-06-15"),
]
)
).as_unit(unit)
vec = tm.box_expected(vec, box_with_array)
vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec

@@ -1337,24 +1341,29 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array):
("seconds", 2),
("microseconds", 5),
]
for i, (unit, value) in enumerate(relative_kwargs):
off = DateOffset(**{unit: value})
for i, (offset_unit, value) in enumerate(relative_kwargs):
off = DateOffset(**{offset_unit: value})

exp_unit = unit
if offset_unit == "microseconds" and unit != "ns":
exp_unit = "us"

expected = DatetimeIndex([x + off for x in vec_items])
# TODO(GH#55564): as_unit will be unnecessary
expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec + off)

expected = DatetimeIndex([x - off for x in vec_items])
expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec - off)

off = DateOffset(**dict(relative_kwargs[: i + 1]))

expected = DatetimeIndex([x + off for x in vec_items])
expected = DatetimeIndex([x + off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec + off)

expected = DatetimeIndex([x - off for x in vec_items])
expected = DatetimeIndex([x - off for x in vec_items]).as_unit(exp_unit)
expected = tm.box_expected(expected, box_with_array)
tm.assert_equal(expected, vec - off)
msg = "(bad|unsupported) operand type for unary"
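The `exp_unit` bookkeeping in the test encodes the expected resolution-widening rule: a microsecond component forces at least "us", while coarser offsets leave the array's unit unchanged. A small check of that expectation (illustrative, assuming a build with this patch):

```python
import pandas as pd

dti = pd.DatetimeIndex([pd.Timestamp("2000-01-05")]).as_unit("ms")

# A microsecond component widens "s"/"ms" data to "us"; "ns" would stay "ns".
assert (dti + pd.DateOffset(microseconds=5)).dtype == "M8[us]"
# A purely day-level offset keeps the original unit.
assert (dti + pd.DateOffset(days=3)).dtype == "M8[ms]"
```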
6 changes: 2 additions & 4 deletions pandas/tests/tseries/offsets/test_offsets.py
@@ -568,10 +568,8 @@ def test_add_dt64_ndarray_non_nano(self, offset_types, unit, request):
# check that the result with non-nano matches nano
off = _create_offset(offset_types)

dti = date_range("2016-01-01", periods=35, freq="D")

arr = dti._data._ndarray.astype(f"M8[{unit}]")
dta = type(dti._data)._simple_new(arr, dtype=arr.dtype)
dti = date_range("2016-01-01", periods=35, freq="D", unit=unit)
dta = dti._data

expected = dti._data + off
result = dta + off
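The simplified test setup leans on the `unit` keyword that `date_range` gained in pandas 2.0, which builds non-nanosecond data directly instead of round-tripping through `_simple_new`. For example (assuming pandas >= 2.0):

```python
import pandas as pd

# Request second-resolution data up front rather than casting afterwards.
dti = pd.date_range("2016-01-01", periods=3, freq="D", unit="s")
print(dti.dtype)  # datetime64[s]
```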