From def01cf7bbb5ef8c9bf2e19737ea918e6a76a143 Mon Sep 17 00:00:00 2001 From: ianzur <33916505+ianzur@users.noreply.github.com> Date: Thu, 22 Aug 2019 08:09:48 -0500 Subject: [PATCH] BUG: timedelta merge asof with tolerance (#27650) * issue #27642 - timedelta merge asof with tolerance --- doc/source/whatsnew/v0.25.1.rst | 1 + pandas/core/reshape/merge.py | 3 +- pandas/tests/reshape/merge/test_merge_asof.py | 55 +++++++++++++++++-- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index b307fae4fbdc1..63dd56f4a3793 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -95,6 +95,7 @@ Reshaping ^^^^^^^^^ - A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) +- Bug in :meth:`merge_asof` where :class:`Timedelta` objects could not be merged when passing the ``tolerance`` kwarg (:issue:`27642`) - Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. 
(:issue:`27500`) - :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) - Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f45c7693bf6ed..225de3f11cf7d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -22,7 +22,6 @@ is_bool, is_bool_dtype, is_categorical_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, is_datetimelike, is_dtype_equal, @@ -1635,7 +1634,7 @@ def _get_merge_keys(self): ) ) - if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): + if is_datetimelike(lt): if not isinstance(self.tolerance, Timedelta): raise MergeError(msg) if self.tolerance < Timedelta(0): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 6b66386bafc5e..7412b1de643a1 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,3 +1,5 @@ +import datetime + import numpy as np import pytest import pytz @@ -588,14 +590,23 @@ def test_non_sorted(self): # ok, though has dupes merge_asof(trades, self.quotes, on="time", by="ticker") - def test_tolerance(self): + @pytest.mark.parametrize( + "tolerance", + [ + Timedelta("1day"), + pytest.param( + datetime.timedelta(days=1), + marks=pytest.mark.xfail(reason="not implemented", strict=True), + ), + ], + ids=["pd.Timedelta", "datetime.timedelta"], + ) + def test_tolerance(self, tolerance): trades = self.trades quotes = self.quotes - result = merge_asof( - trades, quotes, on="time", by="ticker", tolerance=Timedelta("1day") - ) + result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance) expected = self.tolerance assert_frame_equal(result, expected) @@ -1246,3 +1257,39 @@ def test_by_mixed_tz_aware(self): ) expected["value_y"] = np.array([np.nan], dtype=object) assert_frame_equal(result, expected) + + def 
test_timedelta_tolerance_nearest(self): + # GH 27642 + + left = pd.DataFrame( + list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])), + columns=["time", "left"], + ) + + left["time"] = pd.to_timedelta(left["time"], "ms") + + right = pd.DataFrame( + list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])), + columns=["time", "right"], + ) + + right["time"] = pd.to_timedelta(right["time"], "ms") + + expected = pd.DataFrame( + list( + zip( + [0, 5, 10, 15, 20, 25], + [0, 1, 2, 3, 4, 5], + [0, np.nan, 2, 4, np.nan, np.nan], + ) + ), + columns=["time", "left", "right"], + ) + + expected["time"] = pd.to_timedelta(expected["time"], "ms") + + result = pd.merge_asof( + left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" + ) + + assert_frame_equal(result, expected)