Skip to content

Commit

Permalink
Backport PR #42692: Regression in merge_asof raising KeyError when at…
Browse files Browse the repository at this point in the history
… least one by column is in index (#42694)

Co-authored-by: Patrick Hoefler <[email protected]>
  • Loading branch information
meeseeksmachine and phofl authored Jul 24, 2021
1 parent d71ca72 commit 904277d
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Fixed regressions
- Regression in :func:`concat` between objects with bool dtype and integer dtype casting to object instead of to integer (:issue:`42092`)
- Bug in :class:`Series` constructor not accepting a ``dask.Array`` (:issue:`38645`)
- Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`)
- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`)
- Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)


Expand Down
30 changes: 20 additions & 10 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1775,16 +1775,26 @@ def _validate_specification(self) -> None:
raise MergeError("missing right_by")

# GH#29130 Check that merge keys do not have dtype object
lo_dtype = (
self.left[self.left_on[0]].dtype
if not self.left_index
else self.left.index.dtype
)
ro_dtype = (
self.right[self.right_on[0]].dtype
if not self.right_index
else self.right.index.dtype
)
if not self.left_index:
left_on = self.left_on[0]
lo_dtype = (
self.left[left_on].dtype
if left_on in self.left.columns
else self.left.index.get_level_values(left_on)
)
else:
lo_dtype = self.left.index.dtype

if not self.right_index:
right_on = self.right_on[0]
ro_dtype = (
self.right[right_on].dtype
if right_on in self.right.columns
else self.right.index.get_level_values(right_on)
)
else:
ro_dtype = self.right.index.dtype

if is_object_dtype(lo_dtype) or is_object_dtype(ro_dtype):
raise MergeError(
f"Incompatible merge dtype, {repr(ro_dtype)} and "
Expand Down
47 changes: 47 additions & 0 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -1437,3 +1437,50 @@ def test_merge_asof_index_behavior(kwargs):
index=index,
)
tm.assert_frame_equal(result, expected)


def test_merge_asof_numeri_column_in_index():
    """Check merge_asof when the ``on`` label names a single-level index.

    Both frames carry the key ``"a"`` as the name of their index rather
    than as a column; the merge is requested via ``left_on``/``right_on``.
    """
    # GH#34488
    left_index = Index([1, 2, 3], name="a")
    right_index = Index([0, 2, 3], name="a")
    left = pd.DataFrame({"b": [10, 11, 12]}, index=left_index)
    right = pd.DataFrame({"c": [20, 21, 22]}, index=right_index)

    # The expected frame has "a" as a regular column.
    expected = pd.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [10, 11, 12],
            "c": [20, 21, 22],
        }
    )

    result = merge_asof(left, right, left_on="a", right_on="a")
    tm.assert_frame_equal(result, expected)


def test_merge_asof_numeri_column_in_multiindex():
    """Check merge_asof when the ``on`` label names a MultiIndex level.

    The key ``"a"`` is the first level of a two-level index on both
    frames; the second levels ("z" on the left, "y" on the right) are
    string-valued and are not part of the expected output.
    """
    # GH#34488
    left_index = pd.MultiIndex.from_arrays(
        [[1, 2, 3], ["a", "b", "c"]], names=["a", "z"]
    )
    right_index = pd.MultiIndex.from_arrays(
        [[1, 2, 3], ["x", "y", "z"]], names=["a", "y"]
    )
    left = pd.DataFrame({"b": [10, 11, 12]}, index=left_index)
    right = pd.DataFrame({"c": [20, 21, 22]}, index=right_index)

    expected = pd.DataFrame(
        {
            "a": [1, 2, 3],
            "b": [10, 11, 12],
            "c": [20, 21, 22],
        }
    )

    result = merge_asof(left, right, left_on="a", right_on="a")
    tm.assert_frame_equal(result, expected)


def test_merge_asof_numeri_column_in_index_object_dtype():
    """Check that string keys held in the index are rejected with MergeError.

    The same check is made twice: first with the key as a single-level
    index, then with the key as the first level of a MultiIndex.
    """
    # GH#34488
    msg = r"Incompatible merge dtype, .*, both sides must have numeric dtype"

    left = pd.DataFrame(
        {"b": [10, 11, 12]}, index=Index(["1", "2", "3"], name="a")
    )
    right = pd.DataFrame(
        {"c": [20, 21, 22]}, index=Index(["m", "n", "o"], name="a")
    )

    with pytest.raises(MergeError, match=msg):
        merge_asof(left, right, left_on="a", right_on="a")

    # Move the string key into the first level of a two-level index.
    left = left.reset_index().set_index(["a", "b"])
    right = right.reset_index().set_index(["a", "c"])

    with pytest.raises(MergeError, match=msg):
        merge_asof(left, right, left_on="a", right_on="a")

0 comments on commit 904277d

Please sign in to comment.