From 2bcc465c37999e24dcfc41f3c78cfc17c2c91ec2 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Tue, 10 Dec 2024 22:23:42 -0800 Subject: [PATCH] BUG: Fix DataFrame binary arithmatic operation handling of unaligned MultiIndex columns --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 12 ++++++++++++ pandas/tests/frame/test_arithmetic.py | 25 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2013f81d4da18..066eb5ae207de 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -682,6 +682,7 @@ MultiIndex - :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) - :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`) - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`) +- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 34b448a0d8d1c..665a6fdf1339c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7960,6 +7960,11 @@ def _arith_method_with_reindex(self, right: DataFrame, op) -> DataFrame: new_left = left if lcol_indexer is None else left.iloc[:, lcol_indexer] new_right = right if rcol_indexer is None else right.iloc[:, rcol_indexer] + + # GH#60498 For MultiIndex column alignment + new_left.columns = cols + new_right.columns = cols + result = op(new_left, new_right) # Do the join on the columns instead of using left._align_for_op @@ -7990,6 +7995,13 @@ def _should_reindex_frame_op(self, right, op, axis: int, fill_value, level) -> b if not isinstance(right, DataFrame): return False + if ( + isinstance(self.columns, MultiIndex) + or isinstance(right.columns, MultiIndex) + ) and not self.columns.equals(right.columns): + # GH#60498 Reindex if MultiIndexe columns are not matching + return True + if fill_value is None and level is None and axis == 1: # TODO: any other cases we should handle here? diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 6b61fe8b05219..7ada1884feb90 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2033,6 +2033,31 @@ def test_arithmetic_multiindex_align(): tm.assert_frame_equal(result, expected) +def test_arithmetic_multiindex_column_align(): + # GH#60498 + df1 = DataFrame( + data=100, + columns=MultiIndex.from_product( + [["1A", "1B"], ["2A", "2B"]], names=["Lev1", "Lev2"] + ), + index=["C1", "C2"], + ) + df2 = DataFrame( + data=np.array([[0.1, 0.25], [0.2, 0.45]]), + columns=MultiIndex.from_product([["1A", "1B"]], names=["Lev1"]), + index=["C1", "C2"], + ) + expected = DataFrame( + data=np.array([[10.0, 10.0, 25.0, 25.0], [20.0, 20.0, 45.0, 45.0]]), + columns=MultiIndex.from_product( + [["1A", "1B"], ["2A", "2B"]], names=["Lev1", "Lev2"] + ), + index=["C1", "C2"], + ) + result = df1 * df2 + tm.assert_frame_equal(result, expected) + + def test_bool_frame_mult_float(): # GH 18549 df = DataFrame(True, list("ab"), list("cd"))