From f4dc9f9028a3da539126f9a8a37e7c41fc7b4b3c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Feb 2020 16:26:27 -0800 Subject: [PATCH] REGR: fix op(frame, frame2) with reindex (#31679) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/ops/__init__.py | 60 ++++++++++++++++++++++++++- pandas/tests/frame/test_arithmetic.py | 19 +++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 19358689a2186..c9031ac1ae9fe 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) - Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) +- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index f3c1a609d50a1..b74dea686a89f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,7 +5,7 @@ """ import datetime import operator -from typing import Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, Optional, Set, Tuple, Union import numpy as np @@ -61,6 +61,9 @@ rxor, ) +if TYPE_CHECKING: + from pandas import DataFrame # noqa:F401 + # ----------------------------------------------------------------------------- # constants ARITHMETIC_BINOPS: Set[str] = { @@ -703,6 +706,58 @@ def to_series(right): return left, right +def _should_reindex_frame_op( + left: "DataFrame", right, axis, default_axis: int, fill_value, level +) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + assert isinstance(left, ABCDataFrame) + + if not isinstance(right, ABCDataFrame): + return False + + if fill_value is None and level is None and axis is default_axis: + # TODO: any other cases we should handle here? + cols = left.columns.intersection(right.columns) + if not (cols.equals(left.columns) and cols.equals(right.columns)): + return True + + return False + + +def _frame_arith_method_with_reindex( + left: "DataFrame", right: "DataFrame", op +) -> "DataFrame": + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + # GH#31623, only operate on shared columns + cols = left.columns.intersection(right.columns) + + new_left = left[cols] + new_right = right[cols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using _align_method_FRAME + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + return result.reindex(join_columns, axis=1) + + def _arith_method_FRAME(cls, op, special): str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -720,6 +775,9 @@ def _arith_method_FRAME(cls, op, special): @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): + if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level): + return _frame_arith_method_with_reindex(self, other, op) + self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index c6eacf2bbcd84..44ad55517dcea 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -711,6 +711,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) tm.assert_frame_equal(result, expected) + def test_frame_with_frame_reindex(self): + # GH#31623 + df = pd.DataFrame( + { + "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], + "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], + }, + columns=["foo", "bar"], + ) + df2 = df[["foo"]] + + result = df - df2 + + expected = pd.DataFrame( + {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, + columns=["bar", "foo"], + ) + tm.assert_frame_equal(result, expected) + def test_frame_with_zero_len_series_corner_cases(): # GH#28600