Skip to content

Commit

Permalink
REGR: fix op(frame, frame2) with reindex (#31679)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Feb 19, 2020
1 parent 9c06b30 commit f4dc9f9
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Fixed regressions
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
-

.. ---------------------------------------------------------------------------
Expand Down
60 changes: 59 additions & 1 deletion pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"""
import datetime
import operator
from typing import Optional, Set, Tuple, Union
from typing import TYPE_CHECKING, Optional, Set, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -61,6 +61,9 @@
rxor,
)

if TYPE_CHECKING:
from pandas import DataFrame # noqa:F401

# -----------------------------------------------------------------------------
# constants
ARITHMETIC_BINOPS: Set[str] = {
Expand Down Expand Up @@ -703,6 +706,58 @@ def to_series(right):
return left, right


def _should_reindex_frame_op(
left: "DataFrame", right, axis, default_axis: int, fill_value, level
) -> bool:
"""
Check if this is an operation between DataFrames that will need to reindex.
"""
assert isinstance(left, ABCDataFrame)

if not isinstance(right, ABCDataFrame):
return False

if fill_value is None and level is None and axis is default_axis:
# TODO: any other cases we should handle here?
cols = left.columns.intersection(right.columns)
if not (cols.equals(left.columns) and cols.equals(right.columns)):
return True

return False


def _frame_arith_method_with_reindex(
left: "DataFrame", right: "DataFrame", op
) -> "DataFrame":
"""
For DataFrame-with-DataFrame operations that require reindexing,
operate only on shared columns, then reindex.
Parameters
----------
left : DataFrame
right : DataFrame
op : binary operator
Returns
-------
DataFrame
"""
# GH#31623, only operate on shared columns
cols = left.columns.intersection(right.columns)

new_left = left[cols]
new_right = right[cols]
result = op(new_left, new_right)

# Do the join on the columns instead of using _align_method_FRAME
# to avoid constructing two potentially large/sparse DataFrames
join_columns, _, _ = left.columns.join(
right.columns, how="outer", level=None, return_indexers=True
)
return result.reindex(join_columns, axis=1)


def _arith_method_FRAME(cls, op, special):
str_rep = _get_opstr(op)
op_name = _get_op_name(op, special)
Expand All @@ -720,6 +775,9 @@ def _arith_method_FRAME(cls, op, special):
@Appender(doc)
def f(self, other, axis=default_axis, level=None, fill_value=None):

if _should_reindex_frame_op(self, other, axis, default_axis, fill_value, level):
return _frame_arith_method_with_reindex(self, other, op)

self, other = _align_method_FRAME(self, other, axis, flex=True, level=level)

if isinstance(other, ABCDataFrame):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,25 @@ def test_operations_with_interval_categories_index(self, all_arithmetic_operator
expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
tm.assert_frame_equal(result, expected)

def test_frame_with_frame_reindex(self):
# GH#31623
df = pd.DataFrame(
{
"foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
"bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
},
columns=["foo", "bar"],
)
df2 = df[["foo"]]

result = df - df2

expected = pd.DataFrame(
{"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
columns=["bar", "foo"],
)
tm.assert_frame_equal(result, expected)


def test_frame_with_zero_len_series_corner_cases():
# GH#28600
Expand Down

0 comments on commit f4dc9f9

Please sign in to comment.