From ab372f0f7604ac507d6c2dbba662a8a817dfda0b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 15:04:17 -0700
Subject: [PATCH 1/9] Various simplifications of
 DataFrame._make_index_and_operands_for_binop.

---
 python/cudf/cudf/core/dataframe.py | 77 ++++++++++++------------------
 1 file changed, 31 insertions(+), 46 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 08a30729e7c..7603dd746c6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1854,47 +1854,42 @@ def _make_operands_and_index_for_binop(
         ],
         Optional[BaseIndex],
     ]:
-        lhs, rhs = self, other
-
-        if _is_scalar_or_zero_d_array(rhs):
-            rhs = [rhs] * lhs._num_columns
-
-        # For columns that exist in rhs but not lhs, we swap the order so that
-        # we can always assume that left has a binary operator. This
-        # implementation assumes that binary operations between a column and
-        # NULL are always commutative, even for binops (like subtraction) that
-        # are normally anticommutative.
-        # TODO: The above should no longer be necessary once we switch to
-        # properly invoking the operator since we can then rely on reflection.
-        if isinstance(rhs, Sequence):
+        # The default index to return.
+        index = self._index
+        if _is_scalar_or_zero_d_array(other):
+            operands = {
+                name: (left, other, reflect, fill_value)
+                for name, left in self._data.items()
+            }
+        elif isinstance(other, Sequence):
             # TODO: Consider validating sequence length (pandas does).
             operands = {
                 name: (left, right, reflect, fill_value)
-                for right, (name, left) in zip(rhs, lhs._data.items())
+                for right, (name, left) in zip(other, self._data.items())
             }
-        elif isinstance(rhs, DataFrame):
+        elif isinstance(other, DataFrame):
             if (
                 not can_reindex
                 and fn in cudf.utils.utils._EQUALITY_OPS
                 and (
-                    not lhs._data.to_pandas_index().equals(
-                        rhs._data.to_pandas_index()
+                    not self.index.equals(other.index)
+                    or not self._data.to_pandas_index().equals(
+                        other._data.to_pandas_index()
                     )
-                    or not lhs.index.equals(rhs.index)
                 )
             ):
                 raise ValueError(
                     "Can only compare identically-labeled DataFrame objects"
                 )
 
-            lhs, rhs = _align_indices(lhs, rhs)
+            lhs, rhs = _align_indices(self, other)
+            # Return the aligned index.
+            index = lhs._index
 
             operands = {
                 name: (
                     lcol,
-                    rhs._data[name]
-                    if name in rhs._data
-                    else (fill_value or None),
+                    rhs._data.get(name, fill_value),
                     reflect,
                     fill_value if name in rhs._data else None,
                 )
@@ -1902,38 +1897,28 @@ def _make_operands_and_index_for_binop(
             }
             for name, col in rhs._data.items():
                 if name not in lhs._data:
-                    operands[name] = (
-                        col,
-                        (fill_value or None),
-                        not reflect,
-                        None,
-                    )
-        elif isinstance(rhs, Series):
+                    operands[name] = (fill_value, col, reflect, None)
+        elif isinstance(other, Series):
             # Note: This logic will need updating if any of the user-facing
             # binop methods (e.g. DataFrame.add) ever support axis=0/rows.
-            right_dict = dict(zip(rhs.index.values_host, rhs.values_host))
-            left_cols = lhs._column_names
-            # mypy thinks lhs._column_names is a List rather than a Tuple, so
-            # we have to ignore the type check.
-            result_cols = left_cols + tuple(  # type: ignore
-                col for col in right_dict if col not in left_cols
-            )
-            operands = {}
-            for col in result_cols:
-                if col in left_cols:
-                    left = lhs._data[col]
-                    right = right_dict[col] if col in right_dict else None
-                else:
+            right = dict(zip(other.index.values_host, other.values_host))
+
+            operands = {
+                col: (self._data[col], right.get(col), reflect, fill_value)
+                for col in self._column_names
+            }
+
+            for col in right:
+                if col not in self._column_names:
                     # We match pandas semantics here by performing binops
                     # between a NaN (not NULL!) column and the actual values,
                     # which results in nans, the pandas output.
-                    left = as_column(np.nan, length=lhs._num_rows)
-                    right = right_dict[col]
-                operands[col] = (left, right, reflect, fill_value)
+                    left = as_column(np.nan, length=self._num_rows)
+                    operands[col] = (left, right[col], reflect, fill_value)
         else:
             return NotImplemented, None
 
-        return operands, lhs._index
+        return operands, index
 
     @_cudf_nvtx_annotate
     def update(

From 4f4fa3d665174266ebb4407d3e7fe0baa27e7b22 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 17:14:45 -0700
Subject: [PATCH 2/9] Unify more logic for scalar, list, and Series binops with
 DataFrame.

---
 python/cudf/cudf/core/dataframe.py       | 59 +++++++++++++----------
 python/cudf/cudf/tests/test_dataframe.py | 60 +++++++++++++++++++++---
 2 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 7603dd746c6..1abc787e924 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1856,17 +1856,45 @@ def _make_operands_and_index_for_binop(
     ]:
         # The default index to return.
         index = self._index
+        other_is_series = False
+
         if _is_scalar_or_zero_d_array(other):
-            operands = {
-                name: (left, other, reflect, fill_value)
-                for name, left in self._data.items()
-            }
+            other = {name: other for name in self._data}
         elif isinstance(other, Sequence):
-            # TODO: Consider validating sequence length (pandas does).
+            if len(other) != self._num_columns:
+                raise ValueError(
+                    f"Other is the wrong length. Expected {self._num_columns} "
+                    f", got {len(other)}"
+                )
+            other = {name: o for (name, o) in zip(self._data, other)}
+        elif isinstance(other, Series):
+            # Note: This logic will need updating if any of the user-facing
+            # binop methods (e.g. DataFrame.add) ever support axis=0/rows.
+            other = dict(zip(other.index.values_host, other.values_host))
+            other_is_series = True
+
+        # Check dict first for speed.
+        if isinstance(other, (dict, MutableMapping)):
+            # Series can have extra index elements, other objects cannot.
+            if not other_is_series and len(other) != self._num_columns:
+                raise ValueError(
+                    f"Other is the wrong length. Expected {self._num_columns} "
+                    f", got {len(other)}"
+                )
+            # pandas checks the length of other, but not the actual elements,
+            # so we can end up with a mismatch (which produces nulls).
             operands = {
-                name: (left, right, reflect, fill_value)
-                for right, (name, left) in zip(other, self._data.items())
+                name: (left, other.get(name), reflect, fill_value)
+                for name, left in self._data.items()
             }
+            if other_is_series:
+                for col in other:
+                    if col not in self._column_names:
+                        # We match pandas semantics here by performing binops
+                        # between a NaN (not NULL!) column and the actual
+                        # values, which results in nans, the pandas output.
+                        left = as_column(np.nan, length=self._num_rows)
+                        operands[col] = (left, other[col], reflect, fill_value)
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
@@ -1898,23 +1926,6 @@ def _make_operands_and_index_for_binop(
             for name, col in rhs._data.items():
                 if name not in lhs._data:
                     operands[name] = (fill_value, col, reflect, None)
-        elif isinstance(other, Series):
-            # Note: This logic will need updating if any of the user-facing
-            # binop methods (e.g. DataFrame.add) ever support axis=0/rows.
-            right = dict(zip(other.index.values_host, other.values_host))
-
-            operands = {
-                col: (self._data[col], right.get(col), reflect, fill_value)
-                for col in self._column_names
-            }
-
-            for col in right:
-                if col not in self._column_names:
-                    # We match pandas semantics here by performing binops
-                    # between a NaN (not NULL!) column and the actual values,
-                    # which results in nans, the pandas output.
-                    left = as_column(np.nan, length=self._num_rows)
-                    operands[col] = (left, right[col], reflect, fill_value)
         else:
             return NotImplemented, None
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 303c245777c..816bc8221f3 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8,6 +8,7 @@
 import re
 import string
 import textwrap
+import warnings
 from copy import copy
 
 import cupy
@@ -2034,12 +2035,59 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count):
         operator.ne,
     ],
 )
-def test_binops_df(pdf, gdf, binop):
-    pdf = pdf + 1.0
-    gdf = gdf + 1.0
-    d = binop(pdf, pdf)
-    g = binop(gdf, gdf)
-    assert_eq(d, g)
+@pytest.mark.parametrize(
+    "other",
+    [
+        1.0,
+        [1.0],
+        [1.0, 2.0],
+        [1.0, 2.0, 3.0],
+        {"x": 1.0},
+        {"x": 1.0, "y": 2.0},
+        {"x": 1.0, "y": 2.0, "z": 3.0},
+        pd.Series([1.0]),
+        # pd.Series([1.0, 2.0]),
+        # pd.Series([1.0, 2.0, 3.0]),
+        # pd.Series([1.0], index=["x"]),
+        # pd.Series([1.0, 2.0], index=["x", "y"]),
+        # pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"]),
+        pd.DataFrame({"x": [1.0]}),
+        pd.DataFrame({"x": [1.0], "y": [2.0]}),
+        pd.DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}),
+    ],
+)
+def test_binops_df(pdf, gdf, binop, other):
+    # Avoid 1**NA cases: https://github.com/pandas-dev/pandas/issues/29997
+    pdf[pdf == 1.0] = 2
+    gdf[gdf == 1.0] = 2
+    try:
+        with warnings.catch_warnings(record=True) as w:
+            d = binop(pdf, other)
+    except Exception:
+        if isinstance(other, (pd.Series, pd.DataFrame)):
+            other = cudf.from_pandas(other)
+
+        assert_exceptions_equal(
+            lfunc=binop,
+            rfunc=binop,
+            lfunc_args_and_kwargs=([pdf, other], {}),
+            rfunc_args_and_kwargs=([gdf, other], {}),
+            compare_error_message=False,
+        )
+    else:
+        if isinstance(other, (pd.Series, pd.DataFrame)):
+            other = cudf.from_pandas(other)
+        g = binop(gdf, other)
+        try:
+            assert_eq(d, g)
+        except AssertionError:
+            # Currently we will not match pandas for equality/inequality
+            # operators when there are columns that exist in a Series but not
+            # the DataFrame because pandas returns True/False values whereas we
+            # return NA. However, this reindexing is deprecated in pandas so we
+            # opt not to add support.
+            if w and "DataFrame vs Series comparisons is deprecated" in str(w):
+                pass
 
 
 @pytest.mark.parametrize("binop", [operator.and_, operator.or_, operator.xor])

From c925cea402bfca387cad7a3e2c8741c8d7548580 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 17:30:25 -0700
Subject: [PATCH 3/9] Separate Series back into its own block.

---
 python/cudf/cudf/core/dataframe.py | 44 +++++++++++++-----------------
 python/cudf/cudf/core/frame.py     | 12 ++++++--
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1abc787e924..70c50dfa72a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1854,47 +1854,41 @@ def _make_operands_and_index_for_binop(
         ],
         Optional[BaseIndex],
     ]:
-        # The default index to return.
         index = self._index
-        other_is_series = False
 
-        if _is_scalar_or_zero_d_array(other):
-            other = {name: other for name in self._data}
-        elif isinstance(other, Sequence):
+        # Check built-in types first for speed.
+        if isinstance(other, (list, dict, Sequence, MutableMapping)):
             if len(other) != self._num_columns:
                 raise ValueError(
                     f"Other is the wrong length. Expected {self._num_columns} "
                     f", got {len(other)}"
                 )
+
+        if _is_scalar_or_zero_d_array(other):
+            other = {name: other for name in self._data}
+        elif isinstance(other, Sequence):
             other = {name: o for (name, o) in zip(self._data, other)}
-        elif isinstance(other, Series):
-            # Note: This logic will need updating if any of the user-facing
-            # binop methods (e.g. DataFrame.add) ever support axis=0/rows.
-            other = dict(zip(other.index.values_host, other.values_host))
-            other_is_series = True
 
-        # Check dict first for speed.
         if isinstance(other, (dict, MutableMapping)):
-            # Series can have extra index elements, other objects cannot.
-            if not other_is_series and len(other) != self._num_columns:
-                raise ValueError(
-                    f"Other is the wrong length. Expected {self._num_columns} "
-                    f", got {len(other)}"
-                )
             # pandas checks the length of other, but not the actual elements,
             # so we can end up with a mismatch (which produces nulls).
             operands = {
                 name: (left, other.get(name), reflect, fill_value)
                 for name, left in self._data.items()
             }
-            if other_is_series:
-                for col in other:
-                    if col not in self._column_names:
-                        # We match pandas semantics here by performing binops
-                        # between a NaN (not NULL!) column and the actual
-                        # values, which results in nans, the pandas output.
-                        left = as_column(np.nan, length=self._num_rows)
-                        operands[col] = (left, other[col], reflect, fill_value)
+        elif isinstance(other, Series):
+            other = dict(zip(other.index.values_host, other.values_host))
+            operands = {
+                name: (left, other.get(name), reflect, fill_value)
+                for name, left in self._data.items()
+            }
+            for col in other:
+                if col not in self._column_names:
+                    # We match pandas semantics here by performing binops
+                    # between a NaN (not NULL!) column and the actual
+                    # values, which results in nans, the pandas output.
+                    left = as_column(np.nan, length=self._num_rows)
+                    operands[col] = (left, other[col], reflect, fill_value)
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 75c6e4d0964..fdbeee0e374 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2475,7 +2475,10 @@ def _colwise_binop(
         ) in operands.items():
             output_mask = None
             if fill_value is not None:
-                if isinstance(right_column, ColumnBase):
+                left_is_column = isinstance(left_column, ColumnBase)
+                right_is_column = isinstance(right_column, ColumnBase)
+
+                if left_is_column and right_is_column:
                     # If both columns are nullable, pandas semantics dictate
                     # that nulls that are present in both left_column and
                     # right_column are not filled.
@@ -2489,9 +2492,14 @@ def _colwise_binop(
                         left_column = left_column.fillna(fill_value)
                     elif right_column.nullable:
                         right_column = right_column.fillna(fill_value)
-                else:
+                elif left_is_column:
                     if left_column.nullable:
                         left_column = left_column.fillna(fill_value)
+                elif right_is_column:
+                    if right_column.nullable:
+                        right_column = right_column.fillna(fill_value)
+                else:
+                    assert False, "At least one operand must be a column."
 
             # TODO: Disable logical and binary operators between columns that
             # are not numerical using the new binops mixin.

From 8d5d90aa318a010c880ef0bf46f6c4b93369db4c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 17:48:05 -0700
Subject: [PATCH 4/9] Create helper functions for operand generation.

---
 python/cudf/cudf/core/dataframe.py | 55 ++++++++++++++++--------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 70c50dfa72a..2e037f34e69 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1856,6 +1856,24 @@ def _make_operands_and_index_for_binop(
     ]:
         index = self._index
 
+        def make_operands(left_data, right_data, right_default=None, fill_requires_key=False):
+            return {
+                name: (
+                    left,
+                    right_data.get(name, right_default),
+                    reflect,
+                    fill_value
+                    if (not fill_requires_key or name in right_data)
+                    else None,
+                )
+                for name, left in left_data.items()
+            }
+
+        def add_right_operands(operands, left_data, right_data, left_default):
+            for name, col in right_data.items():
+                if name not in left_data:
+                    operands[name] = (left_default(), col, reflect, fill_value)
+
         # Check built-in types first for speed.
         if isinstance(other, (list, dict, Sequence, MutableMapping)):
             if len(other) != self._num_columns:
@@ -1872,23 +1890,17 @@ def _make_operands_and_index_for_binop(
         if isinstance(other, (dict, MutableMapping)):
             # pandas checks the length of other, but not the actual elements,
             # so we can end up with a mismatch (which produces nulls).
-            operands = {
-                name: (left, other.get(name), reflect, fill_value)
-                for name, left in self._data.items()
-            }
+            operands = make_operands(self._data, other)
         elif isinstance(other, Series):
             other = dict(zip(other.index.values_host, other.values_host))
-            operands = {
-                name: (left, other.get(name), reflect, fill_value)
-                for name, left in self._data.items()
-            }
-            for col in other:
-                if col not in self._column_names:
-                    # We match pandas semantics here by performing binops
-                    # between a NaN (not NULL!) column and the actual
-                    # values, which results in nans, the pandas output.
-                    left = as_column(np.nan, length=self._num_rows)
-                    operands[col] = (left, other[col], reflect, fill_value)
+            operands = make_operands(self._data, other)
+
+            # For keys in right but not left, we match pandas semantics here by
+            # performing binops between a NaN (not NULL!) column and the actual
+            # values, which results in nans, the pandas output.
+            for name, col in other.items():
+                if name not in self._data:
+                    operands[name] = (as_column(np.nan, length=self._num_rows), col, reflect, fill_value)
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
@@ -1905,18 +1917,9 @@ def _make_operands_and_index_for_binop(
                 )
 
             lhs, rhs = _align_indices(self, other)
-            # Return the aligned index.
             index = lhs._index
-
-            operands = {
-                name: (
-                    lcol,
-                    rhs._data.get(name, fill_value),
-                    reflect,
-                    fill_value if name in rhs._data else None,
-                )
-                for name, lcol in lhs._data.items()
-            }
+            operands = make_operands(lhs._data, rhs._data, fill_value, True)
+            add_right_operands(operands, lhs._data, rhs._data, lambda: None)
             for name, col in rhs._data.items():
                 if name not in lhs._data:
                     operands[name] = (fill_value, col, reflect, None)

From 4efbaba9cf388cab618f7fbef72003e107c6baeb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 18:23:34 -0700
Subject: [PATCH 5/9] Streamline the implementation.

---
 python/cudf/cudf/core/dataframe.py | 71 ++++++++++++++----------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 2e037f34e69..a8ceadf2e56 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1854,25 +1854,20 @@ def _make_operands_and_index_for_binop(
         ],
         Optional[BaseIndex],
     ]:
-        index = self._index
-
-        def make_operands(left_data, right_data, right_default=None, fill_requires_key=False):
-            return {
-                name: (
-                    left,
-                    right_data.get(name, right_default),
+        def make_operands(left, right, add_operands, fill_if_no_key, right_default):
+            operands = {
+                k: (
+                    v,
+                    right.get(k, right_default),
                     reflect,
-                    fill_value
-                    if (not fill_requires_key or name in right_data)
-                    else None,
+                    fill_value if (not fill_if_no_key or k in right) else None,
                 )
-                for name, left in left_data.items()
+                for k, v in left.items()
             }
 
-        def add_right_operands(operands, left_data, right_data, left_default):
-            for name, col in right_data.items():
-                if name not in left_data:
-                    operands[name] = (left_default(), col, reflect, fill_value)
+            if add_operands is not None:
+                add_operands(operands, left, right)
+            return operands
 
         # Check built-in types first for speed.
         if isinstance(other, (list, dict, Sequence, MutableMapping)):
@@ -1882,25 +1877,25 @@ def add_right_operands(operands, left_data, right_data, left_default):
                     f", got {len(other)}"
                 )
 
+        lhs, rhs = self._data, other
+        index = self._index
+        add_operands = None
+        fill_if_no_key = False
+        right_default = None
+
         if _is_scalar_or_zero_d_array(other):
-            other = {name: other for name in self._data}
+            rhs = {name: other for name in self._data}
         elif isinstance(other, Sequence):
-            other = {name: o for (name, o) in zip(self._data, other)}
-
-        if isinstance(other, (dict, MutableMapping)):
-            # pandas checks the length of other, but not the actual elements,
-            # so we can end up with a mismatch (which produces nulls).
-            operands = make_operands(self._data, other)
+            rhs = {name: o for (name, o) in zip(self._data, other)}
         elif isinstance(other, Series):
-            other = dict(zip(other.index.values_host, other.values_host))
-            operands = make_operands(self._data, other)
-
+            rhs = dict(zip(other.index.values_host, other.values_host))
             # For keys in right but not left, we match pandas semantics here by
             # performing binops between a NaN (not NULL!) column and the actual
             # values, which results in nans, the pandas output.
-            for name, col in other.items():
-                if name not in self._data:
-                    operands[name] = (as_column(np.nan, length=self._num_rows), col, reflect, fill_value)
+            def add_operands(operands, left, right):
+                for k, v in right.items():
+                    if k not in left:
+                        operands[k] = (as_column(np.nan, length=self._num_rows), v, reflect, fill_value)
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
@@ -1915,18 +1910,20 @@ def add_right_operands(operands, left_data, right_data, left_default):
                 raise ValueError(
                     "Can only compare identically-labeled DataFrame objects"
                 )
-
             lhs, rhs = _align_indices(self, other)
             index = lhs._index
-            operands = make_operands(lhs._data, rhs._data, fill_value, True)
-            add_right_operands(operands, lhs._data, rhs._data, lambda: None)
-            for name, col in rhs._data.items():
-                if name not in lhs._data:
-                    operands[name] = (fill_value, col, reflect, None)
-        else:
-            return NotImplemented, None
+            lhs, rhs = lhs._data, rhs._data
+            fill_if_no_key = True
+            right_default = fill_value
 
-        return operands, index
+            def add_operands(operands, left, right):
+                for k, v in right.items():
+                    if k not in left:
+                        operands[k] = (fill_value, v, reflect, None)
+
+        if not isinstance(rhs, MutableMapping):
+            return NotImplemented, None
+        return make_operands(lhs, rhs, add_operands, fill_if_no_key, right_default), index
 
     @_cudf_nvtx_annotate
     def update(

From 71b8e7b1d471354eda51d0bb3ccc0c0552312f46 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 21 Mar 2022 18:35:50 -0700
Subject: [PATCH 6/9] Inline make_operands.

---
 python/cudf/cudf/core/dataframe.py       | 34 +++++++++++-------------
 python/cudf/cudf/tests/test_dataframe.py | 11 ++++----
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a8ceadf2e56..41778a971e7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1854,27 +1854,12 @@ def _make_operands_and_index_for_binop(
         ],
         Optional[BaseIndex],
     ]:
-        def make_operands(left, right, add_operands, fill_if_no_key, right_default):
-            operands = {
-                k: (
-                    v,
-                    right.get(k, right_default),
-                    reflect,
-                    fill_value if (not fill_if_no_key or k in right) else None,
-                )
-                for k, v in left.items()
-            }
-
-            if add_operands is not None:
-                add_operands(operands, left, right)
-            return operands
-
         # Check built-in types first for speed.
         if isinstance(other, (list, dict, Sequence, MutableMapping)):
             if len(other) != self._num_columns:
                 raise ValueError(
-                    f"Other is the wrong length. Expected {self._num_columns} "
-                    f", got {len(other)}"
+                    "Other is the wrong length. Expected "
+                    f"{self._num_columns}, got {len(other)}"
                 )
 
         lhs, rhs = self._data, other
@@ -1923,7 +1908,20 @@ def add_operands(operands, left, right):
 
         if not isinstance(rhs, MutableMapping):
             return NotImplemented, None
-        return make_operands(lhs, rhs, add_operands, fill_if_no_key, right_default), index
+
+        operands = {
+            k: (
+                v,
+                rhs.get(k, right_default),
+                reflect,
+                fill_value if (not fill_if_no_key or k in rhs) else None,
+            )
+            for k, v in lhs.items()
+        }
+
+        if add_operands is not None:
+            add_operands(operands, lhs, rhs)
+        return operands, index
 
     @_cudf_nvtx_annotate
     def update(
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 816bc8221f3..8549c9a1b20 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2045,12 +2045,13 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count):
         {"x": 1.0},
         {"x": 1.0, "y": 2.0},
         {"x": 1.0, "y": 2.0, "z": 3.0},
+        {"x": 1.0, "z": 3.0},
         pd.Series([1.0]),
-        # pd.Series([1.0, 2.0]),
-        # pd.Series([1.0, 2.0, 3.0]),
-        # pd.Series([1.0], index=["x"]),
-        # pd.Series([1.0, 2.0], index=["x", "y"]),
-        # pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"]),
+        pd.Series([1.0, 2.0]),
+        pd.Series([1.0, 2.0, 3.0]),
+        pd.Series([1.0], index=["x"]),
+        pd.Series([1.0, 2.0], index=["x", "y"]),
+        pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"]),
         pd.DataFrame({"x": [1.0]}),
         pd.DataFrame({"x": [1.0], "y": [2.0]}),
         pd.DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}),

From f3219285dcfd52985188ad1ed2361b7370d2e1e7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 22 Mar 2022 16:00:44 -0700
Subject: [PATCH 7/9] Simplify defaults as much as possible.

---
 python/cudf/cudf/core/dataframe.py | 48 +++++++++++++-----------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 41778a971e7..5bdf7e5f202 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1864,23 +1864,18 @@ def _make_operands_and_index_for_binop(
 
         lhs, rhs = self._data, other
         index = self._index
-        add_operands = None
-        fill_if_no_key = False
-        right_default = None
+        fill_requires_key = False
+        left_default: Any = False
 
         if _is_scalar_or_zero_d_array(other):
             rhs = {name: other for name in self._data}
-        elif isinstance(other, Sequence):
+        elif isinstance(other, (list, Sequence)):
             rhs = {name: o for (name, o) in zip(self._data, other)}
         elif isinstance(other, Series):
             rhs = dict(zip(other.index.values_host, other.values_host))
-            # For keys in right but not left, we match pandas semantics here by
-            # performing binops between a NaN (not NULL!) column and the actual
-            # values, which results in nans, the pandas output.
-            def add_operands(operands, left, right):
-                for k, v in right.items():
-                    if k not in left:
-                        operands[k] = (as_column(np.nan, length=self._num_rows), v, reflect, fill_value)
+            # For keys in right but not left, perform binops between NaN (not
+            # NULL!) and the right value (result is NaN).
+            left_default = as_column(np.nan, length=len(self))
         elif isinstance(other, DataFrame):
             if (
                 not can_reindex
@@ -1895,32 +1890,31 @@ def add_operands(operands, left, right):
                 raise ValueError(
                     "Can only compare identically-labeled DataFrame objects"
                 )
-            lhs, rhs = _align_indices(self, other)
-            index = lhs._index
-            lhs, rhs = lhs._data, rhs._data
-            fill_if_no_key = True
-            right_default = fill_value
-
-            def add_operands(operands, left, right):
-                for k, v in right.items():
-                    if k not in left:
-                        operands[k] = (fill_value, v, reflect, None)
-
-        if not isinstance(rhs, MutableMapping):
+            new_lhs, new_rhs = _align_indices(self, other)
+            index = new_lhs._index
+            lhs, rhs = new_lhs._data, new_rhs._data
+            fill_requires_key = True
+            # For DataFrame-DataFrame ops, always default to operating against
+            # the fill value.
+            left_default = fill_value
+
+        if not isinstance(rhs, (dict, MutableMapping)):
             return NotImplemented, None
 
         operands = {
             k: (
                 v,
-                rhs.get(k, right_default),
+                rhs.get(k, fill_value),
                 reflect,
-                fill_value if (not fill_if_no_key or k in rhs) else None,
+                fill_value if (not fill_requires_key or k in rhs) else None,
             )
             for k, v in lhs.items()
         }
 
-        if add_operands is not None:
-            add_operands(operands, lhs, rhs)
+        if left_default is not False:
+            for k, v in rhs.items():
+                if k not in lhs:
+                    operands[k] = (left_default, v, reflect, None)
         return operands, index
 
     @_cudf_nvtx_annotate

From db84ddf821fc157724a555b5fe77ff0b053886b7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 31 Mar 2022 14:22:56 -0700
Subject: [PATCH 8/9] Deprecate binary operations with future unsupported
 types.

---
 python/cudf/cudf/core/dataframe.py           | 13 ++++++--
 python/cudf/cudf/core/single_column_frame.py | 12 ++++++++
 python/cudf/cudf/tests/test_dataframe.py     | 31 +++++++++++++++-----
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 5bdf7e5f202..0e25f245efd 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -10,7 +10,7 @@
 import sys
 import warnings
 from collections import defaultdict
-from collections.abc import Iterable, Sequence
+from collections.abc import Iterable, Mapping, Sequence
 from typing import (
     Any,
     Dict,
@@ -1855,7 +1855,14 @@ def _make_operands_and_index_for_binop(
         Optional[BaseIndex],
     ]:
         # Check built-in types first for speed.
-        if isinstance(other, (list, dict, Sequence, MutableMapping)):
+        if isinstance(other, (list, dict, Sequence, Mapping)):
+            warnings.warn(
+                "Binary operations between host objects such as "
+                f"{type(other)} and cudf.DataFrame are deprecated and will be "
+                "removed in a future release. Please convert it to a cudf "
+                "object before performing the operation.",
+                FutureWarning,
+            )
             if len(other) != self._num_columns:
                 raise ValueError(
                     "Other is the wrong length. Expected "
@@ -1898,7 +1905,7 @@ def _make_operands_and_index_for_binop(
             # the fill value.
             left_default = fill_value
 
-        if not isinstance(rhs, (dict, MutableMapping)):
+        if not isinstance(rhs, (dict, Mapping)):
             return NotImplemented, None
 
         operands = {
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 3e91aa634f4..1a06e98148b 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -3,6 +3,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import Any, Dict, Optional, Tuple, Type, TypeVar, Union
 
 import cupy
@@ -333,6 +334,17 @@ def _make_operands_for_binop(
         if isinstance(other, SingleColumnFrame):
             other = other._column
         elif not _is_scalar_or_zero_d_array(other):
+            if not hasattr(other, "__cuda_array_interface__"):
+                # TODO: When this deprecated behavior is removed, also change
+                # the above conditional to stop checking for pd.Series and
+                # pd.Index since we only need to support SingleColumnFrame.
+                warnings.warn(
+                    f"Binary operations between host objects such as "
+                    f"{type(other)} and {type(self)} are deprecated and will "
+                    "be removed in a future release. Please convert it to a "
+                    "cudf object before performing the operation.",
+                    FutureWarning,
+                )
             # Non-scalar right operands are valid iff they convert to columns.
             try:
                 other = as_column(other)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 8549c9a1b20..bddc1316aec 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9,6 +9,7 @@
 import string
 import textwrap
 import warnings
+from contextlib import contextmanager
 from copy import copy
 
 import cupy
@@ -2018,6 +2019,15 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count):
     )
 
 
+@contextmanager
+def _hide_host_other_warning(other):
+    if isinstance(other, (dict, list)):
+        with pytest.warns(FutureWarning):
+            yield
+    else:
+        yield
+
+
 @pytest.mark.parametrize(
     "binop",
     [
@@ -2068,17 +2078,22 @@ def test_binops_df(pdf, gdf, binop, other):
         if isinstance(other, (pd.Series, pd.DataFrame)):
             other = cudf.from_pandas(other)
 
-        assert_exceptions_equal(
-            lfunc=binop,
-            rfunc=binop,
-            lfunc_args_and_kwargs=([pdf, other], {}),
-            rfunc_args_and_kwargs=([gdf, other], {}),
-            compare_error_message=False,
-        )
+        # TODO: When we remove support for binary operations with lists and
+        # dicts, those cases should all be checked in a `pytest.raises` block
+        # that returns before we enter this try-except.
+        with _hide_host_other_warning(other):
+            assert_exceptions_equal(
+                lfunc=binop,
+                rfunc=binop,
+                lfunc_args_and_kwargs=([pdf, other], {}),
+                rfunc_args_and_kwargs=([gdf, other], {}),
+                compare_error_message=False,
+            )
     else:
         if isinstance(other, (pd.Series, pd.DataFrame)):
             other = cudf.from_pandas(other)
-        g = binop(gdf, other)
+        with _hide_host_other_warning(other):
+            g = binop(gdf, other)
         try:
             assert_eq(d, g)
         except AssertionError:

From 46fd12957ba1cf8ef03d5ef74d5819d20e560a57 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 12 Apr 2022 13:49:23 -0700
Subject: [PATCH 9/9] Address PR comments.

---
 python/cudf/cudf/core/dataframe.py       | 2 +-
 python/cudf/cudf/tests/test_dataframe.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0e25f245efd..5483dcf653c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1865,7 +1865,7 @@ def _make_operands_and_index_for_binop(
             )
             if len(other) != self._num_columns:
                 raise ValueError(
-                    "Other is the wrong length. Expected "
+                    "Other is of the wrong length. Expected "
                     f"{self._num_columns}, got {len(other)}"
                 )
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index bddc1316aec..a7fad792bd0 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2106,6 +2106,11 @@ def test_binops_df(pdf, gdf, binop, other):
                 pass
 
 
+def test_binops_df_invalid(gdf):
+    with pytest.raises(TypeError):
+        gdf + np.array([1, 2])
+
+
 @pytest.mark.parametrize("binop", [operator.and_, operator.or_, operator.xor])
 def test_bitwise_binops_df(pdf, gdf, binop):
     d = binop(pdf, pdf + 1)