Be more careful in type promotion for scalar binop
Try to follow NumPy and do the promotion using types rather than values:
first attempt to use the dtype of the passed-in operand and, only if it
does not have one, fall back to result_type. This way we avoid problems
with min_scalar_type wanting to pick unsigned int types for bare Python
integers.
wence- committed Nov 8, 2022
1 parent 2a58ff6 commit d670394
Showing 3 changed files with 22 additions and 20 deletions.
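
For context, the NumPy behaviour the commit message refers to can be seen directly. A minimal illustration, not part of the commit; results observed with NumPy 1.x value-based casting:

import numpy as np

# Value-based deduction picks the smallest type that holds the value and
# prefers unsigned types for non-negative Python ints:
np.min_scalar_type(3)          # dtype('uint8')
np.min_scalar_type(2**63 - 1)  # dtype('uint64')

# Promoting a signed column dtype with an unsigned deduction falls back to
# float64, which cannot represent every int64 exactly:
np.promote_types(np.int64, np.uint64)  # dtype('float64')

# Type-based deduction keeps the operand's own signed type:
np.int64(2**63 - 1).dtype      # dtype('int64')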
python/cudf/cudf/core/column/column.py (2 additions, 1 deletion)
@@ -502,7 +502,8 @@ def _wrap_binop_normalization(self, other):
         if other is NA or other is None:
             return cudf.Scalar(other, dtype=self.dtype)
         if isinstance(other, np.ndarray) and other.ndim == 0:
-            other = other.item()
+            # Try and maintain the dtype
+            other = other.dtype.type(other.item())
         return self.normalize_binop_value(other)
 
     def _scatter_by_slice(
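A minimal sketch, not from the commit, of what the column.py change above preserves: .item() on a 0-d array returns a plain Python scalar and drops the dtype, while re-wrapping with the array's own dtype keeps it:

import numpy as np

x = np.array(7, dtype=np.int8)  # a 0-d ndarray
type(x.item())                  # <class 'int'> -- the int8 dtype is lost
x.dtype.type(x.item())          # a NumPy scalar whose dtype is still int8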
python/cudf/cudf/core/column/numerical.py (14 additions, 19 deletions)
@@ -35,7 +35,7 @@
     is_number,
     is_scalar,
 )
-from cudf.core.buffer import Buffer, as_buffer, cuda_array_interface_wrapper
+from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -274,7 +274,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn:
 
     def normalize_binop_value(
         self, other: ScalarLike
-    ) -> Union[ColumnBase, ScalarLike]:
+    ) -> Union[ColumnBase, cudf.Scalar]:
         if isinstance(other, ColumnBase):
             if not isinstance(other, NumericalColumn):
                 return NotImplemented
@@ -285,25 +285,20 @@ def normalize_binop_value(
             # expensive device-host transfer just to
             # adjust the dtype
             other = other.value
-        other_dtype = np.min_scalar_type(other)
+        try:
+            # Try and use the dtype of the incoming object
+            other_dtype = other.dtype
+        except AttributeError:
+            # Otherwise fall back to numpy's type deduction scheme.
+            other_dtype = np.result_type(other)
+
         if other_dtype.kind in {"b", "i", "u", "f"}:
-            if isinstance(other, cudf.Scalar):
-                return other
-            other_dtype = np.promote_types(self.dtype, other_dtype)
-            if other_dtype == np.dtype("float16"):
-                other_dtype = cudf.dtype("float32")
-            other = other_dtype.type(other)
+            common_dtype = np.promote_types(self.dtype, other_dtype)
+            if common_dtype == np.dtype("float16"):
+                common_dtype = cudf.dtype("float32")
             if self.dtype.kind == "b":
-                other_dtype = min_signed_type(other)
-            if np.isscalar(other):
-                return cudf.dtype(other_dtype).type(other)
-            else:
-                ary = full(len(self), other, dtype=other_dtype)
-                return column.build_column(
-                    data=as_buffer(ary),
-                    dtype=ary.dtype,
-                    mask=self.mask,
-                )
+                common_dtype = min_signed_type(other)
+            return cudf.Scalar(other, dtype=common_dtype)
         else:
             return NotImplemented
 
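The promotion logic introduced above, restated as a hypothetical standalone sketch; the helper name is illustrative and the cudf-specific bool handling via min_signed_type is omitted:

import numpy as np

def _deduce_common_dtype(col_dtype, other):
    # Mirror of the new path in NumericalColumn.normalize_binop_value:
    # prefer the operand's own dtype, fall back to result_type.
    try:
        other_dtype = other.dtype             # NumPy/cudf scalars carry a dtype
    except AttributeError:
        other_dtype = np.result_type(other)   # bare Python numbers
    common = np.promote_types(col_dtype, other_dtype)
    if common == np.dtype("float16"):
        common = np.dtype("float32")          # cudf has no float16 columns
    return common

_deduce_common_dtype(np.dtype("int64"), np.int64(2**63 - 1))  # dtype('int64')
_deduce_common_dtype(np.dtype("int64"), 3)                    # dtype('int64')
_deduce_common_dtype(np.dtype("int32"), np.uint8(1))          # dtype('int32')

With the old min_scalar_type path, a bare Python value near 2**63 would instead have been deduced as uint64 and the common dtype pushed to float64.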
python/cudf/cudf/tests/test_series.py (6 additions, 0 deletions)
@@ -1951,3 +1951,9 @@ def test_set_bool_error(dtype, bool_scalar):
         lfunc_args_and_kwargs=([bool_scalar],),
         rfunc_args_and_kwargs=([bool_scalar],),
     )
+
+
+def test_int64_equality():
+    s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64))
+    assert (s != np.int64(2**63 - 1)).all()
+    assert (s != cudf.Scalar(2**63 - 1, dtype=np.int64)).all()

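The new test guards against the float64 fallback: values just below 2**63 are representable as int64 but not exactly as float64, so routing the comparison through float64 (as the old uint64 deduction would force) makes distinct values compare equal. A small cudf-independent demonstration:

import numpy as np

# float64 has only 53 bits of precision, so nearby int64 values of this
# magnitude collapse to the same float:
np.float64(2**63 - 10) == np.float64(2**63 - 1)  # True
np.int64(2**63 - 10) == np.int64(2**63 - 1)      # False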