From d670394e9f440bb40cc33213d5a059815012d466 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Fri, 4 Nov 2022 16:12:03 +0000 Subject: [PATCH] Be more careful in type promotion for scalar binop Try to do everything following numpy, using types rather than values: first attempt to use the dtype of the passed-in operand and, if it does not have one, fall back to using result_type. This way we avoid problems with min_scalar_type wanting to pick unsigned int types for bare Python integers. --- python/cudf/cudf/core/column/column.py | 3 ++- python/cudf/cudf/core/column/numerical.py | 33 ++++++++++------------- python/cudf/cudf/tests/test_series.py | 6 +++++ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 22f8d27f9e8..f22add316e6 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -502,7 +502,8 @@ def _wrap_binop_normalization(self, other): if other is NA or other is None: return cudf.Scalar(other, dtype=self.dtype) if isinstance(other, np.ndarray) and other.ndim == 0: - other = other.item() + # Try and maintain the dtype + other = other.dtype.type(other.item()) return self.normalize_binop_value(other) def _scatter_by_slice( diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index f126f47c3c2..7ca4c40f674 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -35,7 +35,7 @@ is_number, is_scalar, ) -from cudf.core.buffer import Buffer, as_buffer, cuda_array_interface_wrapper +from cudf.core.buffer import Buffer, cuda_array_interface_wrapper from cudf.core.column import ( ColumnBase, as_column, @@ -274,7 +274,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn: def normalize_binop_value( self, other: ScalarLike - ) -> Union[ColumnBase, ScalarLike]: + ) -> Union[ColumnBase, cudf.Scalar]: if 
isinstance(other, ColumnBase): if not isinstance(other, NumericalColumn): return NotImplemented @@ -285,25 +285,20 @@ def normalize_binop_value( # expensive device-host transfer just to # adjust the dtype other = other.value - other_dtype = np.min_scalar_type(other) + try: + # Try and use the dtype of the incoming object + other_dtype = other.dtype + except AttributeError: + # Otherwise fall back to numpy's type deduction scheme. + other_dtype = np.result_type(other) + if other_dtype.kind in {"b", "i", "u", "f"}: - if isinstance(other, cudf.Scalar): - return other - other_dtype = np.promote_types(self.dtype, other_dtype) - if other_dtype == np.dtype("float16"): - other_dtype = cudf.dtype("float32") - other = other_dtype.type(other) + common_dtype = np.promote_types(self.dtype, other_dtype) + if common_dtype == np.dtype("float16"): + common_dtype = cudf.dtype("float32") if self.dtype.kind == "b": - other_dtype = min_signed_type(other) - if np.isscalar(other): - return cudf.dtype(other_dtype).type(other) - else: - ary = full(len(self), other, dtype=other_dtype) - return column.build_column( - data=as_buffer(ary), - dtype=ary.dtype, - mask=self.mask, - ) + common_dtype = min_signed_type(other) + return cudf.Scalar(other, dtype=common_dtype) else: return NotImplemented diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index c0b99f56238..c9587438b09 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -1951,3 +1951,9 @@ def test_set_bool_error(dtype, bool_scalar): lfunc_args_and_kwargs=([bool_scalar],), rfunc_args_and_kwargs=([bool_scalar],), ) + + +def test_int64_equality(): + s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64)) + assert (s != np.int64(2**63 - 1)).all() + assert (s != cudf.Scalar(2**63 - 1, dtype=np.int64)).all()