Be more careful in type promotion for scalar binop
Try to follow NumPy and do the promotion using types rather than values:
first attempt to use the dtype of the passed-in operand and, only if it
does not have one, fall back to result_type. This way we avoid problems
with min_scalar_type wanting to pick unsigned int types for bare Python
integers.
wence- committed Nov 8, 2022
1 parent 2a58ff6 commit d670394
Showing 3 changed files with 22 additions and 20 deletions.
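
For context, the NumPy behaviour the commit message refers to can be seen directly. A minimal illustration, not part of the commit; results observed with NumPy 1.x value-based casting:

import numpy as np

# Value-based deduction picks the smallest type that holds the value and
# prefers unsigned types for non-negative Python ints:
np.min_scalar_type(3)          # dtype('uint8')
np.min_scalar_type(2**63 - 1)  # dtype('uint64')

# Promoting a signed column dtype with an unsigned deduction falls back to
# float64, which cannot represent every int64 exactly:
np.promote_types(np.int64, np.uint64)  # dtype('float64')

# Type-based deduction keeps the operand's own signed type:
np.int64(2**63 - 1).dtype      # dtype('int64')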
python/cudf/cudf/core/column/column.py (2 additions, 1 deletion)
@@ -502,7 +502,8 @@ def _wrap_binop_normalization(self, other):
         if other is NA or other is None:
             return cudf.Scalar(other, dtype=self.dtype)
         if isinstance(other, np.ndarray) and other.ndim == 0:
-            other = other.item()
+            # Try and maintain the dtype
+            other = other.dtype.type(other.item())
         return self.normalize_binop_value(other)
 
     def _scatter_by_slice(
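A minimal sketch, not from the commit, of what the column.py change above preserves: .item() on a 0-d array returns a plain Python scalar and drops the dtype, while re-wrapping with the array's own dtype keeps it:

import numpy as np

x = np.array(7, dtype=np.int8)  # a 0-d ndarray
type(x.item())                  # <class 'int'> -- the int8 dtype is lost
x.dtype.type(x.item())          # a NumPy scalar whose dtype is still int8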
python/cudf/cudf/core/column/numerical.py (14 additions, 19 deletions)
@@ -35,7 +35,7 @@
     is_number,
     is_scalar,
 )
-from cudf.core.buffer import Buffer, as_buffer, cuda_array_interface_wrapper
+from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -274,7 +274,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn:
 
     def normalize_binop_value(
         self, other: ScalarLike
-    ) -> Union[ColumnBase, ScalarLike]:
+    ) -> Union[ColumnBase, cudf.Scalar]:
         if isinstance(other, ColumnBase):
             if not isinstance(other, NumericalColumn):
                 return NotImplemented
@@ -285,25 +285,20 @@ def normalize_binop_value(
             # expensive device-host transfer just to
             # adjust the dtype
             other = other.value
-        other_dtype = np.min_scalar_type(other)
+        try:
+            # Try and use the dtype of the incoming object
+            other_dtype = other.dtype
+        except AttributeError:
+            # Otherwise fall back to numpy's type deduction scheme.
+            other_dtype = np.result_type(other)
+
         if other_dtype.kind in {"b", "i", "u", "f"}:
-            if isinstance(other, cudf.Scalar):
-                return other
-            other_dtype = np.promote_types(self.dtype, other_dtype)
-            if other_dtype == np.dtype("float16"):
-                other_dtype = cudf.dtype("float32")
-            other = other_dtype.type(other)
+            common_dtype = np.promote_types(self.dtype, other_dtype)
+            if common_dtype == np.dtype("float16"):
+                common_dtype = cudf.dtype("float32")
             if self.dtype.kind == "b":
-                other_dtype = min_signed_type(other)
-            if np.isscalar(other):
-                return cudf.dtype(other_dtype).type(other)
-            else:
-                ary = full(len(self), other, dtype=other_dtype)
-                return column.build_column(
-                    data=as_buffer(ary),
-                    dtype=ary.dtype,
-                    mask=self.mask,
-                )
+                common_dtype = min_signed_type(other)
+            return cudf.Scalar(other, dtype=common_dtype)
         else:
             return NotImplemented
 
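The promotion logic introduced above, restated as a hypothetical standalone sketch; the helper name is illustrative and the cudf-specific bool handling via min_signed_type is omitted:

import numpy as np

def _deduce_common_dtype(col_dtype, other):
    # Mirror of the new path in NumericalColumn.normalize_binop_value:
    # prefer the operand's own dtype, fall back to result_type.
    try:
        other_dtype = other.dtype             # NumPy/cudf scalars carry a dtype
    except AttributeError:
        other_dtype = np.result_type(other)   # bare Python numbers
    common = np.promote_types(col_dtype, other_dtype)
    if common == np.dtype("float16"):
        common = np.dtype("float32")          # cudf has no float16 columns
    return common

_deduce_common_dtype(np.dtype("int64"), np.int64(2**63 - 1))  # dtype('int64')
_deduce_common_dtype(np.dtype("int64"), 3)                    # dtype('int64')
_deduce_common_dtype(np.dtype("int32"), np.uint8(1))          # dtype('int32')

With the old min_scalar_type path, a bare Python value near 2**63 would instead have been deduced as uint64 and the common dtype pushed to float64.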
python/cudf/cudf/tests/test_series.py (6 additions, 0 deletions)
@@ -1951,3 +1951,9 @@ def test_set_bool_error(dtype, bool_scalar):
         lfunc_args_and_kwargs=([bool_scalar],),
         rfunc_args_and_kwargs=([bool_scalar],),
     )
+
+
+def test_int64_equality():
+    s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64))
+    assert (s != np.int64(2**63 - 1)).all()
+    assert (s != cudf.Scalar(2**63 - 1, dtype=np.int64)).all()

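The new test guards against the float64 fallback: values just below 2**63 are representable as int64 but not exactly as float64, so routing the comparison through float64 (as the old uint64 deduction would force) makes distinct values compare equal. A small cudf-independent demonstration:

import numpy as np

# float64 has only 53 bits of precision, so nearby int64 values of this
# magnitude collapse to the same float:
np.float64(2**63 - 10) == np.float64(2**63 - 1)  # True
np.int64(2**63 - 10) == np.int64(2**63 - 1)      # False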