Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Adapt to NumPy 2 promotion changes #16141

Merged
merged 9 commits into from
Jul 15, 2024
24 changes: 15 additions & 9 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,17 @@ def _check_and_cast_columns_with_other(

other_is_scalar = is_scalar(other)
if other_is_scalar:
if (isinstance(other, float) and not np.isnan(other)) and (
source_dtype.type(other) != other
):
raise TypeError(
f"Cannot safely cast non-equivalent "
f"{type(other).__name__} to {source_dtype.name}"
)
if isinstance(other, float) and not np.isnan(other):
try:
is_safe = source_dtype.type(other) == other
except OverflowError:
is_safe = False
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This just adds the OverflowError check, because NumPy is now strict, otherwise the error type would change here.


if not is_safe:
raise TypeError(
f"Cannot safely cast non-equivalent "
f"{type(other).__name__} to {source_dtype.name}"
)

if cudf.utils.utils.is_na_like(other):
return _normalize_categorical(
Expand All @@ -84,8 +88,10 @@ def _check_and_cast_columns_with_other(
)
return _normalize_categorical(source_col, other.astype(source_dtype))

if _is_non_decimal_numeric_dtype(source_dtype) and _can_cast(
other, source_dtype
if (
_is_non_decimal_numeric_dtype(source_dtype)
and not other_is_scalar # can-cast fails for Python scalars
and _can_cast(other, source_dtype)
):
common_dtype = source_dtype
elif (
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
)


_DEFAULT_CATEGORICAL_VALUE = -1
# Using np.int8(-1) to allow silent wrap-around when casting to uint
# it may make sense to make this dtype specific or a function.
_DEFAULT_CATEGORICAL_VALUE = np.int8(-1)


class CategoricalAccessor(ColumnMethods):
Expand Down
27 changes: 21 additions & 6 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,28 @@ def normalize_binop_value(
if isinstance(other, cudf.Scalar):
if self.dtype == other.dtype:
return other

# expensive device-host transfer just to
# adjust the dtype
other = other.value

# NumPy 2 needs a Python scalar to do weak promotion, but
# pandas forces weak promotion always
# TODO: We could use 0, 0.0, and 0j for promotion to avoid copies.
if other.dtype.kind in "ifc":
other = other.item()
elif not isinstance(other, (int, float, complex)):
# Go via NumPy to get the value
other = np.array(other)
if other.dtype.kind in "ifc":
other = other.item()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The biggest change, on NumPy 2, we need to pass Python scalars to align with Pandas (not NumPy, which pandas does not align with here).
On NumPy 1, this doesn't really make a difference.

To get those Python scalars, .item() is one reasonable way.


# Try and match pandas and hence numpy. Deduce the common
# dtype via the _value_ of other, and the dtype of self. TODO:
# When NEP50 is accepted, this might want changed or
# simplified.
# This is not at all simple:
# np.result_type(np.int64(0), np.uint8)
# dtype via the _value_ of other, and the dtype of self on NumPy 1.x
# with NumPy 2, we force weak promotion even for our/NumPy scalars
# to match pandas 2.2.
# Weak promotion is not at all simple:
# np.result_type(0, np.uint8)
# => np.uint8
# np.result_type(np.asarray([0], dtype=np.int64), np.uint8)
# => np.int64
Expand Down Expand Up @@ -626,7 +639,9 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
min_, max_ = iinfo.min, iinfo.max

# best we can do is hope to catch it here and avoid compare
if (self.min() >= min_) and (self.max() <= max_):
# Use Python floats, which have precise comparison for float64.
# NOTE(seberg): it would make sense to limit to the mantissa range.
if (float(self.min()) >= min_) and (float(self.max()) <= max_):
filled = self.fillna(0)
return (cudf.Series(filled) % 1 == 0).all()
else:
Expand Down
21 changes: 18 additions & 3 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,14 @@ def test_series_reflected_ops_scalar(func, dtype, obj_class):
if obj_class == "Index":
gs = Index(gs)

gs_result = func(gs)
try:
gs_result = func(gs)
except OverflowError:
# An error is fine, if pandas raises the same error:
with pytest.raises(OverflowError):
func(random_series)

return

# class typing
if obj_class == "Index":
Expand Down Expand Up @@ -589,7 +596,14 @@ def test_series_reflected_ops_cudf_scalar(funcs, dtype, obj_class):
if obj_class == "Index":
gs = Index(gs)

gs_result = gpu_func(gs)
try:
gs_result = gpu_func(gs)
except OverflowError:
# An error is fine, if pandas raises the same error:
with pytest.raises(OverflowError):
cpu_func(random_series)

return

# class typing
if obj_class == "Index":
Expand Down Expand Up @@ -770,7 +784,8 @@ def test_operator_func_series_and_scalar(
fill_value=fill_value,
)
pdf_series_result = getattr(pdf_series, func)(
scalar, fill_value=fill_value
np.array(scalar)[()] if use_cudf_scalar else scalar,
fill_value=fill_value,
)

assert_eq(pdf_series_result, gdf_series_result)
Expand Down
13 changes: 12 additions & 1 deletion python/cudf/cudf/tests/test_doctests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
import contextlib
import doctest
import inspect
Expand All @@ -8,6 +8,7 @@

import numpy as np
import pytest
from packaging import version

import cudf

Expand Down Expand Up @@ -80,6 +81,16 @@ def chdir_to_tmp_path(cls, tmp_path):
yield
os.chdir(original_directory)

@pytest.fixture(autouse=True)
def prinoptions(cls):
# TODO: NumPy now prints scalars as `np.int8(1)`, etc. this should
# be adapted evantually.
if version.parse(np.__version__) >= version.parse("2.0"):
with np.printoptions(legacy="1.25"):
yield
else:
yield

@pytest.mark.parametrize(
"docstring",
itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]),
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/tests/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ def test_dtype(in_dtype, expect):
np.complex128,
complex,
"S",
"a",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just avoids an error "a" has been removed, and seems not particularly important here.

"V",
"float16",
np.float16,
Expand Down
Loading