Skip to content

Commit

Permalink
MAINT: Adapt to NumPy 2 promotion changes (#16141)
Browse files Browse the repository at this point in the history
Splitting out the non API changes from gh-15897, the Scalar API change is required for the tests to pass with NumPy 2, but almost all changes should be relatively straight forward here on their own.

(I will add inline comments.)

---

This PR does not fix integer comparisons, there are currently no tests that run into these.

xref: rapidsai/build-planning#38

Authors:
  - Sebastian Berg (https://github.com/seberg)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #16141
  • Loading branch information
seberg authored Jul 15, 2024
1 parent c4ee4a7 commit 1889c7c
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 21 deletions.
24 changes: 15 additions & 9 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,17 @@ def _check_and_cast_columns_with_other(

other_is_scalar = is_scalar(other)
if other_is_scalar:
if (isinstance(other, float) and not np.isnan(other)) and (
source_dtype.type(other) != other
):
raise TypeError(
f"Cannot safely cast non-equivalent "
f"{type(other).__name__} to {source_dtype.name}"
)
if isinstance(other, float) and not np.isnan(other):
try:
is_safe = source_dtype.type(other) == other
except OverflowError:
is_safe = False

if not is_safe:
raise TypeError(
f"Cannot safely cast non-equivalent "
f"{type(other).__name__} to {source_dtype.name}"
)

if cudf.utils.utils.is_na_like(other):
return _normalize_categorical(
Expand All @@ -84,8 +88,10 @@ def _check_and_cast_columns_with_other(
)
return _normalize_categorical(source_col, other.astype(source_dtype))

if _is_non_decimal_numeric_dtype(source_dtype) and _can_cast(
other, source_dtype
if (
_is_non_decimal_numeric_dtype(source_dtype)
and not other_is_scalar # can-cast fails for Python scalars
and _can_cast(other, source_dtype)
):
common_dtype = source_dtype
elif (
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@
)


_DEFAULT_CATEGORICAL_VALUE = -1
# Using np.int8(-1) to allow silent wrap-around when casting to uint
# it may make sense to make this dtype specific or a function.
_DEFAULT_CATEGORICAL_VALUE = np.int8(-1)


class CategoricalAccessor(ColumnMethods):
Expand Down
27 changes: 21 additions & 6 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,28 @@ def normalize_binop_value(
if isinstance(other, cudf.Scalar):
if self.dtype == other.dtype:
return other

# expensive device-host transfer just to
# adjust the dtype
other = other.value

# NumPy 2 needs a Python scalar to do weak promotion, but
# pandas forces weak promotion always
# TODO: We could use 0, 0.0, and 0j for promotion to avoid copies.
if other.dtype.kind in "ifc":
other = other.item()
elif not isinstance(other, (int, float, complex)):
# Go via NumPy to get the value
other = np.array(other)
if other.dtype.kind in "ifc":
other = other.item()

# Try and match pandas and hence numpy. Deduce the common
# dtype via the _value_ of other, and the dtype of self. TODO:
# When NEP50 is accepted, this might want changed or
# simplified.
# This is not at all simple:
# np.result_type(np.int64(0), np.uint8)
# dtype via the _value_ of other, and the dtype of self on NumPy 1.x
# with NumPy 2, we force weak promotion even for our/NumPy scalars
# to match pandas 2.2.
# Weak promotion is not at all simple:
# np.result_type(0, np.uint8)
# => np.uint8
# np.result_type(np.asarray([0], dtype=np.int64), np.uint8)
# => np.int64
Expand Down Expand Up @@ -626,7 +639,9 @@ def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
min_, max_ = iinfo.min, iinfo.max

# best we can do is hope to catch it here and avoid compare
if (self.min() >= min_) and (self.max() <= max_):
# Use Python floats, which have precise comparison for float64.
# NOTE(seberg): it would make sense to limit to the mantissa range.
if (float(self.min()) >= min_) and (float(self.max()) <= max_):
filled = self.fillna(0)
return (cudf.Series(filled) % 1 == 0).all()
else:
Expand Down
21 changes: 18 additions & 3 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,14 @@ def test_series_reflected_ops_scalar(func, dtype, obj_class):
if obj_class == "Index":
gs = Index(gs)

gs_result = func(gs)
try:
gs_result = func(gs)
except OverflowError:
# An error is fine, if pandas raises the same error:
with pytest.raises(OverflowError):
func(random_series)

return

# class typing
if obj_class == "Index":
Expand Down Expand Up @@ -589,7 +596,14 @@ def test_series_reflected_ops_cudf_scalar(funcs, dtype, obj_class):
if obj_class == "Index":
gs = Index(gs)

gs_result = gpu_func(gs)
try:
gs_result = gpu_func(gs)
except OverflowError:
# An error is fine, if pandas raises the same error:
with pytest.raises(OverflowError):
cpu_func(random_series)

return

# class typing
if obj_class == "Index":
Expand Down Expand Up @@ -770,7 +784,8 @@ def test_operator_func_series_and_scalar(
fill_value=fill_value,
)
pdf_series_result = getattr(pdf_series, func)(
scalar, fill_value=fill_value
np.array(scalar)[()] if use_cudf_scalar else scalar,
fill_value=fill_value,
)

assert_eq(pdf_series_result, gdf_series_result)
Expand Down
13 changes: 12 additions & 1 deletion python/cudf/cudf/tests/test_doctests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
import contextlib
import doctest
import inspect
Expand All @@ -8,6 +8,7 @@

import numpy as np
import pytest
from packaging import version

import cudf

Expand Down Expand Up @@ -80,6 +81,16 @@ def chdir_to_tmp_path(cls, tmp_path):
yield
os.chdir(original_directory)

@pytest.fixture(autouse=True)
def prinoptions(cls):
# TODO: NumPy now prints scalars as `np.int8(1)`, etc. this should
# be adapted evantually.
if version.parse(np.__version__) >= version.parse("2.0"):
with np.printoptions(legacy="1.25"):
yield
else:
yield

@pytest.mark.parametrize(
"docstring",
itertools.chain(*[_find_doctests_in_obj(mod) for mod in tests]),
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/tests/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ def test_dtype(in_dtype, expect):
np.complex128,
complex,
"S",
"a",
"V",
"float16",
np.float16,
Expand Down

0 comments on commit 1889c7c

Please sign in to comment.