diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index f3183e6029d..97cd0ef72d5 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -54,7 +54,7 @@ def _check_and_cast_columns_with_other( other_is_scalar = is_scalar(other) if other_is_scalar: - if isinstance(other, float) and not np.isnan(other): + if isinstance(other, (float, np.floating)) and not np.isnan(other): try: is_safe = source_dtype.type(other) == other except OverflowError: diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index f633d527681..70be83b5ddb 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1426,9 +1426,10 @@ def column_empty_like( return column_empty(row_count, dtype, masked) -def _has_any_nan(arbitrary): +def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: + """Check if an object dtype Series or array contains NaN.""" return any( - ((isinstance(x, float) or isinstance(x, np.floating)) and np.isnan(x)) + isinstance(x, (float, np.floating)) and np.isnan(x) for x in np.asarray(arbitrary) ) @@ -2280,9 +2281,8 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # Notice, we can always cast pure null columns not_null_col_dtypes = [o.dtype for o in objs if o.null_count != len(o)] if len(not_null_col_dtypes) and all( - _is_non_decimal_numeric_dtype(dtyp) - and np.issubdtype(dtyp, np.datetime64) - for dtyp in not_null_col_dtypes + _is_non_decimal_numeric_dtype(dtype) and dtype.kind == "M" + for dtype in not_null_col_dtypes ): common_dtype = find_common_type(not_null_col_dtypes) # Cast all columns to the common dtype diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 214e84028d2..0b683758195 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -645,7 +645,7 @@ def isin(self, values: Sequence) -> ColumnBase: return cudf.core.tools.datetimes._isin_datetimelike(self, values) def can_cast_safely(self, to_dtype: Dtype) -> bool: - if np.issubdtype(to_dtype, np.datetime64): + if to_dtype.kind == "M": # type: ignore[union-attr] to_res, _ = np.datetime_data(to_dtype) self_res, _ = np.datetime_data(self.dtype) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index cc15e78314e..1489b5efa13 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -564,10 +564,11 @@ def take(self, lists_indices: ColumnLike) -> ParentType: raise ValueError( "lists_indices and list column is of different " "size." ) - if not _is_non_decimal_numeric_dtype( - lists_indices_col.children[1].dtype - ) or not np.issubdtype( - lists_indices_col.children[1].dtype, np.integer + if ( + not _is_non_decimal_numeric_dtype( + lists_indices_col.children[1].dtype + ) + or lists_indices_col.children[1].dtype.kind not in "iu" ): raise TypeError( "lists_indices should be column of values of index types." diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index b8fa00e9643..b156e75be7d 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -232,25 +232,17 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: tmp = self if reflect else other # Guard against division by zero for integers. if ( - (tmp.dtype.type in int_float_dtype_mapping) - and (tmp.dtype.type != np.bool_) - and ( - ( - ( - np.isscalar(tmp) - or ( - isinstance(tmp, cudf.Scalar) - # host to device copy - and tmp.is_valid() - ) - ) - and (0 == tmp) - ) - or ((isinstance(tmp, NumericalColumn)) and (0 in tmp)) - ) + tmp.dtype.type in int_float_dtype_mapping + and tmp.dtype.kind != "b" ): - out_dtype = cudf.dtype("float64") - + if isinstance(tmp, NumericalColumn) and 0 in tmp: + out_dtype = cudf.dtype("float64") + elif isinstance(tmp, cudf.Scalar): + if tmp.is_valid() and tmp == 0: + # tmp == 0 can return NA + out_dtype = cudf.dtype("float64") + elif is_scalar(tmp) and tmp == 0: + out_dtype = cudf.dtype("float64") if op in { "__lt__", "__gt__", diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index dd0a4f666a1..32c84763401 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -9,7 +9,7 @@ import numpy as np import cudf -from cudf.api.types import is_decimal_dtype, is_dtype_equal +from cudf.api.types import is_decimal_dtype, is_dtype_equal, is_numeric_dtype from cudf.core.column import CategoricalColumn from cudf.core.dtypes import CategoricalDtype @@ -88,38 +88,25 @@ def _match_join_keys( ) if ( - np.issubdtype(ltype, np.number) - and np.issubdtype(rtype, np.number) - and not ( - np.issubdtype(ltype, np.timedelta64) - or np.issubdtype(rtype, np.timedelta64) - ) + is_numeric_dtype(ltype) + and is_numeric_dtype(rtype) + and not (ltype.kind == "m" or rtype.kind == "m") ): common_type = ( max(ltype, rtype) if ltype.kind == rtype.kind else np.result_type(ltype, rtype) ) - elif ( - np.issubdtype(ltype, np.datetime64) - and np.issubdtype(rtype, np.datetime64) - ) or ( - np.issubdtype(ltype, np.timedelta64) - and np.issubdtype(rtype, np.timedelta64) + elif (ltype.kind == "M" and rtype.kind == "M") or ( + ltype.kind == "m" and rtype.kind == "m" ): common_type = max(ltype, rtype) - elif ( - np.issubdtype(ltype, np.datetime64) - or np.issubdtype(ltype, np.timedelta64) - ) and not rcol.fillna(0).can_cast_safely(ltype): + elif ltype.kind in "mM" and not rcol.fillna(0).can_cast_safely(ltype): raise TypeError( f"Cannot join between {ltype} and {rtype}, please type-cast both " "columns to the same type." ) - elif ( - np.issubdtype(rtype, np.datetime64) - or np.issubdtype(rtype, np.timedelta64) - ) and not lcol.fillna(0).can_cast_safely(rtype): + elif rtype.kind in "mM" and not lcol.fillna(0).can_cast_safely(rtype): raise TypeError( f"Cannot join between {rtype} and {ltype}, please type-cast both " "columns to the same type." diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8c8fa75918c..83a21fed418 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -214,7 +214,7 @@ def __setitem__(self, key, value): and self._frame.dtype.categories.dtype.kind == "f" ) ) - and isinstance(value, (np.float32, np.float64)) + and isinstance(value, np.floating) and np.isnan(value) ): raise MixedTypeError( diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index e56c8d867cb..c2072d90e98 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -158,12 +158,12 @@ def assert_column_equal( return True if check_datetimelike_compat: - if np.issubdtype(left.dtype, np.datetime64): + if left.dtype.kind == "M": right = right.astype(left.dtype) - elif np.issubdtype(right.dtype, np.datetime64): + elif right.dtype.kind == "M": left = left.astype(right.dtype) - if np.issubdtype(left.dtype, np.datetime64): + if left.dtype.kind == "M": if not left.equals(right): raise AssertionError( f"[datetimelike_compat=True] {left.values} " @@ -779,9 +779,7 @@ def assert_eq(left, right, **kwargs): tm.assert_index_equal(left, right, **kwargs) elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray): - if np.issubdtype(left.dtype, np.floating) and np.issubdtype( - right.dtype, np.floating - ): + if left.dtype.kind == "f" and right.dtype.kind == "f": assert np.allclose(left, right, equal_nan=True) else: assert np.array_equal(left, right) diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 0dec857ea96..eb099ca7f90 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -375,10 +375,10 @@ def min_column_type(x, expected_type): if x.null_count == len(x): return x.dtype - if np.issubdtype(x.dtype, np.floating): + if x.dtype.kind == "f": return get_min_float_dtype(x) - elif np.issubdtype(expected_type, np.integer): + elif cudf.dtype(expected_type).kind in "iu": max_bound_dtype = np.min_scalar_type(x.max()) min_bound_dtype = np.min_scalar_type(x.min()) result_type = np.promote_types(max_bound_dtype, min_bound_dtype)