rapidsai · rapids-bot · Jul 19, 2024 · Jul 11, 2024 · Jul 11, 2024 · Jul 12, 2024
@@ -90,7 +90,7 @@ def is_integer(obj):
     bool
     """
     if isinstance(obj, cudf.Scalar):
-        return pd.api.types.is_integer_dtype(obj.dtype)
+        return obj.dtype.kind in "iu"
     return pd.api.types.is_integer(obj)
 
 

@@ -19,14 +19,7 @@
 )
 from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
-from cudf.api.types import (
-    is_integer,
-    is_integer_dtype,
-    is_list_like,
-    is_scalar,
-    is_signed_integer_dtype,
-    is_unsigned_integer_dtype,
-)
+from cudf.api.types import is_integer, is_list_like, is_scalar
 from cudf.core.abc import Serializable
 from cudf.core.column import ColumnBase, column
 from cudf.errors import MixedTypeError
@@ -615,12 +608,8 @@ def union(self, other, sort=None):
                 # Bools + other types will result in mixed type.
                 # This is not yet consistent in pandas and specific to APIs.
                 raise MixedTypeError("Cannot perform union with mixed types")
-            if (
-                is_signed_integer_dtype(self.dtype)
-                and is_unsigned_integer_dtype(other.dtype)
-            ) or (
-                is_unsigned_integer_dtype(self.dtype)
-                and is_signed_integer_dtype(other.dtype)
+            if (self.dtype.kind == "i" and other.dtype.kind == "u") or (
+                self.dtype.kind == "u" and other.dtype.kind == "i"
             ):
                 # signed + unsigned types will result in
                 # mixed type for union in pandas.
@@ -2097,7 +2086,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
 
         # TODO: For performance, the check and conversion of gather map should
         # be done by the caller. This check will be removed in future release.
-        if not is_integer_dtype(gather_map.dtype):
+        if gather_map.dtype.kind not in "iu":
             gather_map = gather_map.astype(size_type_dtype)
 
         if not _gather_map_is_valid(

@@ -2213,25 +2213,26 @@ def as_column(
                 and arbitrary.null_count > 0
             ):
                 arbitrary = arbitrary.cast(pa.float64())
-            if cudf.get_option(
-                "default_integer_bitwidth"
-            ) and pa.types.is_integer(arbitrary.type):
-                dtype = _maybe_convert_to_default_type("int")
-            elif cudf.get_option(
-                "default_float_bitwidth"
-            ) and pa.types.is_floating(arbitrary.type):
-                dtype = _maybe_convert_to_default_type("float")
+            if (
+                cudf.get_option("default_integer_bitwidth")
+                and pa.types.is_integer(arbitrary.type)
+            ) or (
+                cudf.get_option("default_float_bitwidth")
+                and pa.types.is_floating(arbitrary.type)
+            ):
+                dtype = _maybe_convert_to_default_type(
+                    cudf.dtype(arbitrary.type.to_pandas_dtype())
+                )
         except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
             arbitrary = pd.Series(arbitrary)
-            if cudf.get_option(
-                "default_integer_bitwidth"
-            ) and arbitrary.dtype.kind in set("iu"):
-                dtype = _maybe_convert_to_default_type("int")
-            elif (
+            if (
+                cudf.get_option("default_integer_bitwidth")
+                and arbitrary.dtype.kind in set("iu")
+            ) or (
                 cudf.get_option("default_float_bitwidth")
                 and arbitrary.dtype.kind == "f"
             ):
-                dtype = _maybe_convert_to_default_type("float")
+                dtype = _maybe_convert_to_default_type(arbitrary.dtype)
         return as_column(arbitrary, nan_as_null=nan_as_null, dtype=dtype)
 
 

@@ -15,7 +15,7 @@
 from cudf._lib.strings.convert.convert_fixed_point import (
     from_decimal as cpp_from_decimal,
 )
-from cudf.api.types import is_integer_dtype, is_scalar
+from cudf.api.types import is_scalar
 from cudf.core.buffer import as_buffer
 from cudf.core.column import ColumnBase
 from cudf.core.dtypes import (
@@ -150,7 +150,7 @@ def _validate_fillna_value(
     def normalize_binop_value(self, other):
         if isinstance(other, ColumnBase):
             if isinstance(other, cudf.core.column.NumericalColumn):
-                if not is_integer_dtype(other.dtype):
+                if other.dtype.kind not in "iu":
                     raise TypeError(
                         "Decimal columns only support binary operations with "
                         "integer numerical columns."

@@ -12,12 +12,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib import pylibcudf
-from cudf.api.types import (
-    is_float_dtype,
-    is_integer,
-    is_integer_dtype,
-    is_scalar,
-)
+from cudf.api.types import is_integer, is_scalar
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -257,7 +252,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
             out_dtype = "bool"
 
         if op in {"__and__", "__or__", "__xor__"}:
-            if is_float_dtype(self.dtype) or is_float_dtype(other.dtype):
+            if self.dtype.kind == "f" or other.dtype.kind == "f":
                 raise TypeError(
                     f"Operation 'bitwise {op[2:-2]}' not supported between "
                     f"{self.dtype.type.__name__} and "
@@ -268,8 +263,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
 
         if (
             op == "__pow__"
-            and is_integer_dtype(self.dtype)
-            and (is_integer(other) or is_integer_dtype(other.dtype))
+            and self.dtype.kind in "iu"
+            and (is_integer(other) or other.dtype.kind in "iu")
         ):
             op = "INT_POW"
 

@@ -1460,18 +1460,19 @@ def notna(self):
     notnull = notna
 
     def _is_numeric(self):
-        return isinstance(
-            self._values, cudf.core.column.NumericalColumn
-        ) and self.dtype != cudf.dtype("bool")
+        return (
+            isinstance(self._values, cudf.core.column.NumericalColumn)
+            and self.dtype.kind != "b"
+        )
 
     def _is_boolean(self):
-        return self.dtype == cudf.dtype("bool")
+        return self.dtype.kind == "b"
 
     def _is_integer(self):
-        return cudf.api.types.is_integer_dtype(self.dtype)
+        return self.dtype.kind in "iu"
 
     def _is_floating(self):
-        return cudf.api.types.is_float_dtype(self.dtype)
+        return self.dtype.kind == "f"
 
     def _is_object(self):
         return isinstance(self._values, cudf.core.column.StringColumn)

@@ -8,11 +8,7 @@
 from typing_extensions import TypeAlias
 
 import cudf
-from cudf.api.types import (
-    _is_scalar_or_zero_d_array,
-    is_integer,
-    is_integer_dtype,
-)
+from cudf.api.types import _is_scalar_or_zero_d_array, is_integer
 from cudf.core.copy_types import BooleanMask, GatherMap
 
 
@@ -233,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
             return MaskIndexer(BooleanMask(key, n))
         elif len(key) == 0:
             return EmptyIndexer()
-        elif is_integer_dtype(key.dtype):
+        elif key.dtype.kind in "iu":
             return MapIndexer(GatherMap(key, n, nullify=False))
         else:
             raise TypeError(

@@ -24,7 +24,6 @@
     _is_scalar_or_zero_d_array,
     is_dict_like,
     is_integer,
-    is_integer_dtype,
     is_scalar,
 )
 from cudf.core import indexing_utils
@@ -356,12 +355,10 @@ def _loc_to_iloc(self, arg):
             )
             if not _is_non_decimal_numeric_dtype(index_dtype) and not (
                 isinstance(index_dtype, cudf.CategoricalDtype)
-                and is_integer_dtype(index_dtype.categories.dtype)
+                and index_dtype.categories.dtype.kind in "iu"
             ):
                 # TODO: switch to cudf.utils.dtypes.is_integer(arg)
-                if isinstance(arg, cudf.Scalar) and is_integer_dtype(
-                    arg.dtype
-                ):
+                if isinstance(arg, cudf.Scalar) and arg.dtype.kind in "iu":
                     # Do not remove until pandas 3.0 support is added.
                     assert (
                         PANDAS_LT_300

@@ -12,7 +12,6 @@
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
     is_integer,
-    is_integer_dtype,
     is_numeric_dtype,
 )
 from cudf.core.column import ColumnBase, as_column
@@ -358,7 +357,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
             arg = as_column(arg)
             if len(arg) == 0:
                 arg = cudf.core.column.column_empty(0, dtype="int32")
-            if is_integer_dtype(arg.dtype):
+            if arg.dtype.kind in "iu":
                 return self._column.take(arg)
             if arg.dtype.kind == "b":
                 if (bn := len(arg)) != (n := len(self)):

@@ -10833,7 +10833,7 @@ def test_dataframe_contains(name, contains, other_names):
         expectation = contains is cudf.NA and name is cudf.NA
         assert (contains in pdf) == expectation
         assert (contains in gdf) == expectation
-    elif pd.api.types.is_float_dtype(gdf.columns.dtype):
+    elif gdf.columns.dtype.kind == "f":
         # In some cases, the columns are converted to an Index[float] based on
         # the other column names. That casts name values from None to np.nan.
         expectation = contains is np.nan and (name is None or name is np.nan)

@@ -1,7 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import datetime
 from decimal import Decimal
+from typing import TYPE_CHECKING
 
 import cupy as cp
 import numpy as np
@@ -11,6 +13,9 @@
 
 import cudf
 
+if TYPE_CHECKING:
+    from cudf._typing import DtypeObj
+
 """Map numpy dtype to pyarrow types.
 Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special
 handling is required when converting a Boolean column into arrow.
@@ -582,25 +587,18 @@ def _dtype_pandas_compatible(dtype):
     return dtype
 
 
-def _maybe_convert_to_default_type(dtype):
+def _maybe_convert_to_default_type(dtype: DtypeObj) -> DtypeObj:
     """Convert `dtype` to default if specified by user.
 
     If not specified, return as is.
     """
-    if cudf.get_option("default_integer_bitwidth"):
-        if cudf.api.types.is_signed_integer_dtype(dtype):
-            return cudf.dtype(
-                f'i{cudf.get_option("default_integer_bitwidth")//8}'
-            )
-        elif cudf.api.types.is_unsigned_integer_dtype(dtype):
-            return cudf.dtype(
-                f'u{cudf.get_option("default_integer_bitwidth")//8}'
-            )
-    if cudf.get_option(
-        "default_float_bitwidth"
-    ) and cudf.api.types.is_float_dtype(dtype):
-        return cudf.dtype(f'f{cudf.get_option("default_float_bitwidth")//8}')
-
+    if ib := cudf.get_option("default_integer_bitwidth"):
+        if dtype.kind == "i":
+            return cudf.dtype(f"i{ib//8}")
+        elif dtype.kind == "u":
+            return cudf.dtype(f"u{ib//8}")
+    if (fb := cudf.get_option("default_float_bitwidth")) and dtype.kind == "f":
+        return cudf.dtype(f"f{fb//8}")
     return dtype