Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace is_float/integer_dtype checks with .kind checks #16261

Merged
8 commits merged on Jul 19, 2024
2 changes: 1 addition & 1 deletion python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
@@ -90,7 +90,7 @@ def is_integer(obj):
bool
"""
if isinstance(obj, cudf.Scalar):
return pd.api.types.is_integer_dtype(obj.dtype)
return obj.dtype.kind in "iu"
return pd.api.types.is_integer(obj)


Expand Down
19 changes: 4 additions & 15 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
@@ -19,14 +19,7 @@
)
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
is_integer,
is_integer_dtype,
is_list_like,
is_scalar,
is_signed_integer_dtype,
is_unsigned_integer_dtype,
)
from cudf.api.types import is_integer, is_list_like, is_scalar
from cudf.core.abc import Serializable
from cudf.core.column import ColumnBase, column
from cudf.errors import MixedTypeError
@@ -615,12 +608,8 @@ def union(self, other, sort=None):
# Bools + other types will result in mixed type.
# This is not yet consistent in pandas and specific to APIs.
raise MixedTypeError("Cannot perform union with mixed types")
if (
is_signed_integer_dtype(self.dtype)
and is_unsigned_integer_dtype(other.dtype)
) or (
is_unsigned_integer_dtype(self.dtype)
and is_signed_integer_dtype(other.dtype)
if (self.dtype.kind == "i" and other.dtype.kind == "u") or (
self.dtype.kind == "u" and other.dtype.kind == "i"
):
# signed + unsigned types will result in
# mixed type for union in pandas.
@@ -2097,7 +2086,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):

# TODO: For performance, the check and conversion of gather map should
# be done by the caller. This check will be removed in future release.
if not is_integer_dtype(gather_map.dtype):
if gather_map.dtype.kind not in "iu":
gather_map = gather_map.astype(size_type_dtype)

if not _gather_map_is_valid(
Expand Down
29 changes: 15 additions & 14 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
@@ -2213,25 +2213,26 @@ def as_column(
and arbitrary.null_count > 0
):
arbitrary = arbitrary.cast(pa.float64())
if cudf.get_option(
"default_integer_bitwidth"
) and pa.types.is_integer(arbitrary.type):
dtype = _maybe_convert_to_default_type("int")
elif cudf.get_option(
"default_float_bitwidth"
) and pa.types.is_floating(arbitrary.type):
dtype = _maybe_convert_to_default_type("float")
if (
cudf.get_option("default_integer_bitwidth")
and pa.types.is_integer(arbitrary.type)
) or (
cudf.get_option("default_float_bitwidth")
and pa.types.is_floating(arbitrary.type)
):
dtype = _maybe_convert_to_default_type(
cudf.dtype(arbitrary.type.to_pandas_dtype())
)
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
arbitrary = pd.Series(arbitrary)
if cudf.get_option(
"default_integer_bitwidth"
) and arbitrary.dtype.kind in set("iu"):
dtype = _maybe_convert_to_default_type("int")
elif (
if (
cudf.get_option("default_integer_bitwidth")
and arbitrary.dtype.kind in set("iu")
) or (
cudf.get_option("default_float_bitwidth")
and arbitrary.dtype.kind == "f"
):
dtype = _maybe_convert_to_default_type("float")
dtype = _maybe_convert_to_default_type(arbitrary.dtype)
return as_column(arbitrary, nan_as_null=nan_as_null, dtype=dtype)


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
from cudf._lib.strings.convert.convert_fixed_point import (
from_decimal as cpp_from_decimal,
)
from cudf.api.types import is_integer_dtype, is_scalar
from cudf.api.types import is_scalar
from cudf.core.buffer import as_buffer
from cudf.core.column import ColumnBase
from cudf.core.dtypes import (
@@ -150,7 +150,7 @@ def _validate_fillna_value(
def normalize_binop_value(self, other):
if isinstance(other, ColumnBase):
if isinstance(other, cudf.core.column.NumericalColumn):
if not is_integer_dtype(other.dtype):
if other.dtype.kind not in "iu":
raise TypeError(
"Decimal columns only support binary operations with "
"integer numerical columns."
Expand Down
13 changes: 4 additions & 9 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
@@ -12,12 +12,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib import pylibcudf
from cudf.api.types import (
is_float_dtype,
is_integer,
is_integer_dtype,
is_scalar,
)
from cudf.api.types import is_integer, is_scalar
from cudf.core.column import (
ColumnBase,
as_column,
@@ -257,7 +252,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
out_dtype = "bool"

if op in {"__and__", "__or__", "__xor__"}:
if is_float_dtype(self.dtype) or is_float_dtype(other.dtype):
if self.dtype.kind == "f" or other.dtype.kind == "f":
raise TypeError(
f"Operation 'bitwise {op[2:-2]}' not supported between "
f"{self.dtype.type.__name__} and "
@@ -268,8 +263,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:

if (
op == "__pow__"
and is_integer_dtype(self.dtype)
and (is_integer(other) or is_integer_dtype(other.dtype))
and self.dtype.kind in "iu"
and (is_integer(other) or other.dtype.kind in "iu")
):
op = "INT_POW"

Expand Down
13 changes: 7 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
@@ -1460,18 +1460,19 @@ def notna(self):
notnull = notna

def _is_numeric(self):
return isinstance(
self._values, cudf.core.column.NumericalColumn
) and self.dtype != cudf.dtype("bool")
return (
isinstance(self._values, cudf.core.column.NumericalColumn)
and self.dtype.kind != "b"
)

def _is_boolean(self):
return self.dtype == cudf.dtype("bool")
return self.dtype.kind == "b"

def _is_integer(self):
return cudf.api.types.is_integer_dtype(self.dtype)
return self.dtype.kind in "iu"

def _is_floating(self):
return cudf.api.types.is_float_dtype(self.dtype)
return self.dtype.kind == "f"

def _is_object(self):
return isinstance(self._values, cudf.core.column.StringColumn)
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/indexing_utils.py
Original file line number Diff line number Diff line change
@@ -8,11 +8,7 @@
from typing_extensions import TypeAlias

import cudf
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_integer,
is_integer_dtype,
)
from cudf.api.types import _is_scalar_or_zero_d_array, is_integer
from cudf.core.copy_types import BooleanMask, GatherMap


@@ -233,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
return MaskIndexer(BooleanMask(key, n))
elif len(key) == 0:
return EmptyIndexer()
elif is_integer_dtype(key.dtype):
elif key.dtype.kind in "iu":
return MapIndexer(GatherMap(key, n, nullify=False))
else:
raise TypeError(
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
@@ -24,7 +24,6 @@
_is_scalar_or_zero_d_array,
is_dict_like,
is_integer,
is_integer_dtype,
is_scalar,
)
from cudf.core import indexing_utils
@@ -356,12 +355,10 @@ def _loc_to_iloc(self, arg):
)
if not _is_non_decimal_numeric_dtype(index_dtype) and not (
isinstance(index_dtype, cudf.CategoricalDtype)
and is_integer_dtype(index_dtype.categories.dtype)
and index_dtype.categories.dtype.kind in "iu"
):
# TODO: switch to cudf.utils.dtypes.is_integer(arg)
if isinstance(arg, cudf.Scalar) and is_integer_dtype(
arg.dtype
):
if isinstance(arg, cudf.Scalar) and arg.dtype.kind in "iu":
# Do not remove until pandas 3.0 support is added.
assert (
PANDAS_LT_300
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,6 @@
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_integer,
is_integer_dtype,
is_numeric_dtype,
)
from cudf.core.column import ColumnBase, as_column
@@ -358,7 +357,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
arg = as_column(arg)
if len(arg) == 0:
arg = cudf.core.column.column_empty(0, dtype="int32")
if is_integer_dtype(arg.dtype):
if arg.dtype.kind in "iu":
return self._column.take(arg)
if arg.dtype.kind == "b":
if (bn := len(arg)) != (n := len(self)):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
@@ -10833,7 +10833,7 @@ def test_dataframe_contains(name, contains, other_names):
expectation = contains is cudf.NA and name is cudf.NA
assert (contains in pdf) == expectation
assert (contains in gdf) == expectation
elif pd.api.types.is_float_dtype(gdf.columns.dtype):
elif gdf.columns.dtype.kind == "f":
# In some cases, the columns are converted to an Index[float] based on
# the other column names. That casts name values from None to np.nan.
expectation = contains is np.nan and (name is None or name is np.nan)
Expand Down
28 changes: 13 additions & 15 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
from __future__ import annotations

import datetime
from decimal import Decimal
from typing import TYPE_CHECKING

import cupy as cp
import numpy as np
@@ -11,6 +13,9 @@

import cudf

if TYPE_CHECKING:
from cudf._typing import DtypeObj

"""Map numpy dtype to pyarrow types.
Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special
handling is required when converting a Boolean column into arrow.
@@ -582,25 +587,18 @@ def _dtype_pandas_compatible(dtype):
return dtype


def _maybe_convert_to_default_type(dtype):
def _maybe_convert_to_default_type(dtype: DtypeObj) -> DtypeObj:
"""Convert `dtype` to default if specified by user.

If not specified, return as is.
"""
if cudf.get_option("default_integer_bitwidth"):
if cudf.api.types.is_signed_integer_dtype(dtype):
return cudf.dtype(
f'i{cudf.get_option("default_integer_bitwidth")//8}'
)
elif cudf.api.types.is_unsigned_integer_dtype(dtype):
return cudf.dtype(
f'u{cudf.get_option("default_integer_bitwidth")//8}'
)
if cudf.get_option(
"default_float_bitwidth"
) and cudf.api.types.is_float_dtype(dtype):
return cudf.dtype(f'f{cudf.get_option("default_float_bitwidth")//8}')

if ib := cudf.get_option("default_integer_bitwidth"):
if dtype.kind == "i":
return cudf.dtype(f"i{ib//8}")
elif dtype.kind == "u":
return cudf.dtype(f"u{ib//8}")
if (fb := cudf.get_option("default_float_bitwidth")) and dtype.kind == "f":
return cudf.dtype(f"f{fb//8}")
return dtype


Expand Down
Loading