Skip to content

Commit

Permalink
Replace is_float/integer_dtype checks with .kind checks (#16261)
Browse files Browse the repository at this point in the history
It appears these `is_float_dtype`/`is_integer_dtype` helpers were being called in places where we already had a dtype object, so we can simply check the dtype's `.kind` attribute instead.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16261
  • Loading branch information
mroeschke authored Jul 19, 2024
1 parent 4c46628 commit 5dde41d
Show file tree
Hide file tree
Showing 11 changed files with 52 additions and 76 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def is_integer(obj):
bool
"""
if isinstance(obj, cudf.Scalar):
return pd.api.types.is_integer_dtype(obj.dtype)
return obj.dtype.kind in "iu"
return pd.api.types.is_integer(obj)


Expand Down
19 changes: 4 additions & 15 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,7 @@
)
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
is_integer,
is_integer_dtype,
is_list_like,
is_scalar,
is_signed_integer_dtype,
is_unsigned_integer_dtype,
)
from cudf.api.types import is_integer, is_list_like, is_scalar
from cudf.core.abc import Serializable
from cudf.core.column import ColumnBase, column
from cudf.errors import MixedTypeError
Expand Down Expand Up @@ -621,12 +614,8 @@ def union(self, other, sort=None):
# Bools + other types will result in mixed type.
# This is not yet consistent in pandas and specific to APIs.
raise MixedTypeError("Cannot perform union with mixed types")
if (
is_signed_integer_dtype(self.dtype)
and is_unsigned_integer_dtype(other.dtype)
) or (
is_unsigned_integer_dtype(self.dtype)
and is_signed_integer_dtype(other.dtype)
if (self.dtype.kind == "i" and other.dtype.kind == "u") or (
self.dtype.kind == "u" and other.dtype.kind == "i"
):
# signed + unsigned types will result in
# mixed type for union in pandas.
Expand Down Expand Up @@ -2103,7 +2092,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):

# TODO: For performance, the check and conversion of gather map should
# be done by the caller. This check will be removed in future release.
if not is_integer_dtype(gather_map.dtype):
if gather_map.dtype.kind not in "iu":
gather_map = gather_map.astype(size_type_dtype)

if not _gather_map_is_valid(
Expand Down
29 changes: 15 additions & 14 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2219,25 +2219,26 @@ def as_column(
and arbitrary.null_count > 0
):
arbitrary = arbitrary.cast(pa.float64())
if cudf.get_option(
"default_integer_bitwidth"
) and pa.types.is_integer(arbitrary.type):
dtype = _maybe_convert_to_default_type("int")
elif cudf.get_option(
"default_float_bitwidth"
) and pa.types.is_floating(arbitrary.type):
dtype = _maybe_convert_to_default_type("float")
if (
cudf.get_option("default_integer_bitwidth")
and pa.types.is_integer(arbitrary.type)
) or (
cudf.get_option("default_float_bitwidth")
and pa.types.is_floating(arbitrary.type)
):
dtype = _maybe_convert_to_default_type(
cudf.dtype(arbitrary.type.to_pandas_dtype())
)
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError):
arbitrary = pd.Series(arbitrary)
if cudf.get_option(
"default_integer_bitwidth"
) and arbitrary.dtype.kind in set("iu"):
dtype = _maybe_convert_to_default_type("int")
elif (
if (
cudf.get_option("default_integer_bitwidth")
and arbitrary.dtype.kind in set("iu")
) or (
cudf.get_option("default_float_bitwidth")
and arbitrary.dtype.kind == "f"
):
dtype = _maybe_convert_to_default_type("float")
dtype = _maybe_convert_to_default_type(arbitrary.dtype)
return as_column(arbitrary, nan_as_null=nan_as_null, dtype=dtype)


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from cudf._lib.strings.convert.convert_fixed_point import (
from_decimal as cpp_from_decimal,
)
from cudf.api.types import is_integer_dtype, is_scalar
from cudf.api.types import is_scalar
from cudf.core.buffer import as_buffer
from cudf.core.column import ColumnBase
from cudf.core.dtypes import (
Expand Down Expand Up @@ -150,7 +150,7 @@ def _validate_fillna_value(
def normalize_binop_value(self, other):
if isinstance(other, ColumnBase):
if isinstance(other, cudf.core.column.NumericalColumn):
if not is_integer_dtype(other.dtype):
if other.dtype.kind not in "iu":
raise TypeError(
"Decimal columns only support binary operations with "
"integer numerical columns."
Expand Down
13 changes: 4 additions & 9 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib import pylibcudf
from cudf.api.types import (
is_float_dtype,
is_integer,
is_integer_dtype,
is_scalar,
)
from cudf.api.types import is_integer, is_scalar
from cudf.core.column import (
ColumnBase,
as_column,
Expand Down Expand Up @@ -249,7 +244,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
out_dtype = "bool"

if op in {"__and__", "__or__", "__xor__"}:
if is_float_dtype(self.dtype) or is_float_dtype(other.dtype):
if self.dtype.kind == "f" or other.dtype.kind == "f":
raise TypeError(
f"Operation 'bitwise {op[2:-2]}' not supported between "
f"{self.dtype.type.__name__} and "
Expand All @@ -260,8 +255,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:

if (
op == "__pow__"
and is_integer_dtype(self.dtype)
and (is_integer(other) or is_integer_dtype(other.dtype))
and self.dtype.kind in "iu"
and (is_integer(other) or other.dtype.kind in "iu")
):
op = "INT_POW"

Expand Down
13 changes: 7 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,18 +1456,19 @@ def notna(self):
notnull = notna

def _is_numeric(self):
return isinstance(
self._values, cudf.core.column.NumericalColumn
) and self.dtype != cudf.dtype("bool")
return (
isinstance(self._values, cudf.core.column.NumericalColumn)
and self.dtype.kind != "b"
)

def _is_boolean(self):
return self.dtype == cudf.dtype("bool")
return self.dtype.kind == "b"

def _is_integer(self):
return cudf.api.types.is_integer_dtype(self.dtype)
return self.dtype.kind in "iu"

def _is_floating(self):
return cudf.api.types.is_float_dtype(self.dtype)
return self.dtype.kind == "f"

def _is_object(self):
return isinstance(self._values, cudf.core.column.StringColumn)
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/indexing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@
from typing_extensions import TypeAlias

import cudf
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_integer,
is_integer_dtype,
)
from cudf.api.types import _is_scalar_or_zero_d_array, is_integer
from cudf.core.copy_types import BooleanMask, GatherMap


Expand Down Expand Up @@ -233,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
return MaskIndexer(BooleanMask(key, n))
elif len(key) == 0:
return EmptyIndexer()
elif is_integer_dtype(key.dtype):
elif key.dtype.kind in "iu":
return MapIndexer(GatherMap(key, n, nullify=False))
else:
raise TypeError(
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
_is_scalar_or_zero_d_array,
is_dict_like,
is_integer,
is_integer_dtype,
is_scalar,
)
from cudf.core import indexing_utils
Expand Down Expand Up @@ -356,12 +355,10 @@ def _loc_to_iloc(self, arg):
)
if not _is_non_decimal_numeric_dtype(index_dtype) and not (
isinstance(index_dtype, cudf.CategoricalDtype)
and is_integer_dtype(index_dtype.categories.dtype)
and index_dtype.categories.dtype.kind in "iu"
):
# TODO: switch to cudf.utils.dtypes.is_integer(arg)
if isinstance(arg, cudf.Scalar) and is_integer_dtype(
arg.dtype
):
if isinstance(arg, cudf.Scalar) and arg.dtype.kind in "iu":
# Do not remove until pandas 3.0 support is added.
assert (
PANDAS_LT_300
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_integer,
is_integer_dtype,
is_numeric_dtype,
)
from cudf.core.column import ColumnBase, as_column
Expand Down Expand Up @@ -352,7 +351,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
arg = as_column(arg)
if len(arg) == 0:
arg = cudf.core.column.column_empty(0, dtype="int32")
if is_integer_dtype(arg.dtype):
if arg.dtype.kind in "iu":
return self._column.take(arg)
if arg.dtype.kind == "b":
if (bn := len(arg)) != (n := len(self)):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10833,7 +10833,7 @@ def test_dataframe_contains(name, contains, other_names):
expectation = contains is cudf.NA and name is cudf.NA
assert (contains in pdf) == expectation
assert (contains in gdf) == expectation
elif pd.api.types.is_float_dtype(gdf.columns.dtype):
elif gdf.columns.dtype.kind == "f":
# In some cases, the columns are converted to an Index[float] based on
# the other column names. That casts name values from None to np.nan.
expectation = contains is np.nan and (name is None or name is np.nan)
Expand Down
28 changes: 13 additions & 15 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
from __future__ import annotations

import datetime
from decimal import Decimal
from typing import TYPE_CHECKING

import cupy as cp
import numpy as np
Expand All @@ -11,6 +13,9 @@

import cudf

if TYPE_CHECKING:
from cudf._typing import DtypeObj

"""Map numpy dtype to pyarrow types.
Note that np.bool_ bitwidth (8) is different from pa.bool_ (1). Special
handling is required when converting a Boolean column into arrow.
Expand Down Expand Up @@ -568,25 +573,18 @@ def _dtype_pandas_compatible(dtype):
return dtype


def _maybe_convert_to_default_type(dtype):
def _maybe_convert_to_default_type(dtype: DtypeObj) -> DtypeObj:
"""Convert `dtype` to default if specified by user.
If not specified, return as is.
"""
if cudf.get_option("default_integer_bitwidth"):
if cudf.api.types.is_signed_integer_dtype(dtype):
return cudf.dtype(
f'i{cudf.get_option("default_integer_bitwidth")//8}'
)
elif cudf.api.types.is_unsigned_integer_dtype(dtype):
return cudf.dtype(
f'u{cudf.get_option("default_integer_bitwidth")//8}'
)
if cudf.get_option(
"default_float_bitwidth"
) and cudf.api.types.is_float_dtype(dtype):
return cudf.dtype(f'f{cudf.get_option("default_float_bitwidth")//8}')

if ib := cudf.get_option("default_integer_bitwidth"):
if dtype.kind == "i":
return cudf.dtype(f"i{ib//8}")
elif dtype.kind == "u":
return cudf.dtype(f"u{ib//8}")
if (fb := cudf.get_option("default_float_bitwidth")) and dtype.kind == "f":
return cudf.dtype(f"f{fb//8}")
return dtype


Expand Down

0 comments on commit 5dde41d

Please sign in to comment.