Skip to content

Commit

Permalink
Replace is_bool_dtype with checking .dtype.kind (#16255)
Browse files Browse the repository at this point in the history
It appears `is_bool_dtype` was being called in places where we already had a dtype object, so we can instead simply check the `.kind` attribute (`dtype.kind == "b"`).

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #16255
  • Loading branch information
mroeschke authored Jul 16, 2024
1 parent 47a0a87 commit beda22e
Show file tree
Hide file tree
Showing 12 changed files with 28 additions and 46 deletions.
9 changes: 3 additions & 6 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
is_bool_dtype,
is_integer,
is_integer_dtype,
is_list_like,
Expand Down Expand Up @@ -610,10 +609,8 @@ def union(self, other, sort=None):
)

if cudf.get_option("mode.pandas_compatible"):
if (
is_bool_dtype(self.dtype) and not is_bool_dtype(other.dtype)
) or (
not is_bool_dtype(self.dtype) and is_bool_dtype(other.dtype)
if (self.dtype.kind == "b" and other.dtype.kind != "b") or (
self.dtype.kind != "b" and other.dtype.kind == "b"
):
# Bools + other types will result in mixed type.
# This is not yet consistent in pandas and specific to APIs.
Expand Down Expand Up @@ -2154,7 +2151,7 @@ def _apply_boolean_mask(self, boolean_mask):
Rows corresponding to `False` are dropped.
"""
boolean_mask = cudf.core.column.as_column(boolean_mask)
if not is_bool_dtype(boolean_mask.dtype):
if boolean_mask.dtype.kind != "b":
raise ValueError("boolean_mask is not boolean type.")

return self._from_columns_like_self(
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@
import numpy as np

import cudf
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_bool_dtype,
is_scalar,
)
from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
from cudf.core.dtypes import CategoricalDtype
from cudf.utils.dtypes import (
_can_cast,
Expand Down Expand Up @@ -112,7 +108,7 @@ def _check_and_cast_columns_with_other(
other = cudf.Scalar(other)

if is_mixed_with_object_dtype(other, source_col) or (
is_bool_dtype(source_dtype) and not is_bool_dtype(common_dtype)
source_dtype.kind == "b" and common_dtype.kind != "b"
):
raise TypeError(mixed_err)

Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
_is_non_decimal_numeric_dtype,
_is_pandas_nullable_extension_dtype,
infer_dtype,
is_bool_dtype,
is_dtype_equal,
is_scalar,
is_string_dtype,
Expand Down Expand Up @@ -619,7 +618,7 @@ def _scatter_by_column(
key: cudf.core.column.NumericalColumn,
value: cudf.core.scalar.Scalar | ColumnBase,
) -> Self:
if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
# `key` is boolean mask
if len(key) != len(self):
raise ValueError(
Expand All @@ -644,7 +643,7 @@ def _scatter_by_column(

self._check_scatter_key_length(num_keys, value)

if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
return libcudf.copying.boolean_mask_scatter([value], [self], key)[
0
]._with_type_metadata(self.dtype)
Expand Down Expand Up @@ -1083,7 +1082,7 @@ def as_decimal_column(

def apply_boolean_mask(self, mask) -> ColumnBase:
mask = as_column(mask)
if not is_bool_dtype(mask.dtype):
if mask.dtype.kind != "b":
raise ValueError("boolean_mask is not boolean type.")

return apply_boolean_mask([self], mask)[0]._with_type_metadata(
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from cudf import _lib as libcudf
from cudf._lib import pylibcudf
from cudf.api.types import (
is_bool_dtype,
is_float_dtype,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -159,7 +158,7 @@ def __setitem__(self, key: Any, value: Any):
else as_column(value)
)

if not is_bool_dtype(self.dtype) and is_bool_dtype(device_value.dtype):
if self.dtype.kind != "b" and device_value.dtype.kind == "b":
raise TypeError(f"Invalid value {value} for dtype {self.dtype}")
else:
device_value = device_value.astype(self.dtype)
Expand Down Expand Up @@ -264,7 +263,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
f"{self.dtype.type.__name__} and "
f"{other.dtype.type.__name__}"
)
if is_bool_dtype(self.dtype) or is_bool_dtype(other.dtype):
if self.dtype.kind == "b" or other.dtype.kind == "b":
out_dtype = "bool"

if (
Expand Down
13 changes: 6 additions & 7 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_dtype_equal,
is_list_like,
Expand Down Expand Up @@ -171,7 +170,7 @@ def _can_downcast_to_series(self, df, arg):
):
return False
else:
if is_bool_dtype(as_column(arg[0]).dtype) and not isinstance(
if as_column(arg[0]).dtype.kind == "b" and not isinstance(
arg[1], slice
):
return True
Expand Down Expand Up @@ -320,7 +319,7 @@ def _getitem_tuple_arg(self, arg):
tmp_arg[1],
)

if is_bool_dtype(tmp_arg[0].dtype):
if tmp_arg[0].dtype.kind == "b":
df = columns_df._apply_boolean_mask(
BooleanMask(tmp_arg[0], len(columns_df))
)
Expand Down Expand Up @@ -3678,8 +3677,8 @@ def agg(self, aggs, axis=None):
"""
dtypes = [self[col].dtype for col in self._column_names]
common_dtype = find_common_type(dtypes)
if not is_bool_dtype(common_dtype) and any(
is_bool_dtype(dtype) for dtype in dtypes
if common_dtype.kind != "b" and any(
dtype.kind == "b" for dtype in dtypes
):
raise MixedTypeError("Cannot create a column with mixed types")

Expand Down Expand Up @@ -6305,8 +6304,8 @@ def _reduce(
and any(
not is_object_dtype(dtype) for dtype in source_dtypes
)
or not is_bool_dtype(common_dtype)
and any(is_bool_dtype(dtype) for dtype in source_dtypes)
or common_dtype.kind != "b"
and any(dtype.kind == "b" for dtype in source_dtypes)
):
raise TypeError(
"Columns must all have the same dtype to "
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from cudf._lib.sort import segmented_sort_by_key
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype, is_list_like, is_numeric_dtype
from cudf.api.types import is_list_like, is_numeric_dtype
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.column.column import ColumnBase, StructDtype, as_column
Expand Down Expand Up @@ -1534,7 +1534,7 @@ def mult(df):
# For `sum` & `product`, boolean types
# will need to result in `int64` type.
for name, col in res._data.items():
if is_bool_dtype(col.dtype):
if col.dtype.kind == "b":
res._data[name] = col.astype("int")
return res

Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/indexing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import cudf
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_integer,
is_integer_dtype,
)
Expand Down Expand Up @@ -230,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
key = cudf.core.column.as_column(key)
if isinstance(key, cudf.core.column.CategoricalColumn):
key = key.astype(key.codes.dtype)
if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
return MaskIndexer(BooleanMask(key, n))
elif len(key) == 0:
return EmptyIndexer()
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,6 @@ def _get_row_major(
| tuple[Any, ...]
| list[tuple[Any, ...]],
) -> DataFrameOrSeries:
if pd.api.types.is_bool_dtype(
list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple
):
return df[row_tuple]
if isinstance(row_tuple, slice):
if row_tuple.start is None:
row_tuple = slice(self[0], row_tuple.stop, row_tuple.step)
Expand Down
11 changes: 5 additions & 6 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -221,10 +220,10 @@ def __setitem__(self, key, value):
f"Cannot assign {value=} to "
f"non-float dtype={self._frame.dtype}"
)
elif (
self._frame.dtype.kind == "b"
and not is_bool_dtype(value)
and value not in {None, cudf.NA}
elif self._frame.dtype.kind == "b" and not (
value in {None, cudf.NA}
or isinstance(value, (np.bool_, bool))
or (isinstance(value, cudf.Scalar) and value.dtype.kind == "b")
):
raise MixedTypeError(
f"Cannot assign {value=} to "
Expand Down Expand Up @@ -3221,7 +3220,7 @@ def describe(
percentiles = np.array([0.25, 0.5, 0.75])

dtype = "str"
if is_bool_dtype(self.dtype):
if self.dtype.kind == "b":
data = _describe_categorical(self, percentiles)
elif isinstance(self._column, cudf.core.column.NumericalColumn):
data = _describe_numeric(self, percentiles)
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_integer,
is_integer_dtype,
is_numeric_dtype,
Expand Down Expand Up @@ -361,7 +360,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
arg = cudf.core.column.column_empty(0, dtype="int32")
if is_integer_dtype(arg.dtype):
return self._column.take(arg)
if is_bool_dtype(arg.dtype):
if arg.dtype.kind == "b":
if (bn := len(arg)) != (n := len(self)):
raise IndexError(
f"Boolean mask has wrong length: {bn} not {n}"
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5234,7 +5234,7 @@ def test_rowwise_ops(data, op, skipna, numeric_only):
else (pdf[column].notna().count() == 0)
)
or cudf.api.types.is_numeric_dtype(pdf[column].dtype)
or cudf.api.types.is_bool_dtype(pdf[column].dtype)
or pdf[column].dtype.kind == "b"
for column in pdf
):
with pytest.raises(TypeError):
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import cudf
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype
from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex
from cudf.testing import assert_eq
from cudf.testing._utils import (
Expand Down Expand Up @@ -2397,8 +2396,8 @@ def test_intersection_index(idx1, idx2, sort, pandas_compatible):
expected,
actual,
exact=False
if (is_bool_dtype(idx1.dtype) and not is_bool_dtype(idx2.dtype))
or (not is_bool_dtype(idx1.dtype) or is_bool_dtype(idx2.dtype))
if (idx1.dtype.kind == "b" and idx2.dtype.kind != "b")
or (idx1.dtype.kind != "b" or idx2.dtype.kind == "b")
else True,
)

Expand Down

0 comments on commit beda22e

Please sign in to comment.