Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace is_bool_dtype with checking .dtype.kind #16255

Merged
merged 5 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
is_bool_dtype,
is_integer,
is_integer_dtype,
is_list_like,
Expand Down Expand Up @@ -608,10 +607,8 @@ def union(self, other, sort=None):
)

if cudf.get_option("mode.pandas_compatible"):
if (
is_bool_dtype(self.dtype) and not is_bool_dtype(other.dtype)
) or (
not is_bool_dtype(self.dtype) and is_bool_dtype(other.dtype)
if (self.dtype.kind == "b" and other.dtype.kind != "b") or (
self.dtype.kind != "b" and other.dtype.kind == "b"
):
# Bools + other types will result in mixed type.
# This is not yet consistent in pandas and specific to APIs.
Expand Down Expand Up @@ -2152,7 +2149,7 @@ def _apply_boolean_mask(self, boolean_mask):
Rows corresponding to `False` are dropped.
"""
boolean_mask = cudf.core.column.as_column(boolean_mask)
if not is_bool_dtype(boolean_mask.dtype):
if boolean_mask.dtype.kind != "b":
raise ValueError("boolean_mask is not boolean type.")

return self._from_columns_like_self(
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@
import numpy as np

import cudf
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_bool_dtype,
is_scalar,
)
from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
from cudf.core.dtypes import CategoricalDtype
from cudf.utils.dtypes import (
_can_cast,
Expand Down Expand Up @@ -112,7 +108,7 @@ def _check_and_cast_columns_with_other(
other = cudf.Scalar(other)

if is_mixed_with_object_dtype(other, source_col) or (
is_bool_dtype(source_dtype) and not is_bool_dtype(common_dtype)
source_dtype.kind == "b" and common_dtype.kind != "b"
):
raise TypeError(mixed_err)

Expand Down
7 changes: 3 additions & 4 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
_is_non_decimal_numeric_dtype,
_is_pandas_nullable_extension_dtype,
infer_dtype,
is_bool_dtype,
is_dtype_equal,
is_scalar,
is_string_dtype,
Expand Down Expand Up @@ -619,7 +618,7 @@ def _scatter_by_column(
key: cudf.core.column.NumericalColumn,
value: cudf.core.scalar.Scalar | ColumnBase,
) -> Self:
if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
# `key` is boolean mask
if len(key) != len(self):
raise ValueError(
Expand All @@ -644,7 +643,7 @@ def _scatter_by_column(

self._check_scatter_key_length(num_keys, value)

if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
return libcudf.copying.boolean_mask_scatter([value], [self], key)[
0
]._with_type_metadata(self.dtype)
Expand Down Expand Up @@ -1083,7 +1082,7 @@ def as_decimal_column(

def apply_boolean_mask(self, mask) -> ColumnBase:
mask = as_column(mask)
if not is_bool_dtype(mask.dtype):
if mask.dtype.kind != "b":
raise ValueError("boolean_mask is not boolean type.")

return apply_boolean_mask([self], mask)[0]._with_type_metadata(
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from cudf._lib import pylibcudf
from cudf._lib.types import size_type_dtype
from cudf.api.types import (
is_bool_dtype,
is_float_dtype,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -165,7 +164,7 @@ def __setitem__(self, key: Any, value: Any):
else as_column(value)
)

if not is_bool_dtype(self.dtype) and is_bool_dtype(device_value.dtype):
if self.dtype.kind != "b" and device_value.dtype.kind == "b":
raise TypeError(f"Invalid value {value} for dtype {self.dtype}")
else:
device_value = device_value.astype(self.dtype)
Expand Down Expand Up @@ -270,7 +269,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
f"{self.dtype.type.__name__} and "
f"{other.dtype.type.__name__}"
)
if is_bool_dtype(self.dtype) or is_bool_dtype(other.dtype):
if self.dtype.kind == "b" or other.dtype.kind == "b":
out_dtype = "bool"

if (
Expand Down
13 changes: 6 additions & 7 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_dtype_equal,
is_list_like,
Expand Down Expand Up @@ -171,7 +170,7 @@ def _can_downcast_to_series(self, df, arg):
):
return False
else:
if is_bool_dtype(as_column(arg[0]).dtype) and not isinstance(
if as_column(arg[0]).dtype.kind == "b" and not isinstance(
arg[1], slice
):
return True
Expand Down Expand Up @@ -320,7 +319,7 @@ def _getitem_tuple_arg(self, arg):
tmp_arg[1],
)

if is_bool_dtype(tmp_arg[0].dtype):
if tmp_arg[0].dtype.kind == "b":
df = columns_df._apply_boolean_mask(
BooleanMask(tmp_arg[0], len(columns_df))
)
Expand Down Expand Up @@ -3678,8 +3677,8 @@ def agg(self, aggs, axis=None):
"""
dtypes = [self[col].dtype for col in self._column_names]
common_dtype = find_common_type(dtypes)
if not is_bool_dtype(common_dtype) and any(
is_bool_dtype(dtype) for dtype in dtypes
if common_dtype.kind != "b" and any(
dtype.kind == "b" for dtype in dtypes
):
raise MixedTypeError("Cannot create a column with mixed types")

Expand Down Expand Up @@ -6301,8 +6300,8 @@ def _reduce(
and any(
not is_object_dtype(dtype) for dtype in source_dtypes
)
or not is_bool_dtype(common_dtype)
and any(is_bool_dtype(dtype) for dtype in source_dtypes)
or common_dtype.kind != "b"
and any(dtype.kind == "b" for dtype in source_dtypes)
):
raise TypeError(
"Columns must all have the same dtype to "
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from cudf._lib.sort import segmented_sort_by_key
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype, is_list_like, is_numeric_dtype
from cudf.api.types import is_list_like, is_numeric_dtype
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.column.column import ColumnBase, StructDtype, as_column
Expand Down Expand Up @@ -1531,7 +1531,7 @@ def mult(df):
# For `sum` & `product`, boolean types
# will need to result in `int64` type.
for name, col in res._data.items():
if is_bool_dtype(col.dtype):
if col.dtype.kind == "b":
res._data[name] = col.astype("int")
return res

Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/indexing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import cudf
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_integer,
is_integer_dtype,
)
Expand Down Expand Up @@ -230,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
key = cudf.core.column.as_column(key)
if isinstance(key, cudf.core.column.CategoricalColumn):
key = key.astype(key.codes.dtype)
if is_bool_dtype(key.dtype):
if key.dtype.kind == "b":
return MaskIndexer(BooleanMask(key, n))
elif len(key) == 0:
return EmptyIndexer()
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,6 @@ def _get_row_major(
| tuple[Any, ...]
| list[tuple[Any, ...]],
) -> DataFrameOrSeries:
if pd.api.types.is_bool_dtype(
Copy link
Contributor Author

@mroeschke mroeschke Jul 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While running the test suite locally, I didn't see this branch ever getting hit

list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple
):
return df[row_tuple]
if isinstance(row_tuple, slice):
if row_tuple.start is None:
row_tuple = slice(self[0], row_tuple.stop, row_tuple.step)
Expand Down
11 changes: 5 additions & 6 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_dict_like,
is_integer,
is_integer_dtype,
Expand Down Expand Up @@ -221,10 +220,10 @@ def __setitem__(self, key, value):
f"Cannot assign {value=} to "
f"non-float dtype={self._frame.dtype}"
)
elif (
self._frame.dtype.kind == "b"
and not is_bool_dtype(value)
and value not in {None, cudf.NA}
elif self._frame.dtype.kind == "b" and not (
value in {None, cudf.NA}
or isinstance(value, (np.bool_, bool))
or (isinstance(value, cudf.Scalar) and value.dtype.kind == "b")
):
raise MixedTypeError(
f"Cannot assign {value=} to "
Expand Down Expand Up @@ -3221,7 +3220,7 @@ def describe(
percentiles = np.array([0.25, 0.5, 0.75])

dtype = "str"
if is_bool_dtype(self.dtype):
if self.dtype.kind == "b":
data = _describe_categorical(self, percentiles)
elif isinstance(self._column, cudf.core.column.NumericalColumn):
data = _describe_numeric(self, percentiles)
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_scalar_or_zero_d_array,
is_bool_dtype,
is_integer,
is_integer_dtype,
is_numeric_dtype,
Expand Down Expand Up @@ -361,7 +360,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
arg = cudf.core.column.column_empty(0, dtype="int32")
if is_integer_dtype(arg.dtype):
return self._column.take(arg)
if is_bool_dtype(arg.dtype):
if arg.dtype.kind == "b":
if (bn := len(arg)) != (n := len(self)):
raise IndexError(
f"Boolean mask has wrong length: {bn} not {n}"
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5234,7 +5234,7 @@ def test_rowwise_ops(data, op, skipna, numeric_only):
else (pdf[column].notna().count() == 0)
)
or cudf.api.types.is_numeric_dtype(pdf[column].dtype)
or cudf.api.types.is_bool_dtype(pdf[column].dtype)
or pdf[column].dtype.kind == "b"
for column in pdf
):
with pytest.raises(TypeError):
Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import cudf
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype
from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex
from cudf.testing import assert_eq
from cudf.testing._utils import (
Expand Down Expand Up @@ -2397,8 +2396,8 @@ def test_intersection_index(idx1, idx2, sort, pandas_compatible):
expected,
actual,
exact=False
if (is_bool_dtype(idx1.dtype) and not is_bool_dtype(idx2.dtype))
or (not is_bool_dtype(idx1.dtype) or is_bool_dtype(idx2.dtype))
if (idx1.dtype.kind == "b" and idx2.dtype.kind != "b")
or (idx1.dtype.kind != "b" or idx2.dtype.kind == "b")
else True,
)

Expand Down
Loading