From beda22ed28030bbed2faaa5a49509255f11976aa Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 15 Jul 2024 16:29:05 -1000
Subject: [PATCH] Replace is_bool_type with checking .dtype.kind (#16255)

It appears this was called when we already had a dtype object so can instead just simply check the .kind attribute

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16255
---
 python/cudf/cudf/core/_base_index.py         |  9 +++------
 python/cudf/cudf/core/_internals/where.py    |  8 ++------
 python/cudf/cudf/core/column/column.py       |  7 +++----
 python/cudf/cudf/core/column/numerical.py    |  5 ++---
 python/cudf/cudf/core/dataframe.py           | 13 ++++++-------
 python/cudf/cudf/core/groupby/groupby.py     |  4 ++--
 python/cudf/cudf/core/indexing_utils.py      |  3 +--
 python/cudf/cudf/core/multiindex.py          |  4 ----
 python/cudf/cudf/core/series.py              | 11 +++++------
 python/cudf/cudf/core/single_column_frame.py |  3 +--
 python/cudf/cudf/tests/test_dataframe.py     |  2 +-
 python/cudf/cudf/tests/test_index.py         |  5 ++---
 12 files changed, 28 insertions(+), 46 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 9ba2d161619..479f87bb78b 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -20,7 +20,6 @@
 from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
 from cudf.api.types import (
-    is_bool_dtype,
     is_integer,
     is_integer_dtype,
     is_list_like,
@@ -610,10 +609,8 @@ def union(self, other, sort=None):
             )
 
         if cudf.get_option("mode.pandas_compatible"):
-            if (
-                is_bool_dtype(self.dtype) and not is_bool_dtype(other.dtype)
-            ) or (
-                not is_bool_dtype(self.dtype) and is_bool_dtype(other.dtype)
+            if (self.dtype.kind == "b" and other.dtype.kind != "b") or (
+                self.dtype.kind != "b" and other.dtype.kind == "b"
             ):
                 # Bools + other types will result in mixed type.
                 # This is not yet consistent in pandas and specific to APIs.
@@ -2154,7 +2151,7 @@ def _apply_boolean_mask(self, boolean_mask):
         Rows corresponding to `False` is dropped.
         """
         boolean_mask = cudf.core.column.as_column(boolean_mask)
-        if not is_bool_dtype(boolean_mask.dtype):
+        if boolean_mask.dtype.kind != "b":
             raise ValueError("boolean_mask is not boolean type.")
 
         return self._from_columns_like_self(
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index f3183e6029d..4a36be76b6d 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -7,11 +7,7 @@
 import numpy as np
 
 import cudf
-from cudf.api.types import (
-    _is_non_decimal_numeric_dtype,
-    is_bool_dtype,
-    is_scalar,
-)
+from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
 from cudf.core.dtypes import CategoricalDtype
 from cudf.utils.dtypes import (
     _can_cast,
@@ -112,7 +108,7 @@ def _check_and_cast_columns_with_other(
         other = cudf.Scalar(other)
 
     if is_mixed_with_object_dtype(other, source_col) or (
-        is_bool_dtype(source_dtype) and not is_bool_dtype(common_dtype)
+        source_dtype.kind == "b" and common_dtype.kind != "b"
     ):
         raise TypeError(mixed_err)
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index fd3664ecac4..dbdf501e022 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -41,7 +41,6 @@
     _is_non_decimal_numeric_dtype,
     _is_pandas_nullable_extension_dtype,
     infer_dtype,
-    is_bool_dtype,
     is_dtype_equal,
     is_scalar,
     is_string_dtype,
@@ -619,7 +618,7 @@ def _scatter_by_column(
         key: cudf.core.column.NumericalColumn,
         value: cudf.core.scalar.Scalar | ColumnBase,
     ) -> Self:
-        if is_bool_dtype(key.dtype):
+        if key.dtype.kind == "b":
             # `key` is boolean mask
             if len(key) != len(self):
                 raise ValueError(
@@ -644,7 +643,7 @@ def _scatter_by_column(
 
         self._check_scatter_key_length(num_keys, value)
 
-        if is_bool_dtype(key.dtype):
+        if key.dtype.kind == "b":
             return libcudf.copying.boolean_mask_scatter([value], [self], key)[
                 0
             ]._with_type_metadata(self.dtype)
@@ -1083,7 +1082,7 @@ def as_decimal_column(
 
     def apply_boolean_mask(self, mask) -> ColumnBase:
         mask = as_column(mask)
-        if not is_bool_dtype(mask.dtype):
+        if mask.dtype.kind != "b":
             raise ValueError("boolean_mask is not boolean type.")
 
         return apply_boolean_mask([self], mask)[0]._with_type_metadata(
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 7f05a5f91a1..cea68c88c90 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -13,7 +13,6 @@
 from cudf import _lib as libcudf
 from cudf._lib import pylibcudf
 from cudf.api.types import (
-    is_bool_dtype,
     is_float_dtype,
     is_integer,
     is_integer_dtype,
@@ -159,7 +158,7 @@ def __setitem__(self, key: Any, value: Any):
             else as_column(value)
         )
 
-        if not is_bool_dtype(self.dtype) and is_bool_dtype(device_value.dtype):
+        if self.dtype.kind != "b" and device_value.dtype.kind == "b":
             raise TypeError(f"Invalid value {value} for dtype {self.dtype}")
         else:
             device_value = device_value.astype(self.dtype)
@@ -264,7 +263,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
                     f"{self.dtype.type.__name__} and "
                     f"{other.dtype.type.__name__}"
                 )
-            if is_bool_dtype(self.dtype) or is_bool_dtype(other.dtype):
+            if self.dtype.kind == "b" or other.dtype.kind == "b":
                 out_dtype = "bool"
 
         if (
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 2121e623c1c..b3d938829c9 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -32,7 +32,6 @@
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
-    is_bool_dtype,
     is_dict_like,
     is_dtype_equal,
     is_list_like,
@@ -171,7 +170,7 @@ def _can_downcast_to_series(self, df, arg):
             ):
                 return False
             else:
-                if is_bool_dtype(as_column(arg[0]).dtype) and not isinstance(
+                if as_column(arg[0]).dtype.kind == "b" and not isinstance(
                     arg[1], slice
                 ):
                     return True
@@ -320,7 +319,7 @@ def _getitem_tuple_arg(self, arg):
                     tmp_arg[1],
                 )
 
-                if is_bool_dtype(tmp_arg[0].dtype):
+                if tmp_arg[0].dtype.kind == "b":
                     df = columns_df._apply_boolean_mask(
                         BooleanMask(tmp_arg[0], len(columns_df))
                     )
@@ -3678,8 +3677,8 @@ def agg(self, aggs, axis=None):
         """
         dtypes = [self[col].dtype for col in self._column_names]
         common_dtype = find_common_type(dtypes)
-        if not is_bool_dtype(common_dtype) and any(
-            is_bool_dtype(dtype) for dtype in dtypes
+        if common_dtype.kind != "b" and any(
+            dtype.kind == "b" for dtype in dtypes
         ):
             raise MixedTypeError("Cannot create a column with mixed types")
 
@@ -6305,8 +6304,8 @@ def _reduce(
                     and any(
                         not is_object_dtype(dtype) for dtype in source_dtypes
                     )
-                    or not is_bool_dtype(common_dtype)
-                    and any(is_bool_dtype(dtype) for dtype in source_dtypes)
+                    or common_dtype.kind != "b"
+                    and any(dtype.kind == "b" for dtype in source_dtypes)
                 ):
                     raise TypeError(
                         "Columns must all have the same dtype to "
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 8659d7c2392..d2c75715be2 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -22,7 +22,7 @@
 from cudf._lib.sort import segmented_sort_by_key
 from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
-from cudf.api.types import is_bool_dtype, is_list_like, is_numeric_dtype
+from cudf.api.types import is_list_like, is_numeric_dtype
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.abc import Serializable
 from cudf.core.column.column import ColumnBase, StructDtype, as_column
@@ -1534,7 +1534,7 @@ def mult(df):
                 # For `sum` & `product`, boolean types
                 # will need to result in `int64` type.
                 for name, col in res._data.items():
-                    if is_bool_dtype(col.dtype):
+                    if col.dtype.kind == "b":
                         res._data[name] = col.astype("int")
             return res
 
diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index a5fed02cbed..9c81b0eb607 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -10,7 +10,6 @@
 import cudf
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
-    is_bool_dtype,
     is_integer,
     is_integer_dtype,
 )
@@ -230,7 +229,7 @@ def parse_row_iloc_indexer(key: Any, n: int) -> IndexingSpec:
         key = cudf.core.column.as_column(key)
         if isinstance(key, cudf.core.column.CategoricalColumn):
             key = key.astype(key.codes.dtype)
-        if is_bool_dtype(key.dtype):
+        if key.dtype.kind == "b":
             return MaskIndexer(BooleanMask(key, n))
         elif len(key) == 0:
             return EmptyIndexer()
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 3ed72ff812a..ff4b06c6334 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -841,10 +841,6 @@ def _get_row_major(
         | tuple[Any, ...]
         | list[tuple[Any, ...]],
     ) -> DataFrameOrSeries:
-        if pd.api.types.is_bool_dtype(
-            list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple
-        ):
-            return df[row_tuple]
         if isinstance(row_tuple, slice):
             if row_tuple.start is None:
                 row_tuple = slice(self[0], row_tuple.stop, row_tuple.step)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8c8fa75918c..e12cc3d52fb 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -22,7 +22,6 @@
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     _is_scalar_or_zero_d_array,
-    is_bool_dtype,
     is_dict_like,
     is_integer,
     is_integer_dtype,
@@ -221,10 +220,10 @@ def __setitem__(self, key, value):
                     f"Cannot assign {value=} to "
                     f"non-float dtype={self._frame.dtype}"
                 )
-            elif (
-                self._frame.dtype.kind == "b"
-                and not is_bool_dtype(value)
-                and value not in {None, cudf.NA}
+            elif self._frame.dtype.kind == "b" and not (
+                value in {None, cudf.NA}
+                or isinstance(value, (np.bool_, bool))
+                or (isinstance(value, cudf.Scalar) and value.dtype.kind == "b")
             ):
                 raise MixedTypeError(
                     f"Cannot assign {value=} to "
@@ -3221,7 +3220,7 @@ def describe(
             percentiles = np.array([0.25, 0.5, 0.75])
 
         dtype = "str"
-        if is_bool_dtype(self.dtype):
+        if self.dtype.kind == "b":
             data = _describe_categorical(self, percentiles)
         elif isinstance(self._column, cudf.core.column.NumericalColumn):
             data = _describe_numeric(self, percentiles)
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index f9555aee6a2..04c7db7a53c 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -11,7 +11,6 @@
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
-    is_bool_dtype,
     is_integer,
     is_integer_dtype,
     is_numeric_dtype,
@@ -361,7 +360,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
                 arg = cudf.core.column.column_empty(0, dtype="int32")
             if is_integer_dtype(arg.dtype):
                 return self._column.take(arg)
-            if is_bool_dtype(arg.dtype):
+            if arg.dtype.kind == "b":
                 if (bn := len(arg)) != (n := len(self)):
                     raise IndexError(
                         f"Boolean mask has wrong length: {bn} not {n}"
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 7ccf83e424c..2009fc49ce5 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -5234,7 +5234,7 @@ def test_rowwise_ops(data, op, skipna, numeric_only):
             else (pdf[column].notna().count() == 0)
         )
         or cudf.api.types.is_numeric_dtype(pdf[column].dtype)
-        or cudf.api.types.is_bool_dtype(pdf[column].dtype)
+        or pdf[column].dtype.kind == "b"
         for column in pdf
     ):
         with pytest.raises(TypeError):
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 05dcd85df6a..9eba6122d26 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -16,7 +16,6 @@
 
 import cudf
 from cudf.api.extensions import no_default
-from cudf.api.types import is_bool_dtype
 from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex
 from cudf.testing import assert_eq
 from cudf.testing._utils import (
@@ -2397,8 +2396,8 @@ def test_intersection_index(idx1, idx2, sort, pandas_compatible):
             expected,
             actual,
             exact=False
-            if (is_bool_dtype(idx1.dtype) and not is_bool_dtype(idx2.dtype))
-            or (not is_bool_dtype(idx1.dtype) or is_bool_dtype(idx2.dtype))
+            if (idx1.dtype.kind == "b" and idx2.dtype.kind != "b")
+            or (idx1.dtype.kind != "b" or idx2.dtype.kind == "b")
             else True,
         )