Skip to content

Commit

Permalink
Replace _is_categorical_dtype with isinstance
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Jan 31, 2024
1 parent bb59715 commit 7269e4e
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 42 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def set_rand_params(self, params):
if dtype_val is not None:
dtype_val = {
col_name: "category"
if cudf.utils.dtypes._is_categorical_dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype)
else pandas_dtypes_to_np_dtypes[dtype]
for col_name, dtype in dtype_val.items()
}
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _get_dtype_param_value(dtype_val):
if dtype_val is not None and isinstance(dtype_val, abc.Mapping):
processed_dtypes = {}
for col_name, dtype in dtype_val.items():
if cudf.utils.dtypes._is_categorical_dtype(dtype):
if isinstance(dtype, cudf.CategoricalDtype):
processed_dtypes[col_name] = "category"
else:
processed_dtypes[col_name] = str(
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -434,19 +434,19 @@ def read_csv(
if dtype is not None:
if isinstance(dtype, abc.Mapping):
for k, v in dtype.items():
if cudf.api.types._is_categorical_dtype(v):
if isinstance(cudf.dtype(v), cudf.CategoricalDtype):
df._data[str(k)] = df._data[str(k)].astype(v)
elif (
cudf.api.types.is_scalar(dtype) or
isinstance(dtype, (
np.dtype, pd.api.extensions.ExtensionDtype, type
))
):
if cudf.api.types._is_categorical_dtype(dtype):
if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype):
df = df.astype(dtype)
elif isinstance(dtype, abc.Collection):
for index, col_dtype in enumerate(dtype):
if cudf.api.types._is_categorical_dtype(col_dtype):
if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype):
col_name = df._data.names[index]
df._data[col_name] = df._data[col_name].astype(col_dtype)

Expand Down Expand Up @@ -554,7 +554,7 @@ cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *:
# TODO: Remove this work-around Dictionary types
# in libcudf are fully mapped to categorical columns:
# https://github.com/rapidsai/cudf/issues/3960
if cudf.api.types._is_categorical_dtype(dtype):
if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype):
if isinstance(dtype, str):
dtype = "str"
else:
Expand Down
15 changes: 9 additions & 6 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
from cudf._lib.types import size_type_dtype
from cudf._typing import ColumnLike, Dtype, ScalarLike
from cudf.api.types import (
_is_categorical_dtype,
_is_datetime64tz_dtype,
_is_interval_dtype,
_is_non_decimal_numeric_dtype,
Expand Down Expand Up @@ -968,7 +967,8 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase:
col = self
if self.dtype == dtype:
return col
if _is_categorical_dtype(dtype):
dtype = cudf.dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype):
return col.as_categorical_column(dtype)

if (
Expand All @@ -987,7 +987,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase:
dtype = pandas_dtypes_to_np_dtypes.get(dtype, dtype)
if _is_non_decimal_numeric_dtype(dtype):
return col.as_numerical_column(dtype)
elif _is_categorical_dtype(dtype):
elif isinstance(dtype, cudf.CategoricalDtype):
return col.as_categorical_column(dtype)
elif cudf.dtype(dtype).type in {
np.str_,
Expand Down Expand Up @@ -1396,7 +1396,7 @@ def column_empty_like(

if (
hasattr(column, "dtype")
and _is_categorical_dtype(column.dtype)
and isinstance(column.dtype, cudf.CategoricalDtype)
and dtype == column.dtype
):
catcolumn = cast("cudf.core.column.CategoricalColumn", column)
Expand Down Expand Up @@ -2263,7 +2263,10 @@ def as_column(
np_type = None
try:
if dtype is not None:
if _is_categorical_dtype(dtype) or _is_interval_dtype(dtype):
dtype = cudf.dtype(dtype)
if isinstance(
dtype, (cudf.CategoricalDtype, cudf.IntervalDtype)
):
raise TypeError
if _is_datetime64tz_dtype(dtype):
raise NotImplementedError(
Expand Down Expand Up @@ -2407,7 +2410,7 @@ def as_column(
except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError) as e:
if isinstance(e, MixedTypeError):
raise TypeError(str(e))
if _is_categorical_dtype(dtype):
if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype):
sr = pd.Series(arbitrary, dtype="category")
data = as_column(sr, nan_as_null=nan_as_null, dtype=dtype)
elif np_type == np.str_:
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ def _is_categorical_dtype(obj):
pd.Series,
),
):
return _is_categorical_dtype(obj.dtype)
return isinstance(cudf.dtype(obj.dtype), cudf.CategoricalDtype)
if hasattr(obj, "type"):
if obj.type is pd_CategoricalDtypeType:
return True
Expand Down
24 changes: 12 additions & 12 deletions python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

import cudf
from cudf._lib.unary import is_nan
from cudf.api.types import (
_is_categorical_dtype,
is_numeric_dtype,
is_string_dtype,
)
from cudf.api.types import is_numeric_dtype, is_string_dtype
from cudf.core.missing import NA, NaT


Expand Down Expand Up @@ -86,7 +82,7 @@ def _check_types(
if (
exact
and not isinstance(left, cudf.MultiIndex)
and _is_categorical_dtype(left)
and isinstance(left.dtype, cudf.CategoricalDtype)
):
if left.dtype != right.dtype:
raise_assert_detail(
Expand Down Expand Up @@ -144,8 +140,8 @@ def assert_column_equal(
"""
if check_dtype is True:
if (
_is_categorical_dtype(left)
and _is_categorical_dtype(right)
isinstance(left.dtype, cudf.CategoricalDtype)
and isinstance(right.dtype, cudf.CategoricalDtype)
and not check_categorical
):
pass
Expand Down Expand Up @@ -173,7 +169,9 @@ def assert_column_equal(
return

if check_exact and check_categorical:
if _is_categorical_dtype(left) and _is_categorical_dtype(right):
if isinstance(left.dtype, cudf.CategoricalDtype) and isinstance(
right.dtype, cudf.CategoricalDtype
):
left_cat = left.categories
right_cat = right.categories

Expand Down Expand Up @@ -207,8 +205,8 @@ def assert_column_equal(

if (
not check_dtype
and _is_categorical_dtype(left)
and _is_categorical_dtype(right)
and isinstance(left.dtype, cudf.CategoricalDtype)
and isinstance(right.dtype, cudf.CategoricalDtype)
):
left = left.astype(left.categories.dtype)
right = right.astype(right.categories.dtype)
Expand Down Expand Up @@ -258,7 +256,9 @@ def assert_column_equal(
raise e
else:
columns_equal = False
if _is_categorical_dtype(left) and _is_categorical_dtype(right):
if isinstance(left.dtype, cudf.CategoricalDtype) and isinstance(
right.dtype, cudf.CategoricalDtype
):
left = left.astype(left.categories.dtype)
right = right.astype(right.categories.dtype)
if not columns_equal:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def test_column_offset_and_size(pandas_input, offset, size):
children=col.base_children,
)

if cudf.api.types._is_categorical_dtype(col.dtype):
if isinstance(col.dtype, cudf.CategoricalDtype):
assert col.size == col.codes.size
assert col.size == (col.codes.data.size / col.codes.dtype.itemsize)
elif cudf.api.types.is_string_dtype(col.dtype):
Expand Down Expand Up @@ -120,7 +120,7 @@ def column_slicing_test(col, offset, size, cast_to_float=False):
else:
pd_series = series.to_pandas()

if cudf.api.types._is_categorical_dtype(col.dtype):
if isinstance(col.dtype, cudf.CategoricalDtype):
# The cudf.Series is constructed from an already sliced column, whereas
# the pandas.Series is constructed from the unsliced series and then
# sliced, so the indexes should be different and we must ignore it.
Expand Down
13 changes: 7 additions & 6 deletions python/cudf/cudf/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pytest

import cudf as gd
from cudf.api.types import _is_categorical_dtype
from cudf.core._compat import PANDAS_GE_200
from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
from cudf.testing._utils import (
Expand Down Expand Up @@ -600,8 +599,8 @@ def test_concat_empty_dataframes(df, other, ignore_index):
actual = gd.concat(other_gd, ignore_index=ignore_index)
if expected.shape != df.shape:
for key, col in actual[actual.columns].items():
if _is_categorical_dtype(col.dtype):
if not _is_categorical_dtype(expected[key].dtype):
if isinstance(col.dtype, gd.CategoricalDtype):
if not isinstance(expected[key].dtype, gd.CategoricalDtype):
# TODO: Pandas bug:
# https://github.com/pandas-dev/pandas/issues/42840
expected[key] = expected[key].fillna("-1").astype("str")
Expand Down Expand Up @@ -1203,8 +1202,10 @@ def test_concat_join_empty_dataframes(
if expected.shape != df.shape:
if axis == 0:
for key, col in actual[actual.columns].items():
if _is_categorical_dtype(col.dtype):
if not _is_categorical_dtype(expected[key].dtype):
if isinstance(col.dtype, gd.CategoricalDtype):
if not isinstance(
expected[key].dtype, gd.CategoricalDtype
):
# TODO: Pandas bug:
# https://github.com/pandas-dev/pandas/issues/42840
expected[key] = (
Expand Down Expand Up @@ -1323,7 +1324,7 @@ def test_concat_join_empty_dataframes_axis_1(
if expected.shape != df.shape:
if axis == 0:
for key, col in actual[actual.columns].items():
if _is_categorical_dtype(col.dtype):
if isinstance(col.dtype, gd.CategoricalDtype):
expected[key] = expected[key].fillna("-1")
actual[key] = col.astype("str").fillna("-1")
# if not expected.empty:
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ def cudf_dtype_from_pydata_dtype(dtype):
"""Given a numpy or pandas dtype, converts it into the equivalent cuDF
Python dtype.
"""

if cudf.api.types._is_categorical_dtype(dtype):
dtype = cudf.dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype):
return cudf.core.dtypes.CategoricalDtype
elif cudf.api.types.is_decimal32_dtype(dtype):
return cudf.core.dtypes.Decimal32Dtype
Expand Down Expand Up @@ -416,9 +416,9 @@ def get_min_float_dtype(col):


def is_mixed_with_object_dtype(lhs, rhs):
if cudf.api.types._is_categorical_dtype(lhs.dtype):
if isinstance(lhs.dtype, cudf.CategoricalDtype):
return is_mixed_with_object_dtype(lhs.dtype.categories, rhs)
elif cudf.api.types._is_categorical_dtype(rhs.dtype):
elif isinstance(rhs.dtype, cudf.CategoricalDtype):
return is_mixed_with_object_dtype(lhs, rhs.dtype.categories)

return (lhs.dtype == "object" and rhs.dtype != "object") or (
Expand Down Expand Up @@ -518,10 +518,10 @@ def find_common_type(dtypes):

# Early exit for categoricals since they're not hashable and therefore
# can't be put in a set.
if any(cudf.api.types._is_categorical_dtype(dtype) for dtype in dtypes):
if any(isinstance(dtype, cudf.CategoricalDtype) for dtype in dtypes):
if all(
(
cudf.api.types._is_categorical_dtype(dtype)
isinstance(dtype, cudf.CategoricalDtype)
and (not dtype.ordered if hasattr(dtype, "ordered") else True)
)
for dtype in dtypes
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def percentile_cudf(a, q, interpolation="linear"):
if isinstance(q, Iterator):
q = list(q)

if cudf.api.types._is_categorical_dtype(a.dtype):
if isinstance(a.dtype, cudf.CategoricalDtype):
result = cp.percentile(a.cat.codes, q, interpolation=interpolation)

return (
Expand Down
3 changes: 1 addition & 2 deletions python/dask_cudf/dask_cudf/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from dask.utils import M

import cudf as gd
from cudf.api.types import _is_categorical_dtype
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate

_SHUFFLE_SUPPORT = ("tasks", "p2p") # "disk" not supported
Expand Down Expand Up @@ -230,7 +229,7 @@ def quantile_divisions(df, by, npartitions):
if (
len(columns) == 1
and df[columns[0]].dtype != "object"
and not _is_categorical_dtype(df[columns[0]].dtype)
and not isinstance(df[columns[0]].dtype, gd.CategoricalDtype)
):
dtype = df[columns[0]].dtype
divisions = divisions[columns[0]].astype("int64")
Expand Down

0 comments on commit 7269e4e

Please sign in to comment.