Skip to content

Commit

Permalink
DEPR: is_categorical_dtype (#52527)
Browse files Browse the repository at this point in the history
* DEPR: is_categorical_dtype

* GH ref

* suppress warning in doctest

* okwarning from dask

* update test
  • Loading branch information
jbrockmendel authored Apr 11, 2023
1 parent e616938 commit 658ac5b
Show file tree
Hide file tree
Showing 22 changed files with 100 additions and 78 deletions.
4 changes: 4 additions & 0 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

.. ipython:: python
:okwarning:
import dask.dataframe as dd
Expand Down Expand Up @@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet.
Rather than executing immediately, doing operations builds up a **task graph**.

.. ipython:: python
:okwarning:
ddf
ddf["name"]
Expand All @@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name.
To get the actual result you can call ``.compute()``.

.. ipython:: python
:okwarning:
%time ddf["name"].value_counts().compute()
Expand Down Expand Up @@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually,
we need to supply the divisions manually.

.. ipython:: python
:okwarning:
N = 12
starts = [f"20{i:>02d}-01-01" for i in range(N)]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ Deprecations
- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to achieve the same goal (:issue:`52116`)
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
-

Expand Down
1 change: 1 addition & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def pytest_collection_modifyitems(items, config) -> None:
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
]

for item in items:
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,14 @@

from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dict_like,
is_list_like,
is_sequence,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
ExtensionDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCNDFrame,
Expand Down Expand Up @@ -1115,7 +1117,7 @@ def apply_standard(self) -> DataFrame | Series:
# we need to give `na_action="ignore"` for categorical data.
# TODO: remove the `na_action="ignore"` when that default has been changed in
# Categorical (GH51645).
action = "ignore" if is_categorical_dtype(obj) else None
action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)

if len(mapped) and isinstance(mapped[0], ABCSeries):
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
ensure_platform_int,
is_any_real_numeric_dtype,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_dict_like,
is_dtype_equal,
Expand Down Expand Up @@ -409,7 +408,8 @@ def __init__(
null_mask = np.array(False)

# sanitize input
if is_categorical_dtype(values):
vdtype = getattr(values, "dtype", None)
if isinstance(vdtype, CategoricalDtype):
if dtype.categories is None:
dtype = CategoricalDtype(values.categories, dtype.ordered)
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
Expand Down Expand Up @@ -2721,7 +2721,9 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
raise TypeError("Input must be list-like")

categories: Index
if is_categorical_dtype(values):

vdtype = getattr(values, "dtype", None)
if isinstance(vdtype, CategoricalDtype):
values = extract_array(values)
# The Categorical we want to build has the same categories
# as values but its codes are by def [0, ..., len(n_categories) - 1]
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
maybe_upcast_numeric_to_64bit,
)
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -1772,7 +1771,7 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
elif not is_list_like(values) or isinstance(values, ABCDataFrame):
# This will raise later, but we avoid passing to maybe_convert_platform
return values
elif is_categorical_dtype(values):
elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
values = np.asarray(values)
elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
# TODO: should we just cast these to list?
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,13 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
>>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
True
"""
# GH#52527
warnings.warn(
"is_categorical_dtype is deprecated and will be removed in a future "
"version. Use isinstance(dtype, CategoricalDtype) instead",
FutureWarning,
stacklevel=find_stack_level(),
)
if isinstance(arr_or_dtype, ExtensionDtype):
# GH#33400 fastpath for dtype object
return arr_or_dtype.name == "category"
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_list_like,
is_scalar,
)
from pandas.core.dtypes.dtypes import CategoricalDtype

from pandas.core import algorithms
from pandas.core.arrays import (
Expand Down Expand Up @@ -618,7 +618,7 @@ def __init__(
# TODO 2022-10-08 we only have one test that gets here and
# values are already in nanoseconds in that case.
grouping_vector = Series(grouping_vector).to_numpy()
elif is_categorical_dtype(grouping_vector):
elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype):
# a passed Categorical
self._orig_cats = grouping_vector.categories
grouping_vector, self._all_grouper = recode_for_groupby(
Expand All @@ -635,7 +635,8 @@ def __iter__(self) -> Iterator:

@cache_readonly
def _passed_categorical(self) -> bool:
return is_categorical_dtype(self.grouping_vector)
dtype = getattr(self.grouping_vector, "dtype", None)
return isinstance(dtype, CategoricalDtype)

@cache_readonly
def name(self) -> Hashable:
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/interchange/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
from pandas.util._decorators import cache_readonly

import pandas as pd
from pandas.api.types import (
is_categorical_dtype,
is_string_dtype,
)
from pandas.api.types import is_string_dtype
from pandas.core.interchange.buffer import PandasBuffer
from pandas.core.interchange.dataframe_protocol import (
Column,
Expand Down Expand Up @@ -99,7 +96,7 @@ def offset(self) -> int:
def dtype(self) -> tuple[DtypeKind, int, str, str]:
dtype = self._col.dtype

if is_categorical_dtype(dtype):
if isinstance(dtype, pd.CategoricalDtype):
codes = self._col.values.codes
(
_,
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
DT64NS_DTYPE,
ensure_platform_int,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
Expand All @@ -33,6 +32,7 @@
is_timedelta64_dtype,
)
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
)
Expand Down Expand Up @@ -458,7 +458,8 @@ def _bins_to_cuts(
raise ValueError(
"Bin labels must be one fewer than the number of bin edges"
)
if not is_categorical_dtype(labels):

if not isinstance(getattr(labels, "dtype", None), CategoricalDtype):
labels = Categorical(
labels,
categories=labels if len(set(labels)) == len(labels) else None,
Expand Down
8 changes: 5 additions & 3 deletions pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

from pandas.core.dtypes.common import (
is_any_real_numeric_dtype,
is_categorical_dtype,
is_extension_array_dtype,
is_float,
is_float_dtype,
Expand All @@ -34,6 +33,7 @@
is_number,
is_numeric_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
Expand Down Expand Up @@ -563,7 +563,7 @@ def result(self):

def _convert_to_ndarray(self, data):
# GH31357: categorical columns are processed separately
if is_categorical_dtype(data):
if isinstance(data.dtype, CategoricalDtype):
return data

# GH32073: cast to float if values contain nulled integers
Expand Down Expand Up @@ -1211,7 +1211,9 @@ def _make_plot(self):

c_is_column = is_hashable(c) and c in self.data.columns

color_by_categorical = c_is_column and is_categorical_dtype(self.data[c])
color_by_categorical = c_is_column and isinstance(
self.data[c].dtype, CategoricalDtype
)

color = self.kwds.pop("color", None)
if c is not None and color is not None:
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pandas.compat import PYPY

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_dtype_equal,
is_object_dtype,
)
Expand Down Expand Up @@ -96,8 +95,8 @@ def test_memory_usage(index_or_series_memory_obj):
res_deep = obj.memory_usage(deep=True)

is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
is_categorical = is_categorical_dtype(obj.dtype) or (
is_ser and is_categorical_dtype(obj.index.dtype)
is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
)
is_object_string = is_dtype_equal(obj, "string[python]") or (
is_ser and is_dtype_equal(obj.index.dtype, "string[python]")
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def get_is_dtype_funcs():
return [getattr(com, fname) for fname in fnames]


@pytest.mark.filterwarnings("ignore:is_categorical_dtype is deprecated:FutureWarning")
@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__)
def test_get_dtype_error_catch(func):
# see gh-15941
Expand All @@ -171,7 +172,7 @@ def test_get_dtype_error_catch(func):

msg = f"{func.__name__} is deprecated"
warn = None
if func is com.is_int64_dtype:
if func is com.is_int64_dtype or func is com.is_categorical_dtype:
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
Expand Down Expand Up @@ -274,12 +275,14 @@ def test_is_interval_dtype():


def test_is_categorical_dtype():
assert not com.is_categorical_dtype(object)
assert not com.is_categorical_dtype([1, 2, 3])
msg = "is_categorical_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not com.is_categorical_dtype(object)
assert not com.is_categorical_dtype([1, 2, 3])

assert com.is_categorical_dtype(CategoricalDtype())
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
assert com.is_categorical_dtype(CategoricalDtype())
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))


def test_is_string_dtype():
Expand Down
25 changes: 16 additions & 9 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,18 @@ def test_is_dtype(self, dtype):
assert not CategoricalDtype.is_dtype(np.float64)

def test_basic(self, dtype):
assert is_categorical_dtype(dtype)
msg = "is_categorical_dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert is_categorical_dtype(dtype)

factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])

s = Series(factor, name="A")
s = Series(factor, name="A")

# dtypes
assert is_categorical_dtype(s.dtype)
assert is_categorical_dtype(s)
assert not is_categorical_dtype(np.dtype("float64"))
# dtypes
assert is_categorical_dtype(s.dtype)
assert is_categorical_dtype(s)
assert not is_categorical_dtype(np.dtype("float64"))

def test_tuple_categories(self):
categories = [(1, "a"), (2, "b"), (3, "c")]
Expand Down Expand Up @@ -1109,10 +1111,15 @@ def test_is_bool_dtype_sparse():
)
def test_is_dtype_no_warning(check):
data = pd.DataFrame({"A": [1, 2]})
with tm.assert_produces_warning(None):

warn = None
msg = "is_categorical_dtype is deprecated"
if check is is_categorical_dtype:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
check(data)

with tm.assert_produces_warning(None):
with tm.assert_produces_warning(warn, match=msg):
check(data["A"])


Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from pandas.core.dtypes.base import _registry as ea_registry
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_interval_dtype,
is_object_dtype,
)
Expand Down Expand Up @@ -484,9 +483,9 @@ def test_setitem_intervals(self):
df["E"] = np.array(ser.values)
df["F"] = ser.astype(object)

assert is_categorical_dtype(df["B"].dtype)
assert isinstance(df["B"].dtype, CategoricalDtype)
assert is_interval_dtype(df["B"].cat.categories)
assert is_categorical_dtype(df["D"].dtype)
assert isinstance(df["D"].dtype, CategoricalDtype)
assert is_interval_dtype(df["D"].cat.categories)

# These go through the Series constructor and so get inferred back
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
from pandas.compat import is_platform_windows
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_categorical_dtype

import pandas as pd
from pandas import (
Categorical,
CategoricalDtype,
DataFrame,
Index,
Series,
Expand Down Expand Up @@ -1280,7 +1279,7 @@ def test_any_all_np_func(self, func, data, expected):
# GH 19976
data = DataFrame(data)

if any(is_categorical_dtype(x) for x in data.dtypes):
if any(isinstance(x, CategoricalDtype) for x in data.dtypes):
with pytest.raises(
TypeError, match="dtype category does not support reduction"
):
Expand Down
Loading

0 comments on commit 658ac5b

Please sign in to comment.