From 658ac5b62745296009ebf9d5414f036b1a3e0dc0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 11 Apr 2023 11:20:17 -0700 Subject: [PATCH] DEPR: is_categorical_dtype (#52527) * DEPR: is_categorical_dtype * GH ref * suppress warning in doctest * okwarning from dask * update test --- doc/source/user_guide/scale.rst | 4 +++ doc/source/whatsnew/v2.1.0.rst | 1 + pandas/conftest.py | 1 + pandas/core/apply.py | 8 +++--- pandas/core/arrays/categorical.py | 8 +++--- pandas/core/arrays/interval.py | 3 +-- pandas/core/dtypes/common.py | 7 ++++++ pandas/core/groupby/grouper.py | 7 +++--- pandas/core/interchange/column.py | 7 ++---- pandas/core/reshape/tile.py | 5 ++-- pandas/plotting/_matplotlib/core.py | 8 +++--- pandas/tests/base/test_misc.py | 5 ++-- pandas/tests/dtypes/test_common.py | 15 ++++++----- pandas/tests/dtypes/test_dtypes.py | 25 ++++++++++++------- pandas/tests/frame/indexing/test_setitem.py | 5 ++-- pandas/tests/frame/test_reductions.py | 5 ++-- .../indexes/interval/test_constructors.py | 6 ++--- pandas/tests/indexing/test_categorical.py | 21 ++++++++-------- pandas/tests/io/test_stata.py | 9 +++---- pandas/tests/reshape/merge/test_merge.py | 7 ++---- pandas/tests/reshape/test_crosstab.py | 5 ++-- pandas/tests/series/test_constructors.py | 16 +++++++----- 22 files changed, 100 insertions(+), 78 deletions(-) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 65ed82d9d2cf5..182f1ca39e0a8 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas. We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in. .. ipython:: python + :okwarning: import dask.dataframe as dd @@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet. Rather than executing immediately, doing operations build up a **task graph**. .. ipython:: python + :okwarning: ddf ddf["name"] @@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name. To get the actual result you can call ``.compute()``. .. ipython:: python + :okwarning: %time ddf["name"].value_counts().compute() @@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually, we need to supply the divisions manually. .. ipython:: python + :okwarning: N = 12 starts = [f"20{i:>02d}-01-01" for i in range(N)] diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index cbbd76ae37d7b..fca355069ae74 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -226,6 +226,7 @@ Deprecations - Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to archive the same goal (:issue:`52116`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) +- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`) - Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index ab9ffa35d9a9f..750fff1b1aea7 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -147,6 +147,7 @@ def pytest_collection_modifyitems(items, config) -> None: "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", ), + ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ] for item in items: diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 31a61bd01131e..274a389791a31 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -41,12 +41,14 @@ from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( - is_categorical_dtype, is_dict_like, is_list_like, is_sequence, ) -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCNDFrame, @@ -1115,7 +1117,7 @@ def apply_standard(self) -> DataFrame | Series: # we need to give `na_action="ignore"` for categorical data. # TODO: remove the `na_action="ignore"` when that default has been changed in # Categorical (GH51645). - action = "ignore" if is_categorical_dtype(obj) else None + action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) if len(mapped) and isinstance(mapped[0], ABCSeries): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f037e14177fdb..ea4384dc0ef2d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -38,7 +38,6 @@ ensure_platform_int, is_any_real_numeric_dtype, is_bool_dtype, - is_categorical_dtype, is_datetime64_dtype, is_dict_like, is_dtype_equal, @@ -409,7 +408,8 @@ def __init__( null_mask = np.array(False) # sanitize input - if is_categorical_dtype(values): + vdtype = getattr(values, "dtype", None) + if isinstance(vdtype, CategoricalDtype): if dtype.categories is None: dtype = CategoricalDtype(values.categories, dtype.ordered) elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): @@ -2721,7 +2721,9 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]: raise TypeError("Input must be list-like") categories: Index - if is_categorical_dtype(values): + + vdtype = getattr(values, "dtype", None) + if isinstance(vdtype, CategoricalDtype): values = extract_array(values) # The Categorical we want to build has the same categories # as values but its codes are by def [0, ..., len(n_categories) - 1] diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 6b0cf2a900ae5..1d233e0ebde1a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -50,7 +50,6 @@ maybe_upcast_numeric_to_64bit, ) from pandas.core.dtypes.common import ( - is_categorical_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -1772,7 +1771,7 @@ def _maybe_convert_platform_interval(values) -> ArrayLike: elif not is_list_like(values) or isinstance(values, ABCDataFrame): # This will raise later, but we avoid passing to maybe_convert_platform return values - elif is_categorical_dtype(values): + elif isinstance(getattr(values, "dtype", None), CategoricalDtype): values = np.asarray(values) elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)): # TODO: should we just cast these to list? diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e22baacba0574..6349a064f9edd 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -469,6 +469,13 @@ def is_categorical_dtype(arr_or_dtype) -> bool: >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) True """ + # GH#52527 + warnings.warn( + "is_categorical_dtype is deprecated and will be removed in a future " + "version. Use isinstance(dtype, CategoricalDtype) instead", + FutureWarning, + stacklevel=find_stack_level(), + ) if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.name == "category" diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 8879a46e9bbbf..1cc20d5bc68f9 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -22,10 +22,10 @@ from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( - is_categorical_dtype, is_list_like, is_scalar, ) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core import algorithms from pandas.core.arrays import ( @@ -618,7 +618,7 @@ def __init__( # TODO 2022-10-08 we only have one test that gets here and # values are already in nanoseconds in that case. grouping_vector = Series(grouping_vector).to_numpy() - elif is_categorical_dtype(grouping_vector): + elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype): # a passed Categorical self._orig_cats = grouping_vector.categories grouping_vector, self._all_grouper = recode_for_groupby( @@ -635,7 +635,8 @@ def __iter__(self) -> Iterator: @cache_readonly def _passed_categorical(self) -> bool: - return is_categorical_dtype(self.grouping_vector) + dtype = getattr(self.grouping_vector, "dtype", None) + return isinstance(dtype, CategoricalDtype) @cache_readonly def name(self) -> Hashable: diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index d8c6a58e77413..7eb43dbd074c9 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -10,10 +10,7 @@ from pandas.util._decorators import cache_readonly import pandas as pd -from pandas.api.types import ( - is_categorical_dtype, - is_string_dtype, -) +from pandas.api.types import is_string_dtype from pandas.core.interchange.buffer import PandasBuffer from pandas.core.interchange.dataframe_protocol import ( Column, @@ -99,7 +96,7 @@ def offset(self) -> int: def dtype(self) -> tuple[DtypeKind, int, str, str]: dtype = self._col.dtype - if is_categorical_dtype(dtype): + if isinstance(dtype, pd.CategoricalDtype): codes = self._col.values.codes ( _, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index ccb53efbdcf6e..dbf5fe8e6fd95 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -22,7 +22,6 @@ DT64NS_DTYPE, ensure_platform_int, is_bool_dtype, - is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, @@ -33,6 +32,7 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, ) @@ -458,7 +458,8 @@ def _bins_to_cuts( raise ValueError( "Bin labels must be one fewer than the number of bin edges" ) - if not is_categorical_dtype(labels): + + if not isinstance(getattr(labels, "dtype", None), CategoricalDtype): labels = Categorical( labels, categories=labels if len(set(labels)) == len(labels) else None, diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index cfea83a7740fe..f667de6a5a34c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -22,7 +22,6 @@ from pandas.core.dtypes.common import ( is_any_real_numeric_dtype, - is_categorical_dtype, is_extension_array_dtype, is_float, is_float_dtype, @@ -34,6 +33,7 @@ is_number, is_numeric_dtype, ) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndex, @@ -563,7 +563,7 @@ def result(self): def _convert_to_ndarray(self, data): # GH31357: categorical columns are processed separately - if is_categorical_dtype(data): + if isinstance(data.dtype, CategoricalDtype): return data # GH32073: cast to float if values contain nulled integers @@ -1211,7 +1211,9 @@ def _make_plot(self): c_is_column = is_hashable(c) and c in self.data.columns - color_by_categorical = c_is_column and is_categorical_dtype(self.data[c]) + color_by_categorical = c_is_column and isinstance( + self.data[c].dtype, CategoricalDtype + ) color = self.kwds.pop("color", None) if c is not None and color is not None: diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 362df635c13fd..3ca53c4010449 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -6,7 +6,6 @@ from pandas.compat import PYPY from pandas.core.dtypes.common import ( - is_categorical_dtype, is_dtype_equal, is_object_dtype, ) @@ -96,8 +95,8 @@ def test_memory_usage(index_or_series_memory_obj): res_deep = obj.memory_usage(deep=True) is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index)) - is_categorical = is_categorical_dtype(obj.dtype) or ( - is_ser and is_categorical_dtype(obj.index.dtype) + is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or ( + is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype) ) is_object_string = is_dtype_equal(obj, "string[python]") or ( is_ser and is_dtype_equal(obj.index.dtype, "string[python]") diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 78e943fd35cb8..6702d219caa83 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -163,6 +163,7 @@ def get_is_dtype_funcs(): return [getattr(com, fname) for fname in fnames] +@pytest.mark.filterwarnings("ignore:is_categorical_dtype is deprecated:FutureWarning") @pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) def test_get_dtype_error_catch(func): # see gh-15941 @@ -171,7 +172,7 @@ def test_get_dtype_error_catch(func): msg = f"{func.__name__} is deprecated" warn = None - if func is com.is_int64_dtype: + if func is com.is_int64_dtype or func is com.is_categorical_dtype: warn = FutureWarning with tm.assert_produces_warning(warn, match=msg): @@ -274,12 +275,14 @@ def test_is_interval_dtype(): def test_is_categorical_dtype(): - assert not com.is_categorical_dtype(object) - assert not com.is_categorical_dtype([1, 2, 3]) + msg = "is_categorical_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) - assert com.is_categorical_dtype(CategoricalDtype()) - assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) - assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) def test_is_string_dtype(): diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 32ab5abed6f50..e0ae572e17a8e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -166,16 +166,18 @@ def test_is_dtype(self, dtype): assert not CategoricalDtype.is_dtype(np.float64) def test_basic(self, dtype): - assert is_categorical_dtype(dtype) + msg = "is_categorical_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert is_categorical_dtype(dtype) - factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]) + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]) - s = Series(factor, name="A") + s = Series(factor, name="A") - # dtypes - assert is_categorical_dtype(s.dtype) - assert is_categorical_dtype(s) - assert not is_categorical_dtype(np.dtype("float64")) + # dtypes + assert is_categorical_dtype(s.dtype) + assert is_categorical_dtype(s) + assert not is_categorical_dtype(np.dtype("float64")) def test_tuple_categories(self): categories = [(1, "a"), (2, "b"), (3, "c")] @@ -1109,10 +1111,15 @@ def test_is_bool_dtype_sparse(): ) def test_is_dtype_no_warning(check): data = pd.DataFrame({"A": [1, 2]}) - with tm.assert_produces_warning(None): + + warn = None + msg = "is_categorical_dtype is deprecated" + if check is is_categorical_dtype: + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): check(data) - with tm.assert_produces_warning(None): + with tm.assert_produces_warning(warn, match=msg): check(data["A"]) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index b20375c673679..f31b27897bf82 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -7,7 +7,6 @@ from pandas.core.dtypes.base import _registry as ea_registry from pandas.core.dtypes.common import ( - is_categorical_dtype, is_interval_dtype, is_object_dtype, ) @@ -484,9 +483,9 @@ def test_setitem_intervals(self): df["E"] = np.array(ser.values) df["F"] = ser.astype(object) - assert is_categorical_dtype(df["B"].dtype) + assert isinstance(df["B"].dtype, CategoricalDtype) assert is_interval_dtype(df["B"].cat.categories) - assert is_categorical_dtype(df["D"].dtype) + assert isinstance(df["D"].dtype, CategoricalDtype) assert is_interval_dtype(df["D"].cat.categories) # These go through the Series constructor and so get inferred back diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 28809e2ecb788..41191dd3d0b90 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -9,11 +9,10 @@ from pandas.compat import is_platform_windows import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_categorical_dtype - import pandas as pd from pandas import ( Categorical, + CategoricalDtype, DataFrame, Index, Series, @@ -1280,7 +1279,7 @@ def test_any_all_np_func(self, func, data, expected): # GH 19976 data = DataFrame(data) - if any(is_categorical_dtype(x) for x in data.dtypes): + if any(isinstance(x, CategoricalDtype) for x in data.dtypes): with pytest.raises( TypeError, match="dtype category does not support reduction" ): diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index b7511a72e31da..9524288b33eef 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -3,11 +3,11 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.dtypes.dtypes import IntervalDtype from pandas import ( Categorical, + CategoricalDtype, CategoricalIndex, Index, Interval, @@ -316,7 +316,7 @@ def get_kwargs_from_breaks(self, breaks, closed="right"): tuples = list(zip(breaks[:-1], breaks[1:])) if isinstance(breaks, (list, tuple)): return {"data": tuples} - elif is_categorical_dtype(breaks): + elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype): return {"data": breaks._constructor(tuples)} return {"data": com.asarray_tuplesafe(tuples)} @@ -378,7 +378,7 @@ def get_kwargs_from_breaks(self, breaks, closed="right"): if isinstance(breaks, list): return {"data": ivs} - elif is_categorical_dtype(breaks): + elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype): return {"data": breaks._constructor(ivs)} return {"data": np.array(ivs, dtype=object)} diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 747e7972aacf1..05d6eadc5aed0 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -3,11 +3,10 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_categorical_dtype - import pandas as pd from pandas import ( Categorical, + CategoricalDtype, CategoricalIndex, DataFrame, Index, @@ -166,7 +165,7 @@ def test_slicing_and_getting_ops(self): # frame res_df = df.iloc[2:4, :] tm.assert_frame_equal(res_df, exp_df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) # row res_row = df.iloc[2, :] @@ -176,7 +175,7 @@ def test_slicing_and_getting_ops(self): # col res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) - assert is_categorical_dtype(res_col.dtype) + assert isinstance(res_col.dtype, CategoricalDtype) # single value res_val = df.iloc[2, 0] @@ -186,7 +185,7 @@ def test_slicing_and_getting_ops(self): # frame res_df = df.loc["j":"k", :] tm.assert_frame_equal(res_df, exp_df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) # row res_row = df.loc["j", :] @@ -196,7 +195,7 @@ def test_slicing_and_getting_ops(self): # col res_col = df.loc[:, "cats"] tm.assert_series_equal(res_col, exp_col) - assert is_categorical_dtype(res_col.dtype) + assert isinstance(res_col.dtype, CategoricalDtype) # single value res_val = df.loc["j", "cats"] @@ -233,23 +232,23 @@ def test_slicing_and_getting_ops(self): res_df = df.iloc[slice(2, 4)] tm.assert_frame_equal(res_df, exp_df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) res_df = df.iloc[[2, 3]] tm.assert_frame_equal(res_df, exp_df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) - assert is_categorical_dtype(res_col.dtype) + assert isinstance(res_col.dtype, CategoricalDtype) res_df = df.iloc[:, slice(0, 2)] tm.assert_frame_equal(res_df, df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) res_df = df.iloc[:, [0, 1]] tm.assert_frame_equal(res_df, df) - assert is_categorical_dtype(res_df["cats"].dtype) + assert isinstance(res_df["cats"].dtype, CategoricalDtype) def test_slicing_doc_examples(self): # GH 7918 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 75e9f7b744caa..05c397c4ea4f1 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -12,9 +12,8 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_categorical_dtype - import pandas as pd +from pandas import CategoricalDtype import pandas._testing as tm from pandas.core.frame import ( DataFrame, @@ -1084,7 +1083,7 @@ def test_categorical_order(self, file, datapath): # Check identity of codes for col in expected: - if is_categorical_dtype(expected[col].dtype): + if isinstance(expected[col].dtype, CategoricalDtype): tm.assert_series_equal(expected[col].cat.codes, parsed[col].cat.codes) tm.assert_index_equal( expected[col].cat.categories, parsed[col].cat.categories @@ -1114,7 +1113,7 @@ def test_categorical_ordering(self, file, datapath): parsed_unordered = read_stata(file, order_categoricals=False) for col in parsed: - if not is_categorical_dtype(parsed[col].dtype): + if not isinstance(parsed[col].dtype, CategoricalDtype): continue assert parsed[col].cat.ordered assert not parsed_unordered[col].cat.ordered @@ -1178,7 +1177,7 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: """ for col in from_frame: ser = from_frame[col] - if is_categorical_dtype(ser.dtype): + if isinstance(ser.dtype, CategoricalDtype): cat = ser._values.remove_unused_categories() if cat.categories.dtype == object: categories = pd.Index._with_infer(cat.categories._values) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 3a822c8134eb4..017bf1c917e37 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -8,10 +8,7 @@ import numpy as np import pytest -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_object_dtype, -) +from pandas.core.dtypes.common import is_object_dtype from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -1982,7 +1979,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right): X = change(right.X.astype("object")) right = right.assign(X=X) - assert is_categorical_dtype(left.X.values.dtype) + assert isinstance(left.X.values.dtype, CategoricalDtype) # assert not left.X.values._categories_match_up_to_permutation(right.X.values) merged = merge(left, right, on="X", how=join_type) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 573f5d49afb89..2f2ac7b456064 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -1,10 +1,9 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_categorical_dtype - import pandas as pd from pandas import ( + CategoricalDtype, CategoricalIndex, DataFrame, Index, @@ -836,7 +835,7 @@ def test_categoricals(a_dtype, b_dtype): # Verify when categorical does not have all values present a.loc[a == 1] = 2 - a_is_cat = is_categorical_dtype(a.dtype) + a_is_cat = isinstance(a.dtype, CategoricalDtype) assert not a_is_cat or a.value_counts().loc[1] == 0 result = crosstab(a, b, margins=True, dropna=False) values = [[18, 16, 34], [0, 0, 0], [34, 32, 66], [52, 48, 100]] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8db5830d1ed6e..77ff5af0e6f5d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -396,13 +396,17 @@ def test_constructor_categorical(self): def test_construct_from_categorical_with_dtype(self): # GH12574 cat = Series(Categorical([1, 2, 3]), dtype="category") - assert is_categorical_dtype(cat) - assert is_categorical_dtype(cat.dtype) + msg = "is_categorical_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert is_categorical_dtype(cat) + assert is_categorical_dtype(cat.dtype) def test_construct_intlist_values_category_dtype(self): ser = Series([1, 2, 3], dtype="category") - assert is_categorical_dtype(ser) - assert is_categorical_dtype(ser.dtype) + msg = "is_categorical_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert is_categorical_dtype(ser) + assert is_categorical_dtype(ser.dtype) def test_constructor_categorical_with_coercion(self): factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]) @@ -472,12 +476,12 @@ def test_constructor_categorical_dtype(self): result = Series( ["a", "b"], dtype=CategoricalDtype(["a", "b", "c"], ordered=True) ) - assert is_categorical_dtype(result.dtype) is True + assert isinstance(result.dtype, CategoricalDtype) tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"])) assert result.cat.ordered result = Series(["a", "b"], dtype=CategoricalDtype(["b", "a"])) - assert is_categorical_dtype(result.dtype) + assert isinstance(result.dtype, CategoricalDtype) tm.assert_index_equal(result.cat.categories, Index(["b", "a"])) assert result.cat.ordered is False