Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Change default value for CategoricalDtype.ordered from None to False #29955

Merged
merged 7 commits into from
Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`)
- Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`)
- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
-
- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)

.. _whatsnew_1000.performance:

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def __init__(
# sanitize input
if is_categorical_dtype(values):
if dtype.categories is None:
dtype = CategoricalDtype(values.categories, dtype._ordered)
dtype = CategoricalDtype(values.categories, dtype.ordered)
elif not isinstance(values, (ABCIndexClass, ABCSeries)):
# sanitize_array coerces np.nan to a string under certain versions
# of numpy
Expand All @@ -353,7 +353,7 @@ def __init__(
codes, categories = factorize(values, sort=True)
except TypeError:
codes, categories = factorize(values, sort=False)
if dtype._ordered:
if dtype.ordered:
# raise, as we don't have a sortable data structure and so
# the user should give us one by specifying categories
raise TypeError(
Expand All @@ -369,7 +369,7 @@ def __init__(
)

# we're inferring from values
dtype = CategoricalDtype(categories, dtype._ordered)
dtype = CategoricalDtype(categories, dtype.ordered)

elif is_categorical_dtype(values):
old_codes = (
Expand Down Expand Up @@ -439,7 +439,7 @@ def ordered(self) -> Ordered:
"""
Whether the categories have an ordered relationship.
"""
return self.dtype._ordered
return self.dtype.ordered

@property
def dtype(self) -> CategoricalDtype:
Expand Down Expand Up @@ -835,7 +835,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if ordered is None:
ordered = self.dtype._ordered
ordered = self.dtype.ordered
new_dtype = CategoricalDtype(new_categories, ordered=ordered)

cat = self if inplace else self.copy()
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def _try_cast(
# that Categorical is the only array type for 'category'.
dtype = cast(CategoricalDtype, dtype)
subarr = dtype.construct_array_type()(
arr, dtype.categories, ordered=dtype._ordered
arr, dtype.categories, ordered=dtype.ordered
)
elif is_extension_array_dtype(dtype):
# create an extension array from its dtype
Expand Down
95 changes: 44 additions & 51 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
""" define extension dtypes """
import re
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast
import warnings

import numpy as np
import pytz
Expand All @@ -18,10 +17,6 @@

str_type = str

# GH26403: sentinel value used for the default value of ordered in the
# CategoricalDtype constructor to detect when ordered=None is explicitly passed
ordered_sentinel: object = object()


def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
"""
Expand Down Expand Up @@ -218,14 +213,10 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
kind: str_type = "O"
str = "|O08"
base = np.dtype("O")
_metadata = ("categories", "ordered", "_ordered_from_sentinel")
_metadata = ("categories", "ordered")
_cache: Dict[str_type, PandasExtensionDtype] = {}

def __init__(
self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel
):
# TODO(GH26403): Set type of ordered to Ordered
ordered = cast(Ordered, ordered)
def __init__(self, categories=None, ordered: Ordered = False):
self._finalize(categories, ordered, fastpath=False)

@classmethod
Expand Down Expand Up @@ -338,36 +329,63 @@ def _from_values_or_dtype(

return dtype

@classmethod
def construct_from_string(cls, string: str_type) -> "CategoricalDtype":
    """
    Build a CategoricalDtype from its string alias.

    Parameters
    ----------
    string : str
        The dtype alias. Only the value equal to ``cls.name`` is accepted.

    Returns
    -------
    CategoricalDtype
        A new instance created with ``ordered=None``.

    Raises
    ------
    TypeError
        If ``string`` is not a ``str``, or is a ``str`` that differs from
        ``cls.name``.
    """
    if isinstance(string, str):
        if string == cls.name:
            # ordered=None (rather than the constructor default) so that
            # operations specifying the dtype by its string alias don't
            # override the ordered value of an existing categorical
            return cls(ordered=None)
        raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'")
    raise TypeError(f"Expects a string, got {type(string)}")

def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None:

if ordered is not None and ordered is not ordered_sentinel:
if ordered is not None:
self.validate_ordered(ordered)

if categories is not None:
categories = self.validate_categories(categories, fastpath=fastpath)

self._categories = categories
self._ordered = ordered if ordered is not ordered_sentinel else None
self._ordered_from_sentinel = ordered is ordered_sentinel
self._ordered = ordered

def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._categories = state.pop("categories", None)
self._ordered = state.pop("ordered", False)
self._ordered_from_sentinel = state.pop("_ordered_from_sentinel", False)

def __hash__(self) -> int:
# _hash_categories returns a uint64, so use the negative
# space for when we have unknown categories to avoid a conflict
if self.categories is None:
if self._ordered:
if self.ordered:
return -1
else:
return -2
# We *do* want to include the real self.ordered here
return int(self._hash_categories(self.categories, self._ordered))
return int(self._hash_categories(self.categories, self.ordered))

def __eq__(self, other: Any) -> bool:
"""
Expand All @@ -386,7 +404,7 @@ def __eq__(self, other: Any) -> bool:
return other == self.name
elif other is self:
return True
elif not (hasattr(other, "_ordered") and hasattr(other, "categories")):
elif not (hasattr(other, "ordered") and hasattr(other, "categories")):
return False
elif self.categories is None or other.categories is None:
# We're forced into a suboptimal corner thanks to math and
Expand All @@ -395,10 +413,10 @@ def __eq__(self, other: Any) -> bool:
# CDT(., .) = CDT(None, False) and *all*
# CDT(., .) = CDT(None, True).
return True
elif self._ordered or other._ordered:
elif self.ordered or other.ordered:
# At least one has ordered=True; equal if both have ordered=True
# and the same values for categories in the same order.
return (self._ordered == other._ordered) and self.categories.equals(
return (self.ordered == other.ordered) and self.categories.equals(
other.categories
)
else:
Expand All @@ -420,7 +438,7 @@ def __repr__(self) -> str_type:
data = "None, "
else:
data = self.categories._format_data(name=type(self).__name__)
return tpl.format(data=data, ordered=self._ordered)
return tpl.format(data=data, ordered=self.ordered)

@staticmethod
def _hash_categories(categories, ordered: Ordered = True) -> int:
Expand Down Expand Up @@ -557,26 +575,11 @@ def update_dtype(
# from here on, dtype is a CategoricalDtype
dtype = cast(CategoricalDtype, dtype)

# dtype is CDT: keep current categories/ordered if None
new_categories = dtype.categories
if new_categories is None:
new_categories = self.categories

new_ordered = dtype._ordered
new_ordered_from_sentinel = dtype._ordered_from_sentinel
if new_ordered is None:
# maintain existing ordered if new dtype has ordered=None
new_ordered = self._ordered
if self._ordered and new_ordered_from_sentinel:
# only warn if we'd actually change the existing behavior
msg = (
"Constructing a CategoricalDtype without specifying "
"`ordered` will default to `ordered=False` in a future "
"version, which will cause the resulting categorical's "
"`ordered` attribute to change to False; `ordered=True` "
"must be explicitly passed in order to be retained"
)
warnings.warn(msg, FutureWarning, stacklevel=3)
# update categories/ordered unless they've been explicitly passed as None
new_categories = (
dtype.categories if dtype.categories is not None else self.categories
)
new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered

return CategoricalDtype(new_categories, new_ordered)

Expand All @@ -592,16 +595,6 @@ def ordered(self) -> Ordered:
"""
Whether the categories have an ordered relationship.
"""
# TODO: remove if block when ordered=None as default is deprecated
if self._ordered_from_sentinel and self._ordered is None:
# warn when accessing ordered if ordered=None and None was not
# explicitly passed to the constructor
msg = (
"Constructing a CategoricalDtype without specifying "
"`ordered` will default to `ordered=False` in a future "
"version; `ordered=None` must be explicitly passed."
)
warnings.warn(msg, FutureWarning, stacklevel=2)
return self._ordered

@property
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
_is_unorderable_exception,
ensure_platform_int,
is_bool,
is_categorical,
is_categorical_dtype,
is_datetime64_dtype,
is_dict_like,
Expand Down Expand Up @@ -195,15 +194,6 @@ def __init__(
if data is None:
data = {}
if dtype is not None:
# GH 26336: explicitly handle 'category' to avoid warning
# TODO: Remove after CategoricalDtype defaults to ordered=False
if (
isinstance(dtype, str)
and dtype == "category"
and is_categorical(data)
):
dtype = data.dtype

dtype = self._validate_dtype(dtype)

if isinstance(data, MultiIndex):
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/arrays/categorical/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,6 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
expected = cat
tm.assert_categorical_equal(result, expected)

def test_astype_category_ordered_none_deprecated(self):
# GH 26336
cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
cdt2 = CategoricalDtype(categories=list("cedafb"))
cat = Categorical(list("abcdaba"), dtype=cdt1)
with tm.assert_produces_warning(FutureWarning):
cat.astype(cdt2)

def test_iter_python_types(self):
# GH-19909
cat = Categorical([1, 2])
Expand Down
50 changes: 8 additions & 42 deletions pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
DatetimeTZDtype,
IntervalDtype,
PeriodDtype,
ordered_sentinel,
registry,
)

Expand Down Expand Up @@ -65,8 +64,7 @@ def test_pickle(self):

class TestCategoricalDtype(Base):
def create(self):
# TODO(GH 26403): Remove when default ordered becomes False
return CategoricalDtype(ordered=None)
return CategoricalDtype()

def test_pickle(self):
# make sure our cache is NOT pickled
Expand Down Expand Up @@ -721,8 +719,7 @@ def test_unordered_same(self, ordered):
def test_categories(self):
result = CategoricalDtype(["a", "b", "c"])
tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))
with tm.assert_produces_warning(FutureWarning):
assert result.ordered is None
assert result.ordered is False

def test_equal_but_different(self, ordered_fixture):
c1 = CategoricalDtype([1, 2, 3])
Expand Down Expand Up @@ -847,25 +844,15 @@ def test_categorical_categories(self):
@pytest.mark.parametrize(
"new_categories", [list("abc"), list("cba"), list("wxyz"), None]
)
@pytest.mark.parametrize("new_ordered", [True, False, None, ordered_sentinel])
@pytest.mark.parametrize("new_ordered", [True, False, None])
def test_update_dtype(self, ordered_fixture, new_categories, new_ordered):
dtype = CategoricalDtype(list("abc"), ordered_fixture)
original_categories = list("abc")
dtype = CategoricalDtype(original_categories, ordered_fixture)
new_dtype = CategoricalDtype(new_categories, new_ordered)

expected_categories = new_dtype.categories
if expected_categories is None:
expected_categories = dtype.categories

expected_ordered = new_ordered
if new_ordered is ordered_sentinel or new_ordered is None:
expected_ordered = dtype.ordered

# GH 26336
if new_ordered is ordered_sentinel and ordered_fixture is True:
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = dtype.update_dtype(new_dtype)
else:
result = dtype.update_dtype(new_dtype)
result = dtype.update_dtype(new_dtype)
expected_categories = pd.Index(new_categories or original_categories)
expected_ordered = new_ordered if new_ordered is not None else dtype.ordered

tm.assert_index_equal(result.categories, expected_categories)
assert result.ordered is expected_ordered
Expand All @@ -885,27 +872,6 @@ def test_update_dtype_errors(self, bad_dtype):
with pytest.raises(ValueError, match=msg):
dtype.update_dtype(bad_dtype)

@pytest.mark.parametrize("ordered", [ordered_sentinel, None, True, False])
def test_ordered_none_default_deprecated(self, ordered):
# GH 26403: CDT.ordered only warns if ordered is not explicitly passed
dtype = CategoricalDtype(list("abc"), ordered=ordered)
warning = FutureWarning if ordered is ordered_sentinel else None
with tm.assert_produces_warning(warning):
dtype.ordered

@pytest.mark.parametrize("ordered", [True, False, None, ordered_sentinel])
def test_pickle_ordered_from_sentinel(self, ordered):
# GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
dtype = CategoricalDtype(categories=list("abc"), ordered=ordered)

warning = FutureWarning if ordered is ordered_sentinel else None
with tm.assert_produces_warning(warning, check_stacklevel=False):
dtype_from_pickle = tm.round_trip_pickle(dtype)

result = dtype_from_pickle._ordered_from_sentinel
expected = ordered is ordered_sentinel
assert result is expected


@pytest.mark.parametrize(
"dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype]
Expand Down
Loading