From 2511d20f698ceb4a5ae7bb4c7a967061dbd164d2 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Mon, 9 Dec 2019 02:10:55 -0700
Subject: [PATCH] DEPR: Change default value for CategoricalDtype.ordered from
 None to False (#29955)

---
 doc/source/whatsnew/v1.0.0.rst                |   1 +
 pandas/core/arrays/categorical.py             |  10 +-
 pandas/core/construction.py                   |   2 +-
 pandas/core/dtypes/dtypes.py                  | 101 +++++++++---------
 pandas/core/series.py                         |  10 --
 .../tests/arrays/categorical/test_dtypes.py   |   8 --
 pandas/tests/dtypes/test_dtypes.py            |  50 ++-------
 pandas/tests/indexes/test_category.py         |  13 +--
 pandas/tests/series/test_constructors.py      |  18 +---
 pandas/tests/series/test_dtypes.py            |  13 +--
 pandas/tests/series/test_io.py                |   9 --
 11 files changed, 67 insertions(+), 168 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 6cdb9221a7a6a1..0d83d74b5e0c15 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -622,6 +622,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`)
 - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`)
 - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
+- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
 - :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`)
 -
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index a2e456581cb4fb..1e470e44ed9333 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -328,7 +328,7 @@ def __init__(
         # sanitize input
         if is_categorical_dtype(values):
             if dtype.categories is None:
-                dtype = CategoricalDtype(values.categories, dtype._ordered)
+                dtype = CategoricalDtype(values.categories, dtype.ordered)
         elif not isinstance(values, (ABCIndexClass, ABCSeries)):
             # sanitize_array coerces np.nan to a string under certain versions
             # of numpy
@@ -351,7 +351,7 @@ def __init__(
                 codes, categories = factorize(values, sort=True)
             except TypeError:
                 codes, categories = factorize(values, sort=False)
-                if dtype._ordered:
+                if dtype.ordered:
                     # raise, as we don't have a sortable data structure and so
                     # the user should give us one by specifying categories
                     raise TypeError(
@@ -367,7 +367,7 @@ def __init__(
                 )
 
             # we're inferring from values
-            dtype = CategoricalDtype(categories, dtype._ordered)
+            dtype = CategoricalDtype(categories, dtype.ordered)
 
         elif is_categorical_dtype(values):
             old_codes = (
@@ -437,7 +437,7 @@ def ordered(self) -> Ordered:
         """
         Whether the categories have an ordered relationship.
         """
-        return self.dtype._ordered
+        return self.dtype.ordered
 
     @property
     def dtype(self) -> CategoricalDtype:
@@ -833,7 +833,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         if ordered is None:
-            ordered = self.dtype._ordered
+            ordered = self.dtype.ordered
         new_dtype = CategoricalDtype(new_categories, ordered=ordered)
 
         cat = self if inplace else self.copy()
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index b03c69d8653013..c7dec9e1234d22 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -558,7 +558,7 @@ def _try_cast(
             # that Categorical is the only array type for 'category'.
             dtype = cast(CategoricalDtype, dtype)
             subarr = dtype.construct_array_type()(
-                arr, dtype.categories, ordered=dtype._ordered
+                arr, dtype.categories, ordered=dtype.ordered
             )
         elif is_extension_array_dtype(dtype):
             # create an extension array from its dtype
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 9e16ba670344ee..2bb27de320e7ed 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1,7 +1,6 @@
 """ define extension dtypes """
 import re
 from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast
-import warnings
 
 import numpy as np
 import pytz
@@ -18,10 +17,6 @@
 
 str_type = str
 
-# GH26403: sentinel value used for the default value of ordered in the
-# CategoricalDtype constructor to detect when ordered=None is explicitly passed
-ordered_sentinel: object = object()
-
 
 def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
     """
@@ -179,7 +174,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
     ----------
     categories : sequence, optional
         Must be unique, and must not contain any nulls.
-    ordered : bool, default False
+    ordered : bool or None, default False
+        Whether or not this categorical is treated as an ordered categorical.
+        None can be used to maintain the ordered value of existing categoricals when
+        used in operations that combine categoricals, e.g. astype, and will resolve to
+        False if there is no existing ordered value to maintain.
 
     Attributes
     ----------
@@ -218,14 +217,10 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
     kind: str_type = "O"
     str = "|O08"
     base = np.dtype("O")
-    _metadata = ("categories", "ordered", "_ordered_from_sentinel")
+    _metadata = ("categories", "ordered")
     _cache: Dict[str_type, PandasExtensionDtype] = {}
 
-    def __init__(
-        self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel
-    ):
-        # TODO(GH26403): Set type of ordered to Ordered
-        ordered = cast(Ordered, ordered)
+    def __init__(self, categories=None, ordered: Ordered = False):
         self._finalize(categories, ordered, fastpath=False)
 
     @classmethod
@@ -338,17 +333,45 @@ def _from_values_or_dtype(
 
         return dtype
 
+    @classmethod
+    def construct_from_string(cls, string: str_type) -> "CategoricalDtype":
+        """
+        Construct a CategoricalDtype from a string.
+
+        Parameters
+        ----------
+        string : str
+            Must be the string "category" in order to be successfully constructed.
+
+        Returns
+        -------
+        CategoricalDtype
+            Instance of the dtype.
+
+        Raises
+        ------
+        TypeError
+            If a CategoricalDtype cannot be constructed from the input.
+        """
+        if not isinstance(string, str):
+            raise TypeError(f"Expects a string, got {type(string)}")
+        if string != cls.name:
+            raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'")
+
+        # need ordered=None to ensure that operations specifying dtype="category" don't
+        # override the ordered value for existing categoricals
+        return cls(ordered=None)
+
     def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None:
 
-        if ordered is not None and ordered is not ordered_sentinel:
+        if ordered is not None:
             self.validate_ordered(ordered)
 
         if categories is not None:
             categories = self.validate_categories(categories, fastpath=fastpath)
 
         self._categories = categories
-        self._ordered = ordered if ordered is not ordered_sentinel else None
-        self._ordered_from_sentinel = ordered is ordered_sentinel
+        self._ordered = ordered
 
     def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
         # for pickle compat. __get_state__ is defined in the
@@ -356,18 +379,17 @@ def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
         # pickle -> need to set the settable private ones here (see GH26067)
         self._categories = state.pop("categories", None)
         self._ordered = state.pop("ordered", False)
-        self._ordered_from_sentinel = state.pop("_ordered_from_sentinel", False)
 
     def __hash__(self) -> int:
         # _hash_categories returns a uint64, so use the negative
         # space for when we have unknown categories to avoid a conflict
         if self.categories is None:
-            if self._ordered:
+            if self.ordered:
                 return -1
             else:
                 return -2
         # We *do* want to include the real self.ordered here
-        return int(self._hash_categories(self.categories, self._ordered))
+        return int(self._hash_categories(self.categories, self.ordered))
 
     def __eq__(self, other: Any) -> bool:
         """
@@ -386,7 +408,7 @@ def __eq__(self, other: Any) -> bool:
             return other == self.name
         elif other is self:
             return True
-        elif not (hasattr(other, "_ordered") and hasattr(other, "categories")):
+        elif not (hasattr(other, "ordered") and hasattr(other, "categories")):
             return False
         elif self.categories is None or other.categories is None:
             # We're forced into a suboptimal corner thanks to math and
@@ -395,10 +417,10 @@ def __eq__(self, other: Any) -> bool:
             # CDT(., .) = CDT(None, False) and *all*
             # CDT(., .) = CDT(None, True).
             return True
-        elif self._ordered or other._ordered:
+        elif self.ordered or other.ordered:
             # At least one has ordered=True; equal if both have ordered=True
             # and the same values for categories in the same order.
-            return (self._ordered == other._ordered) and self.categories.equals(
+            return (self.ordered == other.ordered) and self.categories.equals(
                 other.categories
             )
         else:
@@ -420,7 +442,7 @@ def __repr__(self) -> str_type:
             data = "None, "
         else:
             data = self.categories._format_data(name=type(self).__name__)
-        return tpl.format(data=data, ordered=self._ordered)
+        return tpl.format(data=data, ordered=self.ordered)
 
     @staticmethod
     def _hash_categories(categories, ordered: Ordered = True) -> int:
@@ -557,26 +579,11 @@ def update_dtype(
             # from here on, dtype is a CategoricalDtype
             dtype = cast(CategoricalDtype, dtype)
 
-        # dtype is CDT: keep current categories/ordered if None
-        new_categories = dtype.categories
-        if new_categories is None:
-            new_categories = self.categories
-
-        new_ordered = dtype._ordered
-        new_ordered_from_sentinel = dtype._ordered_from_sentinel
-        if new_ordered is None:
-            # maintain existing ordered if new dtype has ordered=None
-            new_ordered = self._ordered
-            if self._ordered and new_ordered_from_sentinel:
-                # only warn if we'd actually change the existing behavior
-                msg = (
-                    "Constructing a CategoricalDtype without specifying "
-                    "`ordered` will default to `ordered=False` in a future "
-                    "version, which will cause the resulting categorical's "
-                    "`ordered` attribute to change to False; `ordered=True` "
-                    "must be explicitly passed in order to be retained"
-                )
-                warnings.warn(msg, FutureWarning, stacklevel=3)
+        # update categories/ordered unless they've been explicitly passed as None
+        new_categories = (
+            dtype.categories if dtype.categories is not None else self.categories
+        )
+        new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered
 
         return CategoricalDtype(new_categories, new_ordered)
 
@@ -592,16 +599,6 @@ def ordered(self) -> Ordered:
         """
         Whether the categories have an ordered relationship.
         """
-        # TODO: remove if block when ordered=None as default is deprecated
-        if self._ordered_from_sentinel and self._ordered is None:
-            # warn when accessing ordered if ordered=None and None was not
-            # explicitly passed to the constructor
-            msg = (
-                "Constructing a CategoricalDtype without specifying "
-                "`ordered` will default to `ordered=False` in a future "
-                "version; `ordered=None` must be explicitly passed."
-            )
-            warnings.warn(msg, FutureWarning, stacklevel=2)
         return self._ordered
 
     @property
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9d75025ebcb1a1..965736a097c218 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -20,7 +20,6 @@
     _is_unorderable_exception,
     ensure_platform_int,
     is_bool,
-    is_categorical,
     is_categorical_dtype,
     is_datetime64_dtype,
     is_dict_like,
@@ -213,15 +212,6 @@ def __init__(
             if data is None:
                 data = {}
             if dtype is not None:
-                # GH 26336: explicitly handle 'category' to avoid warning
-                # TODO: Remove after CategoricalDtype defaults to ordered=False
-                if (
-                    isinstance(dtype, str)
-                    and dtype == "category"
-                    and is_categorical(data)
-                ):
-                    dtype = data.dtype
-
                 dtype = self._validate_dtype(dtype)
 
             if isinstance(data, MultiIndex):
diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
index c08ad1da386718..85bf385b029a36 100644
--- a/pandas/tests/arrays/categorical/test_dtypes.py
+++ b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -161,14 +161,6 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
             expected = cat
             tm.assert_categorical_equal(result, expected)
 
-    def test_astype_category_ordered_none_deprecated(self):
-        # GH 26336
-        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
-        cdt2 = CategoricalDtype(categories=list("cedafb"))
-        cat = Categorical(list("abcdaba"), dtype=cdt1)
-        with tm.assert_produces_warning(FutureWarning):
-            cat.astype(cdt2)
-
     def test_iter_python_types(self):
         # GH-19909
         cat = Categorical([1, 2])
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index bbf44006611fba..6c6ff3272c0125 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -22,7 +22,6 @@
     DatetimeTZDtype,
     IntervalDtype,
     PeriodDtype,
-    ordered_sentinel,
     registry,
 )
 
@@ -65,8 +64,7 @@ def test_pickle(self):
 
 class TestCategoricalDtype(Base):
     def create(self):
-        # TODO(GH 26403): Remove when default ordered becomes False
-        return CategoricalDtype(ordered=None)
+        return CategoricalDtype()
 
     def test_pickle(self):
         # make sure our cache is NOT pickled
@@ -721,8 +719,7 @@ def test_unordered_same(self, ordered):
     def test_categories(self):
         result = CategoricalDtype(["a", "b", "c"])
         tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))
-        with tm.assert_produces_warning(FutureWarning):
-            assert result.ordered is None
+        assert result.ordered is False
 
     def test_equal_but_different(self, ordered_fixture):
         c1 = CategoricalDtype([1, 2, 3])
@@ -847,25 +844,15 @@ def test_categorical_categories(self):
     @pytest.mark.parametrize(
         "new_categories", [list("abc"), list("cba"), list("wxyz"), None]
     )
-    @pytest.mark.parametrize("new_ordered", [True, False, None, ordered_sentinel])
+    @pytest.mark.parametrize("new_ordered", [True, False, None])
     def test_update_dtype(self, ordered_fixture, new_categories, new_ordered):
-        dtype = CategoricalDtype(list("abc"), ordered_fixture)
+        original_categories = list("abc")
+        dtype = CategoricalDtype(original_categories, ordered_fixture)
         new_dtype = CategoricalDtype(new_categories, new_ordered)
 
-        expected_categories = new_dtype.categories
-        if expected_categories is None:
-            expected_categories = dtype.categories
-
-        expected_ordered = new_ordered
-        if new_ordered is ordered_sentinel or new_ordered is None:
-            expected_ordered = dtype.ordered
-
-        # GH 26336
-        if new_ordered is ordered_sentinel and ordered_fixture is True:
-            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-                result = dtype.update_dtype(new_dtype)
-        else:
-            result = dtype.update_dtype(new_dtype)
+        result = dtype.update_dtype(new_dtype)
+        expected_categories = pd.Index(new_categories or original_categories)
+        expected_ordered = new_ordered if new_ordered is not None else dtype.ordered
 
         tm.assert_index_equal(result.categories, expected_categories)
         assert result.ordered is expected_ordered
@@ -885,27 +872,6 @@ def test_update_dtype_errors(self, bad_dtype):
         with pytest.raises(ValueError, match=msg):
             dtype.update_dtype(bad_dtype)
 
-    @pytest.mark.parametrize("ordered", [ordered_sentinel, None, True, False])
-    def test_ordered_none_default_deprecated(self, ordered):
-        # GH 26403: CDT.ordered only warns if ordered is not explicitly passed
-        dtype = CategoricalDtype(list("abc"), ordered=ordered)
-        warning = FutureWarning if ordered is ordered_sentinel else None
-        with tm.assert_produces_warning(warning):
-            dtype.ordered
-
-    @pytest.mark.parametrize("ordered", [True, False, None, ordered_sentinel])
-    def test_pickle_ordered_from_sentinel(self, ordered):
-        # GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
-        dtype = CategoricalDtype(categories=list("abc"), ordered=ordered)
-
-        warning = FutureWarning if ordered is ordered_sentinel else None
-        with tm.assert_produces_warning(warning, check_stacklevel=False):
-            dtype_from_pickle = tm.round_trip_pickle(dtype)
-
-        result = dtype_from_pickle._ordered_from_sentinel
-        expected = ordered is ordered_sentinel
-        assert result is expected
-
 
 @pytest.mark.parametrize(
     "dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype]
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 86219d77542af6..7286fca42848c4 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -5,7 +5,7 @@
 
 from pandas._libs import index as libindex
 
-from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel
+from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
 from pandas import Categorical, IntervalIndex
@@ -525,17 +525,6 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
             expected = index
             tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "none, warning", [(None, None), (ordered_sentinel, FutureWarning)]
-    )
-    def test_astype_category_ordered_none_deprecated(self, none, warning):
-        # GH 26336: only warn if None is not explicitly passed
-        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
-        cdt2 = CategoricalDtype(categories=list("cedafb"), ordered=none)
-        idx = CategoricalIndex(list("abcdaba"), dtype=cdt1)
-        with tm.assert_produces_warning(warning):
-            idx.astype(cdt2)
-
     def test_reindex_base(self):
         # Determined by cat ordering.
         idx = CategoricalIndex(list("cab"), categories=list("cab"))
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 293ec9580436ef..c772038619db0a 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -9,7 +9,7 @@
 from pandas._libs.tslib import iNaT
 
 from pandas.core.dtypes.common import is_categorical_dtype, is_datetime64tz_dtype
-from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel
+from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
 from pandas import (
@@ -401,22 +401,6 @@ def test_constructor_categorical_string(self):
         result = Series(result, dtype="category")
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "none, warning", [(None, None), (ordered_sentinel, FutureWarning)]
-    )
-    def test_categorical_ordered_none_deprecated(self, none, warning):
-        # GH 26336: only warn if None is not explicitly passed
-        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
-        cdt2 = CategoricalDtype(categories=list("cedafb"), ordered=none)
-
-        cat = Categorical(list("abcdaba"), dtype=cdt1)
-        with tm.assert_produces_warning(warning, check_stacklevel=False):
-            Series(cat, dtype=cdt2)
-
-        s = Series(cat)
-        with tm.assert_produces_warning(warning, check_stacklevel=False):
-            Series(s, dtype=cdt2)
-
     def test_categorical_sideeffects_free(self):
         # Passing a categorical to a Series and then changing values in either
         # the series or the categorical should not change the values in the
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 22b00425abb6b4..ff4842791b4fd6 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -8,7 +8,7 @@
 
 from pandas._libs.tslibs import iNaT
 
-from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel
+from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
 from pandas import (
@@ -219,17 +219,6 @@ def test_astype_categories_raises(self):
         with pytest.raises(TypeError, match="got an unexpected"):
             s.astype("category", categories=["a", "b"], ordered=True)
 
-    @pytest.mark.parametrize(
-        "none, warning", [(None, None), (ordered_sentinel, FutureWarning)]
-    )
-    def test_astype_category_ordered_none_deprecated(self, none, warning):
-        # GH 26336: only warn if None is not explicitly passed
-        cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
-        cdt2 = CategoricalDtype(categories=list("cedafb"), ordered=none)
-        s = Series(list("abcdaba"), dtype=cdt1)
-        with tm.assert_produces_warning(warning, check_stacklevel=False):
-            s.astype(cdt2)
-
     def test_astype_from_categorical(self):
         items = ["a", "b", "c", "a"]
         s = Series(items)
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index cd32b2188b892a..9041d582b19ca0 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -216,15 +216,6 @@ def test_pickle_preserve_name(self):
             unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n))
             assert unpickled.name == n
 
-    def test_pickle_categorical_ordered_from_sentinel(self):
-        # GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
-        s = Series(["a", "b", "c", "a"], dtype="category")
-        result = tm.round_trip_pickle(s)
-        result = result.astype("category")
-
-        tm.assert_series_equal(result, s)
-        assert result.dtype._ordered_from_sentinel is False
-
     def _pickle_roundtrip_name(self, obj):
 
         with tm.ensure_clean() as path: