pandas-dev · jreback · Feb 10, 2018 · Dec 21, 2017 · Feb 7, 2018 · Feb 7, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -460,6 +460,29 @@ To restore previous behavior, simply set ``expand`` to ``False``:
     extracted
     type(extracted)
 
+.. _whatsnew_0230.api_breaking.cdt_ordered:
+
+Default value for the ``ordered`` parameter of ``CategoricalDtype``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``.  Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`)
+
+In previous versions, the default value for the ``ordered`` parameter was ``False``.  This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``.  The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``.
+
+New Behavior:
+
+.. ipython:: python
+
+    from pandas.api.types import CategoricalDtype
+    cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba'))
+    cat
+    cdt = CategoricalDtype(categories=list('cbad'))
+    cat.astype(cdt)
+
+Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``.  Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified.  To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``.
+
+Note that the unintenional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`).  These bugs have been fixed in this release, and motivated changing the default value of ``ordered``.
+
 .. _whatsnew_0230.api:
 
 Other API Changes

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -243,7 +243,7 @@ class Categorical(ExtensionArray, PandasObject):
     # For comparisons, so that numpy uses our implementation if the compare
     # ops, which raise
     __array_priority__ = 1000
-    _dtype = CategoricalDtype()
+    _dtype = CategoricalDtype(ordered=False)
     _deprecations = frozenset(['labels'])
     _typ = 'categorical'
 
@@ -294,7 +294,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
 
         if fastpath:
             self._codes = coerce_indexer_dtype(values, categories)
-            self._dtype = dtype
+            self._dtype = self._dtype.update_dtype(dtype)
             return
 
         # null_mask indicates missing values we want to exclude from inference.
@@ -358,7 +358,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
             full_codes[~null_mask] = codes
             codes = full_codes
 
-        self._dtype = dtype
+        self._dtype = self._dtype.update_dtype(dtype)
         self._codes = coerce_indexer_dtype(codes, dtype.categories)
 
     @property
@@ -438,7 +438,7 @@ def astype(self, dtype, copy=True):
         """
         if is_categorical_dtype(dtype):
             # GH 10696/18593
-            dtype = self.dtype._update_dtype(dtype)
+            dtype = self.dtype.update_dtype(dtype)
             self = self.copy() if copy else self
             if dtype == self.dtype:
                 return self
@@ -560,7 +560,7 @@ def from_codes(cls, codes, categories, ordered=False):
             raise ValueError(
                 "codes need to be convertible to an arrays of integers")
 
-        categories = CategoricalDtype._validate_categories(categories)
+        categories = CategoricalDtype.validate_categories(categories)
 
         if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
             raise ValueError("codes need to be between -1 and "
@@ -1165,7 +1165,7 @@ def __setstate__(self, state):
 
         # Provide compatibility with pre-0.15.0 Categoricals.
         if '_categories' not in state and '_levels' in state:
-            state['_categories'] = self.dtype._validate_categories(state.pop(
+            state['_categories'] = self.dtype.validate_categories(state.pop(
                 '_levels'))
         if '_codes' not in state and 'labels' in state:
             state['_codes'] = coerce_indexer_dtype(

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -159,11 +159,11 @@ class CategoricalDtype(PandasExtensionDtype):
     _metadata = ['categories', 'ordered']
     _cache = {}
 
-    def __init__(self, categories=None, ordered=False):
+    def __init__(self, categories=None, ordered=None):
         self._finalize(categories, ordered, fastpath=False)
 
     @classmethod
-    def _from_fastpath(cls, categories=None, ordered=False):
+    def _from_fastpath(cls, categories=None, ordered=None):
         self = cls.__new__(cls)
         self._finalize(categories, ordered, fastpath=True)
         return self
@@ -180,14 +180,12 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
 
     def _finalize(self, categories, ordered, fastpath=False):
 
-        if ordered is None:
-            ordered = False
-        else:
-            self._validate_ordered(ordered)
+        if ordered is not None:
+            self.validate_ordered(ordered)
 
         if categories is not None:
-            categories = self._validate_categories(categories,
-                                                   fastpath=fastpath)
+            categories = self.validate_categories(categories,
+                                                  fastpath=fastpath)
 
         self._categories = categories
         self._ordered = ordered
@@ -208,6 +206,17 @@ def __hash__(self):
         return int(self._hash_categories(self.categories, self.ordered))
 
     def __eq__(self, other):
+        """
+        Rules for CDT equality:
+        1) Any CDT is equal to the string 'category'
+        2) Any CDT is equal to a CDT with categories=None regardless of ordered
+        3) A CDT with ordered=True is only equal to another CDT with
+           ordered=True and identical categories in the same order
+        4) A CDT with ordered={False, None} is only equal to another CDT with
+           ordered={False, None} and identical categories, but same order is
+           not required. There is no distinction between False/None.
+        5) Any other comparison returns False
+        """
         if isinstance(other, compat.string_types):
             return other == self.name
 
@@ -220,12 +229,16 @@ def __eq__(self, other):
             # CDT(., .) = CDT(None, False) and *all*
             # CDT(., .) = CDT(None, True).
             return True
-        elif self.ordered:
-            return other.ordered and self.categories.equals(other.categories)
-        elif other.ordered:
-            return False
+        elif self.ordered or other.ordered:
+            # At least one has ordered=True; equal if both have ordered=True
+            # and the same values for categories in the same order.
+            return ((self.ordered == other.ordered) and
+                    self.categories.equals(other.categories))
         else:
-            # both unordered; this could probably be optimized / cached
+            # Neither has ordered=True; equal if both have the same categories,
+            # but same order is not necessary.  There is no distinction between
+            # ordered=False and ordered=None: CDT(., False) and CDT(., None)
+            # will be equal if they have the same categories.
             return hash(self) == hash(other)
 
     def __repr__(self):
@@ -288,7 +301,7 @@ def construct_from_string(cls, string):
         raise TypeError("cannot construct a CategoricalDtype")
 
     @staticmethod
-    def _validate_ordered(ordered):
+    def validate_ordered(ordered):
         """
         Validates that we have a valid ordered parameter. If
         it is not a boolean, a TypeError will be raised.
@@ -308,7 +321,7 @@ def _validate_ordered(ordered):
             raise TypeError("'ordered' must either be 'True' or 'False'")
 
     @staticmethod
-    def _validate_categories(categories, fastpath=False):
+    def validate_categories(categories, fastpath=False):
         """
         Validates that we have good categories
 
@@ -340,7 +353,7 @@ def _validate_categories(categories, fastpath=False):
 
         return categories
 
-    def _update_dtype(self, dtype):
+    def update_dtype(self, dtype):
         """
         Returns a CategoricalDtype with categories and ordered taken from dtype
         if specified, otherwise falling back to self if unspecified
@@ -361,11 +374,16 @@ def _update_dtype(self, dtype):
                    'got {dtype!r}').format(dtype=dtype)
             raise ValueError(msg)
 
-        # dtype is CDT: keep current categories if None (ordered can't be None)
+        # dtype is CDT: keep current categories/ordered if None
         new_categories = dtype.categories
         if new_categories is None:
             new_categories = self.categories
-        return CategoricalDtype(new_categories, dtype.ordered)
+
+        new_ordered = dtype.ordered
+        if new_ordered is None:
+            new_ordered = self.ordered
+
+        return CategoricalDtype(new_categories, new_ordered)
 
     @property
     def categories(self):

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -344,7 +344,7 @@ def astype(self, dtype, copy=True):
             return IntervalIndex(np.array(self))
         elif is_categorical_dtype(dtype):
             # GH 18630
-            dtype = self.dtype._update_dtype(dtype)
+            dtype = self.dtype.update_dtype(dtype)
             if dtype == self.dtype:
                 return self.copy() if copy else self