From 41dfcccf0771950ed94b7bd4110c92a0c7397af7 Mon Sep 17 00:00:00 2001 From: paulreece <96156234+paulreece@users.noreply.github.com> Date: Tue, 11 Jul 2023 13:32:50 -0400 Subject: [PATCH] BUG: fixes #53935 Categorical order lost after call to remove_categories (#54027) * Changed the default value for sort to 'False' in the difference method. This allows the difference method to then call the _difference method and finally call the _maybe_try_sort method. In the _maybe_try_sort method it will sort the values if sort is not False. That's why in the original code having sort=None would still sort the categories. This way the code will only sort if you set sort=True. * Added test to show the variable value change behaves the way we want it to. * Added bug fix to whatsnew. * Changed bug fix implementation to simply check if the Ordered value is set to True, if so it sets sort=False in the call to difference in remove_categories. * Changed bug fix implementation to simply check if the Ordered value is set to True, if so it sets sort=False in the call to difference in remove_categories. * Switched the implementation to a ternary to check for ordered. This seems to work better since we are not overriding a default argument this way. --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/categorical.py | 6 +++++- pandas/tests/indexes/categorical/test_category.py | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e0cee7c20820a..6fcddad70f22b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -376,6 +376,7 @@ Bug fixes Categorical ^^^^^^^^^^^ +- Bug in :meth:`CategoricalIndex.remove_categories` where ordered categories would not be maintained (:issue:`53935`). 
- Bug in :meth:`Series.astype` with ``dtype="category"`` for nullable arrays with read-only null value masks (:issue:`53658`) - Bug in :meth:`Series.map` , where the value of the ``na_action`` parameter was not used if the series held a :class:`Categorical` (:issue:`22527`). - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6c61ce7a3e99b..8898379689cfd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1369,7 +1369,11 @@ def remove_categories(self, removals) -> Self: removals = [removals] removals = Index(removals).unique().dropna() - new_categories = self.dtype.categories.difference(removals) + new_categories = ( + self.dtype.categories.difference(removals, sort=False) + if self.dtype.ordered is True + else self.dtype.categories.difference(removals) + ) not_included = removals.difference(self.dtype.categories) if len(not_included) != 0: diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 232d966e39a01..873d06db58fab 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -373,3 +373,18 @@ def test_method_delegation(self): msg = "cannot use inplace with CategoricalIndex" with pytest.raises(ValueError, match=msg): ci.set_categories(list("cab"), inplace=True) + + def test_remove_maintains_order(self): + ci = CategoricalIndex(list("abcdda"), categories=list("abcd")) + result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True) + tm.assert_index_equal( + result, + CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True), + ) + result = result.remove_categories(["c"]) + tm.assert_index_equal( + result, + CategoricalIndex( + ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True + ), + )