From c866a4ad6b419f33bc004513a41b3b64e0b587f6 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Sep 2023 17:33:08 -0400 Subject: [PATCH] BUG: Categorical.isin raising for overlapping intervals (#54951) fix Categorical.isin raising for overlapping intervals --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/arrays/categorical.py | 2 +- pandas/tests/indexes/categorical/test_category.py | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 17abb3debe3e7..89b4d102fcf04 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -171,7 +171,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index da4bf987d0386..8d2633c10b428 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2597,7 +2597,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]: ) values = sanitize_array(values, None, None) null_mask = np.asarray(isna(values)) - code_values = self.categories.get_indexer(values) + code_values = self.categories.get_indexer_for(values) code_values = code_values[null_mask | (code_values >= 0)] return algorithms.isin(self.codes, code_values) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 64cbe657a8aff..87facbf529411 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -228,6 +228,13 @@ def test_isin(self): expected = np.array([False] * 5 + [True]) tm.assert_numpy_array_equal(result, expected) + def test_isin_overlapping_intervals(self): + # GH 34974 + idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)]) + result = CategoricalIndex(idx).isin(idx) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + def test_identical(self): ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)