From 288b796f17625228546b07c6b54abd8143f5f28d Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sun, 18 Nov 2018 23:29:54 +0100 Subject: [PATCH] API: Make Categorical.searchsorted returns a scalar when supplied a scalar (#23466) --- doc/source/whatsnew/v0.24.0.rst | 2 ++ pandas/core/arrays/categorical.py | 12 +++++------- pandas/tests/arrays/categorical/test_analytics.py | 14 +++++++------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d24dfdddc545c6..29896eb0036483 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1008,6 +1008,8 @@ Other API Changes - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`) - Comparing :class:`Timedelta` to be less or greater than unknown types now raises a ``TypeError`` instead of returning ``False`` (:issue:`20829`) +- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`). +- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`). - :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`). - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. 
(:issue:`23614`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d226d8c2e7ee24..276ef6426a51b6 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1344,15 +1344,13 @@ def searchsorted(self, value, side='left', sorter=None): "ordered one") from pandas.core.series import Series + codes = _get_codes_for_values(Series(value).values, self.categories) + if -1 in codes: + raise KeyError("Value(s) to be inserted must be in categories.") - values_as_codes = _get_codes_for_values(Series(value).values, - self.categories) + codes = codes[0] if is_scalar(value) else codes - if -1 in values_as_codes: - raise ValueError("Value(s) to be inserted must be in categories.") - - return self.codes.searchsorted(values_as_codes, side=side, - sorter=sorter) + return self.codes.searchsorted(codes, side=side, sorter=sorter) def isna(self): """ diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index ea6facd66a1a38..4251273e424dd0 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -85,10 +85,10 @@ def test_searchsorted(self): # Searching for single item argument, side='left' (default) res_cat = c1.searchsorted('apple') + assert res_cat == 2 + res_ser = s1.searchsorted('apple') - exp = np.array([2], dtype=np.intp) - tm.assert_numpy_array_equal(res_cat, exp) - tm.assert_numpy_array_equal(res_ser, exp) + assert res_ser == 2 # Searching for single item array, side='left' (default) res_cat = c1.searchsorted(['bread']) @@ -105,13 +105,13 @@ def test_searchsorted(self): tm.assert_numpy_array_equal(res_ser, exp) # Searching for a single value that is not from the Categorical - pytest.raises(ValueError, lambda: c1.searchsorted('cucumber')) - pytest.raises(ValueError, lambda: s1.searchsorted('cucumber')) + pytest.raises(KeyError, lambda: c1.searchsorted('cucumber')) + 
pytest.raises(KeyError, lambda: s1.searchsorted('cucumber')) # Searching for multiple values one of each is not from the Categorical - pytest.raises(ValueError, + pytest.raises(KeyError, lambda: c1.searchsorted(['bread', 'cucumber'])) - pytest.raises(ValueError, + pytest.raises(KeyError, lambda: s1.searchsorted(['bread', 'cucumber'])) # searchsorted call for unordered Categorical