diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ca60dea241d88..61119089fdb42 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -477,6 +477,7 @@ Deprecations - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) +- :meth:`Categorical.from_codes`: providing float values for the ``codes`` argument is deprecated (:issue:`21767`) .. _whatsnew_0240.prior_deprecations: @@ -524,9 +525,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- -- -- +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). ``NaN`` and other non-integer float codes, such as ``.from_codes([1.1, 2.0])``, now raise a ``ValueError``; integer-valued float codes emit a ``FutureWarning`` and will raise a ``ValueError`` in the future. Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3d9f1ca4027fd..eebdfe8a54a9d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,6 +27,8 @@ is_timedelta64_dtype, is_categorical, is_categorical_dtype, + is_float_dtype, + is_integer_dtype, is_list_like, is_sequence, is_scalar, is_iterator, is_dict_like) @@ -633,8 +635,21 @@ def from_codes(cls, codes, categories, ordered=False): categorical. If not given, the resulting categorical will be unordered. 
""" + codes = np.asarray(codes) # #21767 + if not is_integer_dtype(codes): + msg = "codes need to be array-like integers" + if is_float_dtype(codes): + icodes = codes.astype('i8') + if (icodes == codes).all(): + msg = None + codes = icodes + warn(("float codes will be disallowed in the future and " + "raise a ValueError"), FutureWarning, stacklevel=2) + if msg: + raise ValueError(msg) + try: - codes = coerce_indexer_dtype(np.asarray(codes), categories) + codes = coerce_indexer_dtype(codes, categories) except (ValueError, TypeError): raise ValueError( "codes need to be convertible to an arrays of integers") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index e5d620df96493..b5f499ba27323 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -468,6 +468,26 @@ def test_from_codes_with_categorical_categories(self): with pytest.raises(ValueError): Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a'])) + def test_from_codes_with_nan_code(self): + # GH21767 + codes = [1, 2, np.nan] + categories = ['a', 'b', 'c'] + with pytest.raises(ValueError): + Categorical.from_codes(codes, categories) + + def test_from_codes_with_float(self): + # GH21767 + codes = [1.0, 2.0, 0] # integer, but in float dtype + categories = ['a', 'b', 'c'] + + with tm.assert_produces_warning(FutureWarning): + cat = Categorical.from_codes(codes, categories) + tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1')) + + codes = [1.1, 2.0, 0] # non-integer + with pytest.raises(ValueError): + Categorical.from_codes(codes, categories) + @pytest.mark.parametrize('dtype', [None, 'category']) def test_from_inferred_categories(self, dtype): cats = ['a', 'b']