Skip to content

Commit

Permalink
deprecate categories and ordered parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Dec 23, 2018
1 parent 6e42d80 commit 6997fd8
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 69 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,7 @@ Deprecations
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been made uniform with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Pass a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
Expand Down
24 changes: 14 additions & 10 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,11 +639,13 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
return cls(codes, dtype=dtype, fastpath=True)

@classmethod
@deprecate_kwarg(old_arg_name='categories', new_arg_name=None)
@deprecate_kwarg(old_arg_name='ordered', new_arg_name=None)
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
"""
Make a Categorical type from codes and categories arrays.
Make a Categorical type from codes and CategoricalDtype.
This constructor is useful if you already have codes and categories and
This constructor is useful if you already have codes and the dtype and
so do not need the (computation intensive) factorization step, which is
usually done on the constructor.
Expand All @@ -657,16 +659,17 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
categories or -1 for NaN
categories : index-like, optional
The categories for the categorical. Items need to be unique.
.. deprecated:: 0.24.0
Use ``dtype`` instead.
ordered : bool, optional
Whether or not this categorical is treated as an ordered
categorical. If not given, the resulting categorical will be
unordered.
.. versionchanged:: 0.24.0
The default value has been changed to ``None``. Previously
the default value was ``False``.
dtype : CategoricalDtype, optional
.. deprecated:: 0.24.0
Use ``dtype`` instead.
dtype : CategoricalDtype
An instance of ``CategoricalDtype`` to use for this categorical.
.. versionadded:: 0.24.0
Expand All @@ -682,6 +685,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
if categories is not None or ordered is not None:
raise ValueError("Cannot specify `categories` or `ordered` "
"together with `dtype`.")
elif categories is None and dtype is None:
raise ValueError("Must specify `categories` or `dtype`.")
else:
dtype = CategoricalDtype(categories, ordered)

Expand Down Expand Up @@ -1245,9 +1250,8 @@ def map(self, mapper):
"""
new_categories = self.categories.map(mapper)
try:
return self.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
return self.from_codes(self._codes.copy(), dtype=new_dtype)
except ValueError:
return np.take(new_categories, self._codes)

Expand Down
81 changes: 23 additions & 58 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
def test_validate_ordered(self):
# see gh-14058
exp_msg = "'ordered' must either be 'True' or 'False'"
exp_err = TypeError

# This should be a boolean.
# This should be a boolean or None.
ordered = np.array([0, 1, 2])

with pytest.raises(exp_err, match=exp_msg):
with pytest.raises(TypeError, match=exp_msg):
Categorical([1, 2, 3], ordered=ordered)

with pytest.raises(exp_err, match=exp_msg):
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
ordered=ordered)

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
Expand Down Expand Up @@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
tm.assert_categorical_equal(result, expected)

def test_from_codes(self):
dtype = CategoricalDtype(categories=[1, 2])

# no dtype or categories
msg = "Must specify `categories` or `dtype`."
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2])

# too few categories
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be between "
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], dtype=dtype)

# no int codes
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], dtype=dtype)

# no unique categories
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

# NaN categories included
with pytest.raises(ValueError,
match="Categorial categories cannot be null"):
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

# too negative
dtype = CategoricalDtype(categories=["a", "b", "c"])
msg = r"codes need to be between -1 and len\(categories\)-1"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], dtype=dtype)

exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)

res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)

codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
dtype = CategoricalDtype(categories=["train", "test"])
Categorical.from_codes(codes, categories=dtype.categories)
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_categorical_categories(self):
# GH17884
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

result = Categorical.from_codes(
[0, 1], categories=Categorical(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

result = Categorical.from_codes(
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# non-unique Categorical still raises
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

def test_from_codes_with_nan_code(self):
# GH21767
codes = [1, 2, np.nan]
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, categories=dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)
Expand All @@ -500,36 +460,41 @@ def test_from_codes_with_float(self):
codes = [1.0, 2.0, 0] # integer, but in float dtype
dtype = CategoricalDtype(categories=['a', 'b', 'c'])

with tm.assert_produces_warning(FutureWarning):
cat = Categorical.from_codes(codes, dtype.categories)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

with tm.assert_produces_warning(FutureWarning):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
cat = Categorical.from_codes(codes, dtype=dtype)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

codes = [1.1, 2.0, 0] # non-integer
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_deprecated(self):
with tm.assert_produces_warning(FutureWarning):
Categorical.from_codes([0, 1], categories=['a', 'b'])

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=True)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=False)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
cats = ['a', 'b']
codes = np.array([0, 0, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes(codes, cats)
expected = Categorical.from_codes(codes,
dtype=CategoricalDtype(cats))
tm.assert_categorical_equal(result, expected)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories_sorts(self, dtype):
cats = ['b', 'a']
codes = np.array([0, 1, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
expected = Categorical.from_codes([1, 0, 0, 0],
dtype=CategoricalDtype(['a', 'b']))
tm.assert_categorical_equal(result, expected)

def test_from_inferred_categories_dtype(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,9 @@ def test_astype(self):
right=[2, 4],
closed='right')

dtype = CategoricalDtype(categories=ii, ordered=True)
ci = CategoricalIndex(Categorical.from_codes(
[0, 1, -1], categories=ii, ordered=True))
[0, 1, -1], dtype=dtype))

result = ci.astype('interval')
expected = ii.take([0, 1, -1])
Expand Down

0 comments on commit 6997fd8

Please sign in to comment.