Skip to content

Commit

Permalink
deprecate categories and ordered parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Dec 30, 2018
1 parent 79a60b2 commit 8a6ec5d
Show file tree
Hide file tree
Showing 16 changed files with 113 additions and 134 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1129,6 +1129,7 @@ Deprecations
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been made consistent with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Pass a :class:`~pandas.api.types.CategoricalDtype` via the new ``dtype`` parameter instead. (:issue:`24398`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
Expand Down
26 changes: 16 additions & 10 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,9 +638,9 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
@classmethod
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
"""
Make a Categorical type from codes and categories arrays.
Make a Categorical type from codes and CategoricalDtype.
This constructor is useful if you already have codes and categories and
This constructor is useful if you already have codes and the dtype and
so do not need the (computation intensive) factorization step, which is
usually done on the constructor.
Expand All @@ -654,16 +654,17 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
categories or -1 for NaN
categories : index-like, optional
The categories for the categorical. Items need to be unique.
.. deprecated:: 0.24.0
Use ``dtype`` instead.
ordered : bool, optional
Whether or not this categorical is treated as an ordered
categorical. If not given, the resulting categorical will be
unordered.
.. versionchanged:: 0.24.0
The default value has been changed to ``None``. Previously
the default value was ``False``.
dtype : CategoricalDtype, optional
.. deprecated:: 0.24.0
Use ``dtype`` instead.
dtype : CategoricalDtype
An instance of ``CategoricalDtype`` to use for this categorical.
.. versionadded:: 0.24.0
Expand All @@ -679,7 +680,13 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
if categories is not None or ordered is not None:
raise ValueError("Cannot specify `categories` or `ordered` "
"together with `dtype`.")
elif categories is None and dtype is None:
raise ValueError("Must specify `dtype`.")
else:
msg = ("The 'categories' and ´ordered` keyword are deprecated "
"and will be removed in a future version. Please use "
"'dtype' instead.")
warn(msg, FutureWarning, stacklevel=2)
dtype = CategoricalDtype(categories, ordered)

codes = np.asarray(codes) # #21767
Expand Down Expand Up @@ -1242,9 +1249,8 @@ def map(self, mapper):
"""
new_categories = self.categories.map(mapper)
try:
return self.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
return self.from_codes(self._codes.copy(), dtype=new_dtype)
except ValueError:
# NA values are represented in self._codes with -1
# np.take causes NA values to take final element in new_categories
Expand Down
15 changes: 7 additions & 8 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,23 +290,22 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
elif is_categorical_dtype(self.grouper):

from pandas.core.groupby.categorical import recode_for_groupby
from pandas.api.types import CategoricalDtype
self.grouper, self.all_grouper = recode_for_groupby(
self.grouper, self.sort, observed)
categories = self.grouper.categories
dtype = CategoricalDtype(self.grouper.categories,
ordered=self.grouper.ordered)

# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
self._labels = self.grouper.codes
if observed:
codes = algorithms.unique1d(self.grouper.codes)
else:
codes = np.arange(len(categories))
codes = np.arange(len(dtype.categories))

self._group_index = CategoricalIndex(
Categorical.from_codes(
codes=codes,
categories=categories,
ordered=self.grouper.ordered))
Categorical.from_codes(codes=codes, dtype=dtype))

# we are done
if isinstance(self.grouper, Grouping):
Expand Down Expand Up @@ -395,8 +394,8 @@ def _make_labels(self):

@cache_readonly
def groups(self):
return self.index.groupby(Categorical.from_codes(self.labels,
self.group_index))
return self.index.groupby(
Categorical(self.labels, self.group_index, fastpath=True))


def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2026,13 +2026,15 @@ def _get_codes_for_sorting(self):
"""
from pandas.core.arrays import Categorical

def cats(level_codes):
return np.arange(np.array(level_codes).max() + 1 if
def as_dtype(level_codes):
from pandas.api.types import CategoricalDtype
cats = np.arange(np.array(level_codes).max() + 1 if
len(level_codes) else 0,
dtype=level_codes.dtype)
return CategoricalDtype(cats, ordered=True)

return [Categorical.from_codes(level_codes, cats(level_codes),
ordered=True)
return [Categorical.from_codes(level_codes,
dtype=as_dtype(level_codes))
for level_codes in self.codes]

def sortlevel(self, level=0, ascending=True, sort_remaining=True):
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
Index, Int64Index, Interval, IntervalIndex, MultiIndex, NaT, Panel, Period,
PeriodIndex, RangeIndex, Series, TimedeltaIndex, Timestamp)
from pandas.api.types import CategoricalDtype as CDT
from pandas.core import internals
from pandas.core.arrays import IntervalArray, PeriodArray
from pandas.core.arrays.sparse import BlockIndex, IntIndex
Expand Down Expand Up @@ -620,9 +621,8 @@ def decode(obj):
name=obj[u'name'])
elif typ == u'category':
from_codes = globals()[obj[u'klass']].from_codes
return from_codes(codes=obj[u'codes'],
categories=obj[u'categories'],
ordered=obj[u'ordered'])
dtype = CDT(obj[u'categories'], ordered=obj[u'ordered'])
return from_codes(codes=obj[u'codes'], dtype=dtype)

elif typ == u'interval':
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
Expand Down
7 changes: 3 additions & 4 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, Panel,
PeriodIndex, Series, SparseDataFrame, SparseSeries, TimedeltaIndex, compat,
concat, isna, to_datetime)
from pandas.api.types import CategoricalDtype
from pandas.core import config
from pandas.core.algorithms import match, unique
from pandas.core.arrays.categorical import (
Expand Down Expand Up @@ -2206,10 +2207,8 @@ def convert(self, values, nan_rep, encoding, errors):
categories = categories[~mask]
codes[codes != -1] -= mask.astype(int).cumsum().values

self.data = Categorical.from_codes(codes,
categories=categories,
ordered=self.ordered)

dtype = CategoricalDtype(categories, ordered=self.ordered)
self.data = Categorical.from_codes(codes, dtype=dtype)
else:

try:
Expand Down
82 changes: 24 additions & 58 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
def test_validate_ordered(self):
# see gh-14058
exp_msg = "'ordered' must either be 'True' or 'False'"
exp_err = TypeError

# This should be a boolean.
# This should be a boolean or None.
ordered = np.array([0, 1, 2])

with pytest.raises(exp_err, match=exp_msg):
with pytest.raises(TypeError, match=exp_msg):
Categorical([1, 2, 3], ordered=ordered)

with pytest.raises(exp_err, match=exp_msg):
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
ordered=ordered)

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
Expand Down Expand Up @@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
tm.assert_categorical_equal(result, expected)

def test_from_codes(self):
dtype = CategoricalDtype(categories=[1, 2])

# no dtype or categories
msg = "Must specify `categories` or `dtype`."
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2])

# too few categories
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be between "
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], dtype=dtype)

# no int codes
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], dtype=dtype)

# no unique categories
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

# NaN categories included
with pytest.raises(ValueError,
match="Categorial categories cannot be null"):
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

# too negative
dtype = CategoricalDtype(categories=["a", "b", "c"])
msg = r"codes need to be between -1 and len\(categories\)-1"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], dtype=dtype)

exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)

res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)

codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
dtype = CategoricalDtype(categories=["train", "test"])
Categorical.from_codes(codes, categories=dtype.categories)
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_categorical_categories(self):
# GH17884
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

result = Categorical.from_codes(
[0, 1], categories=Categorical(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

result = Categorical.from_codes(
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# non-unique Categorical still raises
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

def test_from_codes_with_nan_code(self):
# GH21767
codes = [1, 2, np.nan]
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, categories=dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)
Expand All @@ -500,36 +460,42 @@ def test_from_codes_with_float(self):
codes = [1.0, 2.0, 0] # integer, but in float dtype
dtype = CategoricalDtype(categories=['a', 'b', 'c'])

with tm.assert_produces_warning(FutureWarning):
cat = Categorical.from_codes(codes, dtype.categories)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

with tm.assert_produces_warning(FutureWarning):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
cat = Categorical.from_codes(codes, dtype=dtype)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

codes = [1.1, 2.0, 0] # non-integer
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_deprecated(self):
cats = ['a', 'b']
with tm.assert_produces_warning(FutureWarning):
Categorical.from_codes([0, 1], categories=cats)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=cats, ordered=True)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=cats, ordered=False)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
cats = ['a', 'b']
codes = np.array([0, 0, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes(codes, cats)
expected = Categorical.from_codes(codes,
dtype=CategoricalDtype(cats))
tm.assert_categorical_equal(result, expected)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories_sorts(self, dtype):
cats = ['b', 'a']
codes = np.array([0, 1, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
expected = Categorical.from_codes([1, 0, 0, 0],
dtype=CategoricalDtype(['a', 'b']))
tm.assert_categorical_equal(result, expected)

def test_from_inferred_categories_dtype(self):
Expand Down
28 changes: 16 additions & 12 deletions pandas/tests/arrays/categorical/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
# -*- coding: utf-8 -*-

from pandas import Categorical
from pandas.api.types import CategoricalDtype
import pandas.util.testing as tm


class TestCategoricalSubclassing(object):

def test_constructor(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
assert isinstance(subclassed, tm.SubclassedCategorical)
tm.assert_categorical_equal(subclassed, Categorical(['a', 'b', 'c']))

def test_from_codes(self):
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
tm.assert_categorical_equal(sc, exp)
dtype = CategoricalDtype(['a', 'b', 'c'])
subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2],
dtype=dtype)
assert isinstance(subclassed, tm.SubclassedCategorical)

expected = Categorical.from_codes([1, 0, 2], dtype=dtype)
tm.assert_categorical_equal(subclassed, expected)

def test_map(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
res = sc.map(lambda x: x.upper())
assert isinstance(res, tm.SubclassedCategorical)
exp = Categorical(['A', 'B', 'C'])
tm.assert_categorical_equal(res, exp)
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
result = subclassed.map(lambda x: x.upper())
assert isinstance(result, tm.SubclassedCategorical)
expected = Categorical(['A', 'B', 'C'])
tm.assert_categorical_equal(result, expected)
5 changes: 3 additions & 2 deletions pandas/tests/arrays/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
from pandas.api.types import CategoricalDtype as CDT
from pandas.core.arrays import PeriodArray, period_array
import pandas.util.testing as tm

Expand Down Expand Up @@ -111,8 +112,8 @@ def test_astype_copies():
def test_astype_categorical():
arr = period_array(['2000', '2001', '2001', None], freq='D')
result = arr.astype('category')
categories = pd.PeriodIndex(['2000', '2001'], freq='D')
expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
dtype = CDT(categories=pd.PeriodIndex(['2000', '2001'], freq='D'))
expected = pd.Categorical.from_codes([0, 1, 1, -1], dtype=dtype)
tm.assert_categorical_equal(result, expected)


Expand Down
Loading

0 comments on commit 8a6ec5d

Please sign in to comment.