Skip to content

Commit

Permalink
API: rename Categorical.levels to .categories
Browse files Browse the repository at this point in the history
The name 'levels' was already used by a much different concept in
MultiIndex and this was too confusing, so change the name to
'categories'. Add deprecation warning if the old name is used
(in constructor or the public 'levels' attribute). The old name
'levels' is no longer exposed under Series.cat as it was never
part of a stable release.

See the discussion in #8074

This rename was done by search&replace in categorical.py and the
corresponding tests_categorical.py, implementing the deprecation
accessor (with a temporary 'raise Exception'), and then running the
unit tests and changing code until all tests passed.
  • Loading branch information
jankatins committed Sep 25, 2014
1 parent b74e0a3 commit 9d86247
Show file tree
Hide file tree
Showing 17 changed files with 618 additions and 590 deletions.
6 changes: 3 additions & 3 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,10 @@ following usable methods and properties (all available as ``Series.cat.<method_o
.. autosummary::
:toctree: generated/

Categorical.levels
Categorical.categories
Categorical.ordered
Categorical.reorder_levels
Categorical.remove_unused_levels
Categorical.reorder_categories
Categorical.remove_unused_categories

The following methods are considered API when using ``Categorical`` directly:

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
raise TypeError("bins argument only works with numeric data.")
values = cat.codes
elif is_category:
bins = values.levels
bins = values.categories
cat = values
values = cat.codes

Expand Down Expand Up @@ -248,11 +248,11 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
result = Series(counts, index=com._values_from_object(keys))
if bins is not None:
# TODO: This next line should be more efficient
result = result.reindex(np.arange(len(cat.levels)), fill_value=0)
result = result.reindex(np.arange(len(cat.categories)), fill_value=0)
if not is_category:
result.index = bins[:-1]
else:
result.index = cat.levels
result.index = cat.categories

if sort:
result.sort()
Expand Down
465 changes: 241 additions & 224 deletions pandas/core/categorical.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@
correct auto-detection.
"""

pc_max_levels_doc = """
pc_max_categories_doc = """
: int
This sets the maximum number of levels pandas should output when printing
out a `Categorical`.
This sets the maximum number of categories pandas should output when printing
out a `Categorical` or a Series of dtype "category".
"""

pc_max_info_cols_doc = """
Expand Down Expand Up @@ -237,7 +237,7 @@ def mpl_style_cb(key):
validator=is_instance_factory((int, type(None))))
cf.register_option('max_rows', 60, pc_max_rows_doc,
validator=is_instance_factory([type(None), int]))
cf.register_option('max_levels', 8, pc_max_levels_doc, validator=is_int)
cf.register_option('max_categories', 8, pc_max_categories_doc, validator=is_int)
cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int)
cf.register_option('max_columns', 20, pc_max_cols_doc,
validator=is_instance_factory([type(None), int]))
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _get_footer(self):
footer += ', '
footer += "Length: %d" % len(self.categorical)

level_info = self.categorical._repr_level_info()
level_info = self.categorical._repr_categories_info()

# Levels are added in a newline
if footer:
Expand Down Expand Up @@ -176,7 +176,7 @@ def _get_footer(self):
# level infos are added to the end and in a new line, like it is done for Categoricals
# Only added when we request a name
if self.name and com.is_categorical_dtype(self.series.dtype):
level_info = self.series.values._repr_level_info()
level_info = self.series.values._repr_categories_info()
if footer:
footer += "\n"
footer += level_info
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1924,7 +1924,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
self.grouper = np.asarray(factor)

self._labels = factor.codes
self._group_index = factor.levels
self._group_index = factor.categories
if self.name is None:
self.name = factor.name

Expand Down Expand Up @@ -3545,7 +3545,7 @@ def _lexsort_indexer(keys, orders=None, na_position='last'):
if na_position not in ['last','first']:
raise ValueError('invalid na_position: {!r}'.format(na_position))

n = len(c.levels)
n = len(c.categories)
codes = c.codes.copy()

mask = (c.codes == -1)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3206,7 +3206,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
return Index(arrays[0], name=name)

cats = [Categorical.from_array(arr) for arr in arrays]
levels = [c.levels for c in cats]
levels = [c.categories for c in cats]
labels = [c.codes for c in cats]
if names is None:
names = [c.name for c in cats]
Expand Down Expand Up @@ -3301,7 +3301,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
categoricals = [Categorical.from_array(it) for it in iterables]
labels = cartesian_product([c.codes for c in categoricals])

return MultiIndex(levels=[c.levels for c in categoricals],
return MultiIndex(levels=[c.categories for c in categoricals],
labels=labels, sortorder=sortorder, names=names)

@property
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1698,12 +1698,12 @@ def _concat_blocks(self, blocks, values):
return the block concatenation
"""

levels = self.values.levels
categories = self.values.categories
for b in blocks:
if not levels.equals(b.values.levels):
if not categories.equals(b.values.categories):
raise ValueError("incompatible levels in categorical block merge")

return self._holder(values[0], levels=levels)
return self._holder(values[0], categories=categories)

def to_native_types(self, slicer=None, na_rep='', **kwargs):
""" convert to our native types format, slicing if desired """
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def panel_index(time, panels, names=['time', 'panel']):
panel_factor = Categorical.from_array(panels)

labels = [time_factor.codes, panel_factor.codes]
levels = [time_factor.levels, panel_factor.levels]
levels = [time_factor.categories, panel_factor.categories]
return MultiIndex(levels, labels, sortorder=None, names=names,
verify_integrity=False)

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,7 @@ def check_len(item, name):
def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False):
# Series avoids inconsistent NaN handling
cat = Categorical.from_array(Series(data))
levels = cat.levels
levels = cat.categories

# if all NaN
if not dummy_na and len(levels) == 0:
Expand All @@ -1130,7 +1130,7 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False):
dummy_mat = np.eye(number_of_cols).take(cat.codes, axis=0)

if dummy_na:
levels = np.append(cat.levels, np.nan)
levels = np.append(cat.categories, np.nan)
else:
# reset NaN GH4446
dummy_mat[cat.codes == -1] = 0
Expand Down Expand Up @@ -1182,7 +1182,7 @@ def make_axis_dummies(frame, axis='minor', transform=None):
mapped_items = items.map(transform)
cat = Categorical.from_array(mapped_items.take(labels))
labels = cat.codes
items = cat.levels
items = cat.categories

values = np.eye(len(items), dtype=float)
values = values.take(labels, axis=0)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ def _repr_footer(self):

# Categorical
if com.is_categorical_dtype(self.dtype):
level_info = self.values._repr_level_info()
level_info = self.values._repr_categories_info()
return u('%sLength: %d, dtype: %s\n%s') % (namestr,
len(self),
str(self.dtype.name),
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3522,8 +3522,8 @@ def read(self, where=None, columns=None, **kwargs):
return None

factors = [Categorical.from_array(a.values) for a in self.index_axes]
levels = [f.levels for f in factors]
N = [len(f.levels) for f in factors]
levels = [f.categories for f in factors]
N = [len(f.categories) for f in factors]
labels = [f.codes for f in factors]

# compute the key
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4541,7 +4541,7 @@ def test_categorical(self):

with ensure_clean_store(self.path) as store:

s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], levels=['a','b','c','d']))
s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=['a','b','c','d']))

self.assertRaises(NotImplementedError, store.put, 's_fixed', s, format='fixed')
self.assertRaises(NotImplementedError, store.append, 's_table', s, format='table')
Expand Down
Loading

0 comments on commit 9d86247

Please sign in to comment.