Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Categorical.name #10632

Merged
merged 1 commit into from
Jul 26, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ Other API Changes
- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
- Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
- Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).
- ``Categorical.name`` was removed to make ``Categorical`` more ``numpy.ndarray``-like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`).

- ``NaT``'s methods now either raise ``ValueError``, or return ``np.nan`` or ``NaT`` (:issue:`9513`)
=========================== ==============================================================
Expand Down
76 changes: 31 additions & 45 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,6 @@ class Categorical(PandasObject):
ordered : boolean, (default False)
Whether or not this categorical is treated as an ordered categorical. If not given,
the resulting categorical will not be ordered.
name : str, optional
Name for the Categorical variable. If name is None, will attempt
to infer from values.

Attributes
----------
Expand All @@ -159,8 +156,6 @@ class Categorical(PandasObject):
The codes (integer positions, which point to the categories) of this categorical, read only.
ordered : boolean
Whether or not this Categorical is ordered.
name : string
The name of this Categorical.

Raises
------
Expand Down Expand Up @@ -205,31 +200,31 @@ class Categorical(PandasObject):
# For comparisons, so that numpy uses our implementation of the compare ops, which raise
__array_priority__ = 1000
_typ = 'categorical'
name = None

def __init__(self, values, categories=None, ordered=False, name=None, fastpath=False,
levels=None):

if fastpath:
# fast path
self._codes = _coerce_indexer_dtype(values, categories)
self.name = name
self.categories = categories
self._ordered = ordered
return

if name is None:
name = getattr(values, 'name', None)
if not name is None:
msg = "the 'name' keyword is removed, use 'name' with consumers of the " \
"categorical instead (e.g. 'Series(cat, name=\"something\")'"
warn(msg, UserWarning, stacklevel=2)

# TODO: Remove after deprecation period in 2017/ after 0.18
if not levels is None:
warn("Creating a 'Categorical' with 'levels' is deprecated, use 'categories' instead",
FutureWarning)
FutureWarning, stacklevel=2)
if categories is None:
categories = levels
else:
raise ValueError("Cannot pass in both 'categories' and (deprecated) 'levels', "
"use only 'categories'")
"use only 'categories'", stacklevel=2)

# sanitize input
if is_categorical_dtype(values):
Expand Down Expand Up @@ -293,21 +288,20 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
# TODO: check for old style usage. These warnings should be removed after 0.18/ in 2016
if is_integer_dtype(values) and not is_integer_dtype(categories):
warn("Values and categories have different dtypes. Did you mean to use\n"
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)

if len(values) and is_integer_dtype(values) and (codes == -1).all():
warn("None of the categories were found in values. Did you mean to use\n"
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)

self.set_ordered(ordered or False, inplace=True)
self.categories = categories
self.name = name
self._codes = _coerce_indexer_dtype(codes, categories)

def copy(self):
""" Copy constructor. """
return Categorical(values=self._codes.copy(),categories=self.categories,
name=self.name, ordered=self.ordered, fastpath=True)
ordered=self.ordered, fastpath=True)

def astype(self, dtype):
""" coerce this type to another dtype """
Expand Down Expand Up @@ -373,9 +367,12 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
ordered : boolean, (default False)
Whether or not this categorical is treated as an ordered categorical. If not given,
the resulting categorical will be unordered.
name : str, optional
Name for the Categorical variable.
"""
if not name is None:
msg = "the 'name' keyword is removed, use 'name' with consumers of the " \
"categorical instead (e.g. 'Series(cat, name=\"something\")'"
warn(msg, UserWarning, stacklevel=2)

try:
codes = np.asarray(codes, np.int64)
except:
Expand All @@ -386,7 +383,7 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
raise ValueError("codes need to be between -1 and len(categories)-1")

return Categorical(codes, categories=categories, ordered=ordered, name=name, fastpath=True)
return Categorical(codes, categories=categories, ordered=ordered, fastpath=True)

_codes = None

Expand Down Expand Up @@ -416,8 +413,7 @@ def _get_labels(self):

Deprecated, use .codes!
"""
import warnings
warnings.warn("'labels' is deprecated. Use 'codes' instead", FutureWarning)
warn("'labels' is deprecated. Use 'codes' instead", FutureWarning, stacklevel=3)
return self.codes

labels = property(fget=_get_labels, fset=_set_codes)
Expand Down Expand Up @@ -464,12 +460,12 @@ def _get_categories(self):

def _set_levels(self, levels):
""" set new levels (deprecated, use "categories") """
warn("Assigning to 'levels' is deprecated, use 'categories'", FutureWarning)
warn("Assigning to 'levels' is deprecated, use 'categories'", FutureWarning, stacklevel=3)
self.categories = levels

def _get_levels(self):
""" Gets the levels (deprecated, use "categories") """
warn("Accessing 'levels' is deprecated, use 'categories'", FutureWarning)
warn("Accessing 'levels' is deprecated, use 'categories'", FutureWarning, stacklevel=3)
return self.categories

# TODO: Remove after deprecation period in 2017/ after 0.18
Expand All @@ -479,7 +475,8 @@ def _get_levels(self):

def _set_ordered(self, value):
""" Sets the ordered attribute to the boolean value """
warn("Setting 'ordered' directly is deprecated, use 'set_ordered'", FutureWarning)
warn("Setting 'ordered' directly is deprecated, use 'set_ordered'", FutureWarning,
stacklevel=3)
self.set_ordered(value, inplace=True)

def set_ordered(self, value, inplace=False):
Expand Down Expand Up @@ -1140,7 +1137,7 @@ def order(self, inplace=False, ascending=True, na_position='last'):
return
else:
return Categorical(values=codes,categories=self.categories, ordered=self.ordered,
name=self.name, fastpath=True)
fastpath=True)


def sort(self, inplace=True, ascending=True, na_position='last'):
Expand Down Expand Up @@ -1266,7 +1263,7 @@ def fillna(self, value=None, method=None, limit=None):
values[mask] = self.categories.get_loc(value)

return Categorical(values, categories=self.categories, ordered=self.ordered,
name=self.name, fastpath=True)
fastpath=True)

def take_nd(self, indexer, allow_fill=True, fill_value=None):
""" Take the codes by the indexer, fill with the fill_value.
Expand All @@ -1280,7 +1277,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):

codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
result = Categorical(codes, categories=self.categories, ordered=self.ordered,
name=self.name, fastpath=True)
fastpath=True)
return result

take = take_nd
Expand All @@ -1300,7 +1297,7 @@ def _slice(self, slicer):

_codes = self._codes[slicer]
return Categorical(values=_codes,categories=self.categories, ordered=self.ordered,
name=self.name, fastpath=True)
fastpath=True)

def __len__(self):
"""The length of this Categorical."""
Expand All @@ -1313,9 +1310,8 @@ def __iter__(self):
def _tidy_repr(self, max_vals=10, footer=True):
""" a short repr displaying only max_vals and an optional (but default) footer """
num = max_vals // 2
head = self[:num]._get_repr(length=False, name=False, footer=False)
head = self[:num]._get_repr(length=False, footer=False)
tail = self[-(max_vals - num):]._get_repr(length=False,
name=False,
footer=False)

result = '%s, ..., %s' % (head[:-1], tail[1:])
Expand Down Expand Up @@ -1369,14 +1365,11 @@ def _repr_categories_info(self):

def _repr_footer(self):

namestr = "Name: %s, " % self.name if self.name is not None else ""
return u('%sLength: %d\n%s') % (namestr,
len(self), self._repr_categories_info())
return u('Length: %d\n%s') % (len(self), self._repr_categories_info())

def _get_repr(self, name=False, length=True, na_rep='NaN', footer=True):
def _get_repr(self, length=True, na_rep='NaN', footer=True):
from pandas.core import format as fmt
formatter = fmt.CategoricalFormatter(self,
name=name,
length=length,
na_rep=na_rep,
footer=footer)
Expand All @@ -1389,11 +1382,9 @@ def __unicode__(self):
if len(self._codes) > _maxlen:
result = self._tidy_repr(_maxlen)
elif len(self._codes) > 0:
result = self._get_repr(length=len(self) > _maxlen,
name=True)
result = self._get_repr(length=len(self) > _maxlen)
else:
result = '[], %s' % self._get_repr(name=True,
length=False,
result = '[], %s' % self._get_repr(length=False,
footer=True,
).replace("\n",", ")

Expand Down Expand Up @@ -1562,8 +1553,7 @@ def mode(self):
import pandas.hashtable as htable
good = self._codes != -1
result = Categorical(sorted(htable.mode_int64(_ensure_int64(self._codes[good]))),
categories=self.categories,ordered=self.ordered, name=self.name,
fastpath=True)
categories=self.categories,ordered=self.ordered, fastpath=True)
return result

def unique(self):
Expand All @@ -1586,8 +1576,6 @@ def equals(self, other):
"""
Returns True if categorical arrays are equal.

The name of the `Categorical` is not compared!

Parameters
----------
other : `Categorical`
Expand All @@ -1596,7 +1584,6 @@ def equals(self, other):
-------
are_equal : boolean
"""
# TODO: should this also test if name is equal?
return self.is_dtype_equal(other) and np.array_equal(self._codes, other._codes)

def is_dtype_equal(self, other):
Expand Down Expand Up @@ -1647,7 +1634,7 @@ def repeat(self, repeats):
"""
codes = self._codes.repeat(repeats)
return Categorical(values=codes, categories=self.categories,
ordered=self.ordered, name=self.name, fastpath=True)
ordered=self.ordered, fastpath=True)


##### The Series.cat accessor #####
Expand Down Expand Up @@ -1696,7 +1683,6 @@ def _delegate_method(self, name, *args, **kwargs):
if not res is None:
return Series(res, index=self.index)

# TODO: remove levels after the deprecation period
CategoricalAccessor._add_delegate_accessors(delegate=Categorical,
accessors=["categories", "ordered"],
typ='property')
Expand Down
9 changes: 1 addition & 8 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,16 @@
class CategoricalFormatter(object):

def __init__(self, categorical, buf=None, length=True,
na_rep='NaN', name=False, footer=True):
na_rep='NaN', footer=True):
self.categorical = categorical
self.buf = buf if buf is not None else StringIO(u(""))
self.name = name
self.na_rep = na_rep
self.length = length
self.footer = footer

def _get_footer(self):
footer = ''

if self.name:
name = com.pprint_thing(self.categorical.name,
escape_chars=('\t', '\r', '\n'))
footer += ('Name: %s' % name if self.categorical.name is not None
else '')

if self.length:
if footer:
footer += ', '
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1960,8 +1960,6 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
self._group_index = CategoricalIndex(Categorical.from_codes(np.arange(len(c)),
categories=c,
ordered=self.grouper.ordered))
if self.name is None:
self.name = self.grouper.name

# a passed Grouper like
elif isinstance(self.grouper, Grouper):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4414,7 +4414,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
levels = [c.categories for c in cats]
labels = [c.codes for c in cats]
if names is None:
names = [c.name for c in cats]
names = [getattr(arr, "name", None) for arr in arrays]

return MultiIndex(levels=levels, labels=labels,
sortorder=sortorder, names=names,
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
elif isinstance(data, Categorical):
if dtype is not None:
raise ValueError("cannot specify a dtype with a Categorical")
if name is None:
name = data.name
elif (isinstance(data, types.GeneratorType) or
(compat.PY3 and isinstance(data, map))):
data = list(data)
Expand Down
Loading