Skip to content

Commit

Permalink
Add dtype to Categorical.from_codes
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Dec 22, 2018
1 parent 6111f64 commit e2543df
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 45 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ Other Enhancements
- :meth:`pandas.api.types.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``,
all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
- :meth:`Categorical.from_codes` can now take a ``dtype`` parameter (:issue:`24398`).
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)
Expand Down
45 changes: 24 additions & 21 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
return cls(codes, dtype=dtype, fastpath=True)

@classmethod
def from_codes(cls, codes, categories, ordered=False):
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
"""
Make a Categorical type from codes and categories arrays.
Expand All @@ -657,11 +657,27 @@ def from_codes(cls, codes, categories, ordered=False):
categories or -1 for NaN
categories : index-like
The categories for the categorical. Items need to be unique.
ordered : boolean, (default False)
ordered : boolean, optional
Whether or not this categorical is treated as an ordered
categorical. If not given, the resulting categorical will be
unordered.
.. versionchanged:: 0.24.0
The default value has been changed to ``None``. Previously
the default value was ``False``.
dtype : CategoricalDtype, optional
An instance of ``CategoricalDtype`` to use for this categorical.
.. versionadded:: 0.24.0
"""
if dtype is not None:
if categories is not None or ordered is not None:
raise ValueError("Cannot specify both `dtype` and `categories`"
" or `ordered`.")
else:
dtype = CategoricalDtype(categories, ordered)

codes = np.asarray(codes) # #21767
if not is_integer_dtype(codes):
msg = "codes need to be array-like integers"
Expand All @@ -675,20 +691,12 @@ def from_codes(cls, codes, categories, ordered=False):
if msg:
raise ValueError(msg)

try:
codes = coerce_indexer_dtype(codes, categories)
except (ValueError, TypeError):
raise ValueError(
"codes need to be convertible to an arrays of integers")

categories = CategoricalDtype.validate_categories(categories)

if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
if len(codes) and (
codes.max() >= len(dtype.categories) or codes.min() < -1):
raise ValueError("codes need to be between -1 and "
"len(categories)-1")

return cls(codes, categories=categories, ordered=ordered,
fastpath=True)
return cls(codes, dtype=dtype, fastpath=True)

_codes = None

Expand Down Expand Up @@ -1283,8 +1291,7 @@ def shift(self, periods):
else:
codes[periods:] = -1

return self.from_codes(codes, categories=self.categories,
ordered=self.ordered)
return self.from_codes(codes, dtype=self.dtype)

def __array__(self, dtype=None):
"""
Expand Down Expand Up @@ -1902,9 +1909,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None):

codes = take(self._codes, indexer, allow_fill=allow_fill,
fill_value=fill_value)
result = type(self).from_codes(codes,
categories=dtype.categories,
ordered=dtype.ordered)
result = type(self).from_codes(codes, dtype=dtype)
return result

take = take_nd
Expand Down Expand Up @@ -2093,9 +2098,7 @@ def __setitem__(self, key, value):
new_codes = _recode_for_categories(
value.codes, value.categories, self.categories
)
value = Categorical.from_codes(new_codes,
categories=self.categories,
ordered=self.ordered)
value = Categorical.from_codes(new_codes, dtype=self.dtype)

rvalue = value if is_list_like(value) else [value]

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def _create_from_codes(self, codes, dtype=None, name=None):
dtype = self.dtype
if name is None:
name = self.name
cat = Categorical.from_codes(codes, categories=dtype.categories,
ordered=dtype.ordered)
cat = Categorical.from_codes(codes, dtype=dtype)
return CategoricalIndex(cat, name=name)

@classmethod
Expand Down
60 changes: 38 additions & 22 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,33 +417,44 @@ def test_constructor_with_categorical_categories(self):
def test_from_codes(self):

# too few categories
dtype = CategoricalDtype(categories=[1, 2])
with pytest.raises(ValueError):
Categorical.from_codes([1, 2], [1, 2])
Categorical.from_codes([1, 2], categories=dtype.categories)
with pytest.raises(ValueError):
Categorical.from_codes([1, 2], dtype=dtype)

# no int codes
with pytest.raises(ValueError):
Categorical.from_codes(["a"], [1, 2])
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError):
Categorical.from_codes(["a"], dtype=dtype)

# no unique categories
with pytest.raises(ValueError):
Categorical.from_codes([0, 1, 2], ["a", "a", "b"])
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

# NaN categories included
with pytest.raises(ValueError):
Categorical.from_codes([0, 1, 2], ["a", "b", np.nan])
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

# too negative
dtype = CategoricalDtype(categories=["a", "b", "c"])
with pytest.raises(ValueError):
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
with pytest.raises(ValueError):
Categorical.from_codes([-2, 1, 2], ["a", "b", "c"])
Categorical.from_codes([-2, 1, 2], dtype=dtype)

exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"])
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)

res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)

# Not available in earlier numpy versions
if hasattr(np.random, "choice"):
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
Categorical.from_codes(codes, categories=["train", "test"])
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
dtype = CategoricalDtype(categories=["train", "test"])
Categorical.from_codes(codes, categories=dtype.categories)
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_categorical_categories(self):
# GH17884
Expand All @@ -464,22 +475,30 @@ def test_from_codes_with_categorical_categories(self):
def test_from_codes_with_nan_code(self):
# GH21767
codes = [1, 2, np.nan]
categories = ['a', 'b', 'c']
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
with pytest.raises(ValueError):
Categorical.from_codes(codes, categories)
Categorical.from_codes(codes, categories=dtype.categories)
with pytest.raises(ValueError):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_float(self):
# GH21767
codes = [1.0, 2.0, 0] # integer, but in float dtype
categories = ['a', 'b', 'c']
dtype = CategoricalDtype(categories=['a', 'b', 'c'])

with tm.assert_produces_warning(FutureWarning):
cat = Categorical.from_codes(codes, categories)
cat = Categorical.from_codes(codes, dtype.categories)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

with tm.assert_produces_warning(FutureWarning):
cat = Categorical.from_codes(codes, dtype=dtype)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

codes = [1.1, 2.0, 0] # non-integer
with pytest.raises(ValueError):
Categorical.from_codes(codes, categories)
Categorical.from_codes(codes, dtype.categories)
with pytest.raises(ValueError):
Categorical.from_codes(codes, dtype=dtype)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
Expand Down Expand Up @@ -515,14 +534,11 @@ def test_from_inferred_categories_coerces(self):
expected = Categorical([1, 1, 2, np.nan])
tm.assert_categorical_equal(result, expected)

def test_construction_with_ordered(self):
@pytest.mark.parametrize('ordered', [None, True, False])
def test_construction_with_ordered(self, ordered):
# GH 9347, 9190
cat = Categorical([0, 1, 2])
assert not cat.ordered
cat = Categorical([0, 1, 2], ordered=False)
assert not cat.ordered
cat = Categorical([0, 1, 2], ordered=True)
assert cat.ordered
cat = Categorical([0, 1, 2], ordered=ordered)
assert cat.ordered == bool(ordered)

@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
def test_constructor_imaginary(self):
Expand Down

0 comments on commit e2543df

Please sign in to comment.