Skip to content

Commit

Permalink
BUG: Fix groupby over a CategoricalIndex in axis=1
Browse files Browse the repository at this point in the history
closes GH18432

Add multi-index columns test to test_groupby_categorical_columns_index()

Add whatsnew for GH18432 bug fix

Fix ValueError text for GH18432 bug fix

Update whatsnew text

Use kwargs instead of positional format params

Move test_groupby_categorical_columns_index() to pandas/tests/groupby/test_grouping.py

Directly construct expected dataframe in test_groupby_categorical_index_and_columns()
  • Loading branch information
Eric Kisslinger committed Nov 29, 2017
1 parent 262e8ff commit 19f6041
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ Categorical
- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)
- Bug in ``DataFrame.groupby(axis=1)`` incorrectly raising a ``ValueError`` when grouping over a ``CategoricalIndex`` of column labels (:issue:`18432`)

String
^^^^^^
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2859,9 +2859,11 @@ def is_in_obj(gpr):
else:
in_axis, name = False, None

if is_categorical_dtype(gpr) and len(gpr) != len(obj):
raise ValueError("Categorical dtype grouper must "
"have len(grouper) == len(data)")
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
raise ValueError(
("Length of grouper ({len_gpr}) and axis ({len_axis})"
" must be same length"
.format(len_gpr=len(gpr), len_axis=obj.shape[axis])))

# create the Grouping
# allow us to passing the actual Grouping as the gpr
Expand Down
25 changes: 24 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from warnings import catch_warnings
from pandas import (date_range, Timestamp,
Index, MultiIndex, DataFrame, Series)
Index, MultiIndex, DataFrame, Series, CategoricalIndex)
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
assert_series_equal, assert_almost_equal)
from pandas.compat import lrange, long
Expand Down Expand Up @@ -251,6 +251,29 @@ def test_groupby_levels_and_columns(self):
by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64)
tm.assert_frame_equal(by_levels, by_columns)

def test_groupby_categorical_index_and_columns(self):
# GH18432
columns = ['A', 'B', 'A', 'B']
categories = ['B', 'A']
data = np.ones((5, 4), int)
cat_columns = CategoricalIndex(columns,
categories=categories,
ordered=True)
df = DataFrame(data=data, columns=cat_columns)
result = df.groupby(axis=1, level=0).sum()
expected_data = 2 * np.ones((5, 2), int)
expected_columns = CategoricalIndex(categories,
categories=categories,
ordered=True)
expected = DataFrame(data=expected_data, columns=expected_columns)
assert_frame_equal(result, expected)

# test transposed version
df = DataFrame(data.T, index=cat_columns)
result = df.groupby(axis=0, level=0).sum()
expected = DataFrame(data=expected_data.T, index=expected_columns)
assert_frame_equal(result, expected)

def test_grouper_getting_correct_binner(self):

# GH 10063
Expand Down

0 comments on commit 19f6041

Please sign in to comment.