Skip to content

Commit

Permalink
Groupby getitem works with all index types (#13731)
Browse files Browse the repository at this point in the history
Previously `df.groupby(0)[df.columns]` would fail if all column names
were integers (meaning `df.columns` was an `Int64Index`). This was
because the implementation of `__getitem__` in `SelectionMixin` was
checking for `ABCIndex` when it probably should have checked for
`ABCIndexClass`.
  • Loading branch information
jcrist authored and jorisvandenbossche committed Jul 23, 2016
1 parent 5a3b071 commit 1cd1026
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ Bug Fixes

- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
- Bug in ``groupby`` with ``as_index=False`` where the result was all NaNs when grouping on multiple columns including a categorical one (:issue:`13204`)
- Bug in ``df.groupby(...)[...]`` where getitem with ``Int64Index`` raised an error (:issue:`13731`)

- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another Python package called ``apiclient`` (:issue:`13454`)
- Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from pandas.types.missing import isnull
from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndex
from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndexClass
from pandas.types.common import (_ensure_object, is_object_dtype,
is_list_like, is_scalar)

Expand Down Expand Up @@ -299,7 +299,7 @@ def name(self):
@property
def _selection_list(self):
if not isinstance(self._selection, (list, tuple, ABCSeries,
ABCIndex, np.ndarray)):
ABCIndexClass, np.ndarray)):
return [self._selection]
return self._selection

Expand Down Expand Up @@ -330,7 +330,7 @@ def __getitem__(self, key):
if self._selection is not None:
raise Exception('Column(s) %s already selected' % self._selection)

if isinstance(key, (list, tuple, ABCSeries, ABCIndex,
if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
np.ndarray)):
if len(self.obj.columns.intersection(key)) != len(key):
bad_keys = list(set(key).difference(self.obj.columns))
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3769,6 +3769,22 @@ def test_getitem_list_of_columns(self):
assert_frame_equal(result2, expected)
assert_frame_equal(result3, expected)

def test_getitem_numeric_column_names(self):
# GH #13731
# Regression test: column selection on a groupby must work when every
# column label is an integer, for Index, tuple and list keys alike.
# Column 0 holds the grouping letters; 2, 4 and 6 hold random floats.
df = DataFrame({0: list('abcd') * 2,
2: np.random.randn(8),
4: np.random.randn(8),
6: np.random.randn(8)})
# Key is a slice of df.columns (an Index object), not a plain list.
result = df.groupby(0)[df.columns[1:3]].mean()
# Key is the tuple (2, 4).
result2 = df.groupby(0)[2, 4].mean()
# Key is the list [2, 4].
result3 = df.groupby(0)[[2, 4]].mean()

# Reference result: pick columns 0, 2 and 4 first, then group by column 0.
expected = df.ix[:, [0, 2, 4]].groupby(0).mean()

# Every selection style must produce the same frame as the reference.
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
assert_frame_equal(result3, expected)

def test_agg_multiple_functions_maintain_order(self):
# GH #610
funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)]
Expand Down

0 comments on commit 1cd1026

Please sign in to comment.