Groupby getitem works with all index types (#13731)

Previously, `df.groupby(0)[df.columns]` would fail if all column names were integers (meaning `df.columns` was an `Int64Index`). This happened because the implementation of `__getitem__` in `SelectionMixin` checked for `ABCIndex` when it should have checked for `ABCIndexClass`, so an `Int64Index` key was not recognized as a list-like selection of columns.
pandas-dev · Jul 23, 2016 · 1cd1026 · 1cd1026
1 parent 5a3b071
commit 1cd1026
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 3 deletions.
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -750,6 +750,7 @@ Bug Fixes
 
 - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
 - Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`)
+- Bug in ``df.groupby(...)[...]`` where getitem with ``Int64Index`` raised an error (:issue:`13731`)
 
 - Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient  (:issue:`13454`)
 - Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 from pandas.types.missing import isnull
-from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndex
+from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndexClass
 from pandas.types.common import (_ensure_object, is_object_dtype,
                                  is_list_like, is_scalar)
 
@@ -299,7 +299,7 @@ def name(self):
     @property
     def _selection_list(self):
         if not isinstance(self._selection, (list, tuple, ABCSeries,
-                                            ABCIndex, np.ndarray)):
+                                            ABCIndexClass, np.ndarray)):
             return [self._selection]
         return self._selection
 
@@ -330,7 +330,7 @@ def __getitem__(self, key):
         if self._selection is not None:
             raise Exception('Column(s) %s already selected' % self._selection)
 
-        if isinstance(key, (list, tuple, ABCSeries, ABCIndex,
+        if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
                             np.ndarray)):
             if len(self.obj.columns.intersection(key)) != len(key):
                 bad_keys = list(set(key).difference(self.obj.columns))

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -3769,6 +3769,22 @@ def test_getitem_list_of_columns(self):
         assert_frame_equal(result2, expected)
         assert_frame_equal(result3, expected)
 
+    def test_getitem_numeric_column_names(self):
+        # GH #13731
+        df = DataFrame({0: list('abcd') * 2,
+                        2: np.random.randn(8),
+                        4: np.random.randn(8),
+                        6: np.random.randn(8)})
+        result = df.groupby(0)[df.columns[1:3]].mean()
+        result2 = df.groupby(0)[2, 4].mean()
+        result3 = df.groupby(0)[[2, 4]].mean()
+
+        expected = df.ix[:, [0, 2, 4]].groupby(0).mean()
+
+        assert_frame_equal(result, expected)
+        assert_frame_equal(result2, expected)
+        assert_frame_equal(result3, expected)
+
     def test_agg_multiple_functions_maintain_order(self):
         # GH #610
         funcs = [('mean', np.mean), ('max', np.max), ('min', np.min)]