From ccab72dc521b1c6279ef2ec889943a3dd050ca23 Mon Sep 17 00:00:00 2001 From: Andy Hayden Date: Sun, 6 Apr 2014 12:47:15 -0700 Subject: [PATCH 1/4] ENH: add count method to groupby (GH5610) --- pandas/core/groupby.py | 36 +++++++++++++++++++++--------------- pandas/tests/test_groupby.py | 12 ++++++++++++ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 27001bb69cd05..9159b43536550 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -673,9 +673,17 @@ def var(self, ddof=1): def size(self): """ Compute group sizes + """ return self.grouper.size() + def count(self): + """ + Number of non-null items in each group. + + """ + return self._python_agg_general(lambda x: notnull(x).sum()) + sum = _groupby_function('sum', 'add', np.sum) prod = _groupby_function('prod', 'prod', np.prod) min = _groupby_function('min', 'min', np.min, numeric_only=False) @@ -687,12 +695,10 @@ def size(self): def ohlc(self): """ - Compute sum of values, excluding missing values - - For multiple groupings, the result index will be a MultiIndex + Deprecated, use .resample(how="ohlc") instead. """ - return self._cython_agg_general('ohlc') + raise AttributeError('ohlc is deprecated, use resample(how="ohlc").') def nth(self, n, dropna=None): """ @@ -939,6 +945,7 @@ def _cython_agg_general(self, how, numeric_only=True): result, names = self.grouper.aggregate(obj.values, how) except AssertionError as e: raise GroupByError(str(e)) + # infer old dytpe output[name] = self._try_cast(result, obj) if len(output) == 0: @@ -947,6 +954,8 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): + _dtype = kwargs.pop("_dtype", None) + func = _intercept_function(func) f = lambda x: func(x, *args, **kwargs) @@ -955,7 +964,14 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - output[name] = self._try_cast(result, obj) + + if _dtype is None: # infer old dytpe + output[name] = self._try_cast(result, obj) + elif _dtype is False: + output[name] = result + else: + output[name] = _possibly_downcast_to_dtype(result, _dtype) + except TypeError: continue @@ -2889,16 +2905,6 @@ def _apply_to_column_groupbys(self, func): in self._iterate_column_groupbys()), keys=self._selected_obj.columns, axis=1) - def ohlc(self): - """ - Compute sum of values, excluding missing values - - For multiple groupings, the result index will be a MultiIndex - """ - return self._apply_to_column_groupbys( - lambda x: x._cython_agg_general('ohlc')) - - from pandas.tools.plotting import boxplot_frame_groupby DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index fde9156017c4e..f460214954bfd 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1970,6 +1970,18 @@ def test_size(self): for key, group in grouped: self.assertEquals(result[key], len(group)) + def test_count(self): + df = pd.DataFrame([[1, 2], [1, nan], [3, nan]], columns=['A', 'B']) + count_as = df.groupby('A').count() + count_not_as = df.groupby('A', as_index=False).count() + + res = pd.DataFrame([[1, 1], [3, 0]], columns=['A', 'B']) + assert_frame_equal(count_not_as, res) + assert_frame_equal(count_as, res.set_index('A')) + + count_B = df.groupby('A')['B'].count() + assert_series_equal(count_B, res['B']) + def test_grouping_ndarray(self): grouped = self.df.groupby(self.df['A'].values) From 6fa398e3290a62fcbe80fa2e8af8b11e51f3aeac Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 29 Apr 2014 10:28:44 -0400 Subject: [PATCH 2/4] ENH: infer selection_obj on groupby with an applied method (GH5610) --- doc/source/release.rst | 2 + doc/source/v0.14.0.txt | 12 ++++- pandas/core/groupby.py | 78 ++++++++++++++++++--------- pandas/tests/test_groupby.py | 57 +++++++++++++++++--- pandas/tseries/tests/test_resample.py | 4 +- 5 files changed, 117 insertions(+), 36 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 40e99b879dc29..0e96491fb3aa1 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -179,6 +179,8 @@ API Changes validation warnings in :func:`read_csv`/:func:`read_table` (:issue:`6607`) - Raise a ``TypeError`` when ``DataFrame`` is passed an iterator as the ``data`` argument (:issue:`5357`) +- groupby will now not return the grouped column for non-cython functions (:issue:`5610`), + as its already the index Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index ccbde36b9a09f..5cdd8d67caf7e 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -110,12 +110,22 @@ API changes .. ipython:: python - DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) g = df.groupby('A') g.nth(0) # can also use negative ints g.nth(0, dropna='any') # similar to old behaviour + groupby will now not return the grouped column for non-cython functions (:issue:`5610`), + as its already the index + + .. ipython:: python + + df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A') + g.count() + g.describe() + - Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping by a Time and a string field simultaneously. See :ref:`the docs `. (:issue:`3794`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9159b43536550..3d9dba8c0d6fa 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -445,6 +445,23 @@ def _selection_list(self): return [self._selection] return self._selection + @cache_readonly + def _selected_obj(self): + + if self._selection is None or isinstance(self.obj, Series): + return self.obj + else: + return self.obj[self._selection] + + def _set_selection_from_grouper(self): + """ we may need create a selection if we have non-level groupers """ + grp = self.grouper + if self._selection is None and getattr(grp,'groupings',None) is not None: + ax = self.obj._info_axis + groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ] + if len(groupers): + self._selection = (ax-Index(groupers)).tolist() + def _local_dir(self): return sorted(set(self.obj._local_dir() + list(self._apply_whitelist))) @@ -453,7 +470,6 @@ def __getattr__(self, attr): return object.__getattribute__(self, attr) if attr in self.obj: return self[attr] - if hasattr(self.obj, attr): return self._make_wrapper(attr) @@ -472,6 +488,10 @@ def _make_wrapper(self, name): type(self).__name__)) raise AttributeError(msg) + # need to setup the selection + # as are not passed directly but in the grouper + self._set_selection_from_grouper() + f = getattr(self._selected_obj, name) if not isinstance(f, types.MethodType): return self.apply(lambda self: getattr(self, name)) @@ -503,7 +523,19 @@ def curried(x): try: return self.apply(curried_with_axis) except Exception: - return self.apply(curried) + try: + return self.apply(curried) + except Exception: + + # related to : GH3688 + # try item-by-item + # this can be called recursively, so need to raise ValueError if + # we don't have this method to indicated to aggregate to + # mark this column as an error + try: + return self._aggregate_item_by_item(name, *args, **kwargs) + except (AttributeError): + raise ValueError return wrapper @@ -624,6 +656,7 @@ def mean(self): except GroupByError: raise except Exception: # pragma: no cover + self._set_selection_from_grouper() f = lambda x: x.mean(axis=self.axis) return self._python_agg_general(f) @@ -639,6 +672,7 @@ def median(self): raise except Exception: # pragma: no cover + self._set_selection_from_grouper() def f(x): if isinstance(x, np.ndarray): x = Series(x) @@ -655,6 +689,7 @@ def std(self, ddof=1): if ddof == 1: return self._cython_agg_general('std') else: + self._set_selection_from_grouper() f = lambda x: x.std(ddof=ddof) return self._python_agg_general(f) @@ -667,6 +702,7 @@ def var(self, ddof=1): if ddof == 1: return self._cython_agg_general('var') else: + self._set_selection_from_grouper() f = lambda x: x.var(ddof=ddof) return self._python_agg_general(f) @@ -677,12 +713,14 @@ def size(self): """ return self.grouper.size() - def count(self): + def count(self, axis=0): """ Number of non-null items in each group. - + axis : axis number, default 0 + the grouping axis """ - return self._python_agg_general(lambda x: notnull(x).sum()) + self._set_selection_from_grouper() + return self._python_agg_general(lambda x: notnull(x).sum(axis=axis)).astype('int64') sum = _groupby_function('sum', 'add', np.sum) prod = _groupby_function('prod', 'prod', np.prod) @@ -693,12 +731,14 @@ def count(self): last = _groupby_function('last', 'last', _last_compat, numeric_only=False, _convert=True) + def ohlc(self): """ - Deprecated, use .resample(how="ohlc") instead. - + Compute sum of values, excluding missing values + For multiple groupings, the result index will be a MultiIndex """ - raise AttributeError('ohlc is deprecated, use resample(how="ohlc").') + return self._apply_to_column_groupbys( + lambda x: x._cython_agg_general('ohlc')) def nth(self, n, dropna=None): """ @@ -894,13 +934,6 @@ def _cumcount_array(self, arr=None, **kwargs): cumcounts[v] = arr[len(v)-1::-1] return cumcounts - @cache_readonly - def _selected_obj(self): - if self._selection is None or isinstance(self.obj, Series): - return self.obj - else: - return self.obj[self._selection] - def _index_with_as_index(self, b): """ Take boolean mask of index to be returned from apply, if as_index=True @@ -945,7 +978,6 @@ def _cython_agg_general(self, how, numeric_only=True): result, names = self.grouper.aggregate(obj.values, how) except AssertionError as e: raise GroupByError(str(e)) - # infer old dytpe output[name] = self._try_cast(result, obj) if len(output) == 0: @@ -954,8 +986,6 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): - _dtype = kwargs.pop("_dtype", None) - func = _intercept_function(func) f = lambda x: func(x, *args, **kwargs) @@ -964,14 +994,7 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - - if _dtype is None: # infer old dytpe - output[name] = self._try_cast(result, obj) - elif _dtype is False: - output[name] = result - else: - output[name] = _possibly_downcast_to_dtype(result, _dtype) - + output[name] = self._try_cast(result, obj) except TypeError: continue @@ -2203,6 +2226,9 @@ def true_and_notnull(x, *args, **kwargs): filtered = self._apply_filter(indices, dropna) return filtered + def _apply_to_column_groupbys(self, func): + """ return a pass thru """ + return func(self) class NDFrameGroupBy(GroupBy): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f460214954bfd..0f9f1e492d6aa 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1971,16 +1971,53 @@ def test_size(self): self.assertEquals(result[key], len(group)) def test_count(self): - df = pd.DataFrame([[1, 2], [1, nan], [3, nan]], columns=['A', 'B']) + + # GH5610 + # count counts non-nulls + df = pd.DataFrame([[1, 2, 'foo'], [1, nan, 'bar'], [3, nan, nan]], columns=['A', 'B', 'C']) + count_as = df.groupby('A').count() count_not_as = df.groupby('A', as_index=False).count() - res = pd.DataFrame([[1, 1], [3, 0]], columns=['A', 'B']) - assert_frame_equal(count_not_as, res) - assert_frame_equal(count_as, res.set_index('A')) + expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], index=[1,3]) + expected.index.name='A' + assert_frame_equal(count_not_as, expected.reset_index()) + assert_frame_equal(count_as, expected) count_B = df.groupby('A')['B'].count() - assert_series_equal(count_B, res['B']) + assert_series_equal(count_B, expected['B']) + + def test_non_cython_api(self): + + # GH5610 + # non-cython calls should not include the grouper + + df = DataFrame([[1, 2, 'foo'], [1, nan, 'bar',], [3, nan, 'baz']], columns=['A', 'B','C']) + g = df.groupby('A') + + # mad + expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3]) + expected.index.name = 'A' + result = g.mad() + assert_frame_equal(result,expected) + + # describe + expected = DataFrame(dict(B = concat([df.loc[[0,1],'B'].describe(),df.loc[[2],'B'].describe()],keys=[1,3]))) + expected.index.names = ['A',None] + result = g.describe() + assert_frame_equal(result,expected) + + # any + expected = DataFrame([[True, True],[False, True]],columns=['B','C'],index=[1,3]) + expected.index.name = 'A' + result = g.any() + assert_frame_equal(result,expected) + + # idxmax + expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3]) + expected.index.name = 'A' + result = g.idxmax() + assert_frame_equal(result,expected) def test_grouping_ndarray(self): grouped = self.df.groupby(self.df['A'].values) @@ -2937,7 +2974,7 @@ def test_groupby_with_timegrouper(self): DT.datetime(2013,12,2,12,0), DT.datetime(2013,9,2,14,0), ]}) - + # GH 6908 change target column's order df_reordered = df_original.sort(columns='Quantity') @@ -3949,8 +3986,14 @@ def test_frame_groupby_plot_boxplot(self): self.assertEqual(len(res), 2) tm.close() + # now works with GH 5610 as gender is excluded + res = df.groupby('gender').hist() + tm.close() + + df2 = df.copy() + df2['gender2'] = df['gender'] with tm.assertRaisesRegexp(TypeError, '.*str.+float'): - gb.hist() + df2.groupby('gender').hist() @slow def test_frame_groupby_hist(self): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 5f975105cd80e..7fe8ab8ca642e 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1126,9 +1126,9 @@ def test_evenly_divisible_with_no_extra_bins(self): expected = DataFrame( [{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14, 'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4, - index=index).unstack().swaplevel(1,0).sortlevel() + index=index) result = df.resample('7D', how='count') - assert_series_equal(result,expected) + assert_frame_equal(result,expected) expected = DataFrame( [{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700, From f520e8dc5c78ccfe9f2cf673bf1043b4831c2f1c Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 29 Apr 2014 13:07:51 -0400 Subject: [PATCH 3/4] BUG: fixup Categorical.describe to work with 'fixed' count CLN: remove __inv__, __neg__ from series and use generic version CLN: remove __wrap_array__ from generic (replace with __array_wrap__) --- pandas/core/categorical.py | 10 ++++++---- pandas/core/generic.py | 22 +++++++++++++--------- pandas/core/series.py | 17 ++--------------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 23fccc3719278..b255831e51ae0 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -226,11 +226,13 @@ def describe(self): """ # Hack? from pandas.core.frame import DataFrame - grouped = DataFrame(self.labels).groupby(0) - counts = grouped.count().values.squeeze() + counts = DataFrame({ + 'labels' : self.labels, + 'values' : self.labels } + ).groupby('labels').count().squeeze().values freqs = counts / float(counts.sum()) - return DataFrame.from_dict({ + return DataFrame({ 'counts': counts, 'freqs': freqs, 'levels': self.levels - }).set_index('levels') + }).set_index('levels') diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2c2f133dd52c1..01af7534d458d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -611,11 +611,19 @@ def __neg__(self): arr = operator.inv(values) else: arr = operator.neg(values) - return self._wrap_array(arr, self.axes, copy=False) + return self.__array_wrap__(arr) def __invert__(self): - arr = operator.inv(_values_from_object(self)) - return self._wrap_array(arr, self.axes, copy=False) + try: + arr = operator.inv(_values_from_object(self)) + return self.__array_wrap__(arr) + except: + + # inv fails with 0 len + if not np.prod(self.shape): + return self + + raise def equals(self, other): """ @@ -707,15 +715,11 @@ def __abs__(self): #---------------------------------------------------------------------- # Array Interface - def _wrap_array(self, arr, axes, copy=False): - d = self._construct_axes_dict_from(self, axes, copy=copy) - return self._constructor(arr, **d).__finalize__(self) - def __array__(self, dtype=None): return _values_from_object(self) - def __array_wrap__(self, result): - d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) + def __array_wrap__(self, result, copy=False): + d = self._construct_axes_dict(self._AXIS_ORDERS, copy=copy) return self._constructor(result, **d).__finalize__(self) # ideally we would define this to avoid the getattr checks, but diff --git a/pandas/core/series.py b/pandas/core/series.py index c94d7dc9acefd..9c642280169f0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -370,12 +370,12 @@ def __array__(self, result=None): """ the array interface, return my values """ return self.values - def __array_wrap__(self, result): + def __array_wrap__(self, result, copy=False): """ Gets called prior to a ufunc (and after) """ return self._constructor(result, index=self.index, - copy=False).__finalize__(self) + copy=copy).__finalize__(self) def __contains__(self, key): return key in self.index @@ -959,19 +959,6 @@ def iteritems(self): if compat.PY3: # pragma: no cover items = iteritems - # inversion - def __neg__(self): - values = self.values - if values.dtype == np.bool_: - arr = operator.inv(values) - else: - arr = operator.neg(values) - return self._constructor(arr, self.index).__finalize__(self) - - def __invert__(self): - arr = operator.inv(self.values) - return self._constructor(arr, self.index).__finalize__(self) - #---------------------------------------------------------------------- # unbox reductions From 134dd1f7cc1a521a1da84ea666c5a62da2e1d30a Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 29 Apr 2014 14:45:05 -0400 Subject: [PATCH 4/4] BUG: handle as_index=False for pseudo multi-groupers (e.g. .describe()) --- doc/source/v0.14.0.txt | 7 +++++++ pandas/core/groupby.py | 33 +++++++++++++++++++++++++-------- pandas/tests/test_groupby.py | 12 +++++++++++- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 5cdd8d67caf7e..f89f56e7a1aa2 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -126,6 +126,13 @@ API changes g.count() g.describe() + passing ``as_index`` will leave the grouped column in-place (this is not change in 0.14.0) + + df = DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A',as_index=False) + g.count() + g.describe() + - Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping by a Time and a string field simultaneously. See :ref:`the docs `. (:issue:`3794`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3d9dba8c0d6fa..2a36ea65667d6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -456,7 +456,7 @@ def _selected_obj(self): def _set_selection_from_grouper(self): """ we may need create a selection if we have non-level groupers """ grp = self.grouper - if self._selection is None and getattr(grp,'groupings',None) is not None: + if self._selection is None and self.as_index and getattr(grp,'groupings',None) is not None: ax = self.obj._info_axis groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ] if len(groupers): @@ -1029,12 +1029,23 @@ def _concat_objects(self, keys, values, not_indexed_same=False): result = result.reindex(ax) else: result = result.reindex_axis(ax, axis=self.axis) - elif self.group_keys and self.as_index: - group_keys = keys - group_levels = self.grouper.levels - group_names = self.grouper.names - result = concat(values, axis=self.axis, keys=group_keys, - levels=group_levels, names=group_names) + + elif self.group_keys: + + if self.as_index: + + # possible MI return case + group_keys = keys + group_levels = self.grouper.levels + group_names = self.grouper.names + result = concat(values, axis=self.axis, keys=group_keys, + levels=group_levels, names=group_names) + else: + + # GH5610, returns a MI, with the first level being a + # range index + keys = list(range(len(values))) + result = concat(values, axis=self.axis, keys=keys) else: result = concat(values, axis=self.axis) @@ -2528,6 +2539,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): elif hasattr(self.grouper, 'groupings'): if len(self.grouper.groupings) > 1: key_index = MultiIndex.from_tuples(keys, names=key_names) + else: ping = self.grouper.groupings[0] if len(keys) == ping.ngroups: @@ -2540,8 +2552,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): # reorder the values values = [values[i] for i in indexer] else: + key_index = Index(keys, name=key_names[0]) + # don't use the key indexer + if not self.as_index: + key_index = None + # make Nones an empty object if com._count_not_none(*values) != len(values): v = next(v for v in values if v is not None) @@ -2611,7 +2628,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): # normally use vstack as its faster than concat # and if we have mi-columns - if not _np_version_under1p7 or isinstance(v.index,MultiIndex): + if not _np_version_under1p7 or isinstance(v.index,MultiIndex) or key_index is None: stacked_values = np.vstack([np.asarray(x) for x in values]) result = DataFrame(stacked_values,index=key_index,columns=index) else: diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 0f9f1e492d6aa..fcc4eb83b0af9 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1378,7 +1378,8 @@ def test_groupby_as_index_apply(self): res_not_as_apply = g_not_as.apply(lambda x: x.head(2)).index # apply doesn't maintain the original ordering - exp_not_as_apply = Index([0, 2, 1, 4]) + # changed in GH5610 as the as_index=False returns a MI here + exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)]) exp_as_apply = MultiIndex.from_tuples([(1, 0), (1, 2), (2, 1), (3, 4)]) assert_index_equal(res_as_apply, exp_as_apply) @@ -1994,6 +1995,7 @@ def test_non_cython_api(self): df = DataFrame([[1, 2, 'foo'], [1, nan, 'bar',], [3, nan, 'baz']], columns=['A', 'B','C']) g = df.groupby('A') + gni = df.groupby('A',as_index=False) # mad expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3]) @@ -2001,12 +2003,20 @@ def test_non_cython_api(self): result = g.mad() assert_frame_equal(result,expected) + expected = DataFrame([[0.,0.],[0,nan]],columns=['A','B'],index=[0,1]) + result = gni.mad() + assert_frame_equal(result,expected) + # describe expected = DataFrame(dict(B = concat([df.loc[[0,1],'B'].describe(),df.loc[[2],'B'].describe()],keys=[1,3]))) expected.index.names = ['A',None] result = g.describe() assert_frame_equal(result,expected) + expected = concat([df.loc[[0,1],['A','B']].describe(),df.loc[[2],['A','B']].describe()],keys=[0,1]) + result = gni.describe() + assert_frame_equal(result,expected) + # any expected = DataFrame([[True, True],[False, True]],columns=['B','C'],index=[1,3]) expected.index.name = 'A'