diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 27001bb69cd05..9159b43536550 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -673,9 +673,17 @@ def var(self, ddof=1): def size(self): """ Compute group sizes + """ return self.grouper.size() + def count(self): + """ + Number of non-null items in each group. + + """ + return self._python_agg_general(lambda x: notnull(x).sum()) + sum = _groupby_function('sum', 'add', np.sum) prod = _groupby_function('prod', 'prod', np.prod) min = _groupby_function('min', 'min', np.min, numeric_only=False) @@ -687,12 +695,10 @@ def size(self): def ohlc(self): """ - Compute sum of values, excluding missing values - - For multiple groupings, the result index will be a MultiIndex + Deprecated, use .resample(how="ohlc") instead. """ - return self._cython_agg_general('ohlc') + raise AttributeError('ohlc is deprecated, use resample(how="ohlc").') def nth(self, n, dropna=None): """ @@ -939,6 +945,7 @@ def _cython_agg_general(self, how, numeric_only=True): result, names = self.grouper.aggregate(obj.values, how) except AssertionError as e: raise GroupByError(str(e)) + # infer old dtype output[name] = self._try_cast(result, obj) if len(output) == 0: @@ -947,6 +954,8 @@ def _cython_agg_general(self, how, numeric_only=True): return self._wrap_aggregated_output(output, names) def _python_agg_general(self, func, *args, **kwargs): + _dtype = kwargs.pop("_dtype", None) + func = _intercept_function(func) f = lambda x: func(x, *args, **kwargs) @@ -955,7 +964,14 @@ def _python_agg_general(self, func, *args, **kwargs): for name, obj in self._iterate_slices(): try: result, counts = self.grouper.agg_series(obj, f) - output[name] = self._try_cast(result, obj) + + if _dtype is None: # infer old dtype + output[name] = self._try_cast(result, obj) + elif _dtype is False: + output[name] = result + else: + output[name] = _possibly_downcast_to_dtype(result, _dtype) + except TypeError: continue @@ -2889,16 +2905,6 @@ def 
_apply_to_column_groupbys(self, func): in self._iterate_column_groupbys()), keys=self._selected_obj.columns, axis=1) - def ohlc(self): - """ - Compute sum of values, excluding missing values - - For multiple groupings, the result index will be a MultiIndex - """ - return self._apply_to_column_groupbys( - lambda x: x._cython_agg_general('ohlc')) - - from pandas.tools.plotting import boxplot_frame_groupby DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index fde9156017c4e..f460214954bfd 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1970,6 +1970,18 @@ def test_size(self): for key, group in grouped: self.assertEquals(result[key], len(group)) + def test_count(self): + df = pd.DataFrame([[1, 2], [1, nan], [3, nan]], columns=['A', 'B']) + count_as = df.groupby('A').count() + count_not_as = df.groupby('A', as_index=False).count() + + res = pd.DataFrame([[1, 1], [3, 0]], columns=['A', 'B']) + assert_frame_equal(count_not_as, res) + assert_frame_equal(count_as, res.set_index('A')) + + count_B = df.groupby('A')['B'].count() + assert_series_equal(count_B, res['B']) + def test_grouping_ndarray(self): grouped = self.df.groupby(self.df['A'].values)