Skip to content

Commit

Permalink
ENH: added groups attribute back and implemented GroupBy.__len__, addressing GH #99 and GH #95
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Aug 19, 2011
1 parent ba4957a commit c5b1659
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 19 deletions.
17 changes: 15 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,25 @@ def __init__(self, obj, grouper=None, axis=0, level=None,
self.groupings = groupings
self.exclusions = set(exclusions)

def __len__(self):
    """Return the number of entries in ``self.groups``."""
    group_map = self.groups
    return len(group_map)

# Cache slot for the computed groups; filled on first access of ``groups``.
_groups = None

@property
def groups(self):
    """
    Groups for the current grouping key(s), computed once and cached.

    With a single grouping this is ``self.primary.groups``.  With several
    groupings, the ``grouper`` of each grouping is zipped element-wise
    into tuple keys, wrapped in an ``Index`` and handed, together with the
    grouped axis of ``self.obj``, to ``_tseries.groupby``.

    The result is stored on ``self._groups`` so repeated access returns
    the very same object.
    """
    if self._groups is None:
        if len(self.groupings) == 1:
            self._groups = self.primary.groups
        else:
            # combine the individual keys position-by-position into tuples
            to_groupby = zip(*(ping.grouper for ping in self.groupings))
            to_groupby = Index(to_groupby)

            axis = self.obj._get_axis(self.axis)
            self._groups = _tseries.groupby(axis, to_groupby)

    return self._groups

@property
def name(self):
Expand Down
57 changes: 40 additions & 17 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

import pandas.util.testing as tm

# unittest.TestCase

def commonSetUp(self):
self.dateRange = DateRange('1/1/2005', periods=250, offset=dt.bday)
self.stringIndex = Index([rands(8).upper() for x in xrange(250)])
Expand All @@ -36,19 +34,6 @@ def commonSetUp(self):
self.timeMatrix = DataFrame(randMat, columns=self.columnIndex,
index=self.dateRange)


class GroupByTestCase(unittest.TestCase):
    """Exercises the ``groupby`` function with a plain Python key function."""
    setUp = commonSetUp

    def test_python_grouper(self):
        """Each index label must be a member of the group its key maps to."""
        key_of = self.groupDict.get
        grouped = groupby(self.stringIndex, key_of)

        # turn every group's members into a set for membership checks
        members_by_key = {}
        for key, labels in grouped.iteritems():
            members_by_key[key] = set(labels)

        for label in self.stringIndex:
            assert label in members_by_key[key_of(label)]

class TestGroupBy(unittest.TestCase):

def setUp(self):
Expand Down Expand Up @@ -122,6 +107,33 @@ def test_series_agg_corner(self):
result = self.ts.groupby(self.ts * np.nan).sum()
assert_series_equal(result, Series([]))

def test_len(self):
    """``len`` of a GroupBy must equal the number of distinct group keys."""
    df = tm.makeTimeDataFrame()

    # keyed by (year, month, day): the test expects one group per row
    grouped = df.groupby([lambda x: x.year,
                          lambda x: x.month,
                          lambda x: x.day])
    self.assertEqual(len(grouped), len(df))

    # keyed by (year, month): count the distinct pairs found in the index
    grouped = df.groupby([lambda x: x.year,
                          lambda x: x.month])
    expected = len(set((x.year, x.month) for x in df.index))
    self.assertEqual(len(grouped), expected)

def test_groups(self):
    """``groups`` is cached and maps every key to rows carrying that key."""
    # single key
    grouped = self.df.groupby(['A'])
    groups = grouped.groups
    self.assertTrue(groups is grouped.groups)  # caching works

    for k, v in grouped.groups.iteritems():
        self.assertTrue((self.df.ix[v]['A'] == k).all())

    # two keys: each group key is an (A, B) tuple
    grouped = self.df.groupby(['A', 'B'])
    groups = grouped.groups
    self.assertTrue(groups is grouped.groups)  # caching works

    for k, v in grouped.groups.iteritems():
        self.assertTrue((self.df.ix[v]['A'] == k[0]).all())
        self.assertTrue((self.df.ix[v]['B'] == k[1]).all())

def test_aggregate_str_func(self):
def _check_results(grouped):
# single series
Expand Down Expand Up @@ -221,10 +233,21 @@ def test_attr_wrapper(self):
self.assertRaises(AttributeError, getattr, grouped, 'foo')

def test_series_describe_multikey(self):
    """``describe`` on a multi-key Series groupby matches the aggregates."""
    ts = tm.makeTimeSeries()
    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])

    # describe() is evaluated once and each statistic is compared with the
    # corresponding stand-alone aggregation
    result = grouped.describe()
    assert_series_equal(result['mean'], grouped.mean())
    assert_series_equal(result['std'], grouped.std())
    assert_series_equal(result['min'], grouped.min())

def test_frame_describe_multikey(self):
    """Frame-level ``describe`` agrees with per-column ``describe``."""
    grouped = self.tsframe.groupby([lambda x: x.year,
                                    lambda x: x.month])
    result = grouped.describe()

    # check every column against its own per-column result, not just 'A'
    for col, _ in self.tsframe.iteritems():
        expected = grouped[col].describe()
        assert_series_equal(result[col].unstack(), expected)

def test_frame_groupby(self):
grouped = self.tsframe.groupby(lambda x: x.weekday())
Expand Down

0 comments on commit c5b1659

Please sign in to comment.