Skip to content

Commit

Permalink
[BEAM-12533] Add simple __repr__ for DeferredDataFrame and `DeferredSeries` (#15089)
Browse files Browse the repository at this point in the history

* Add simple __repr__ for DataFrame and Series

* lint

* yapf
  • Loading branch information
TheNeuralBit authored Jun 29, 2021
1 parent 2b0597e commit 5fffad6
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
18 changes: 18 additions & 0 deletions sdks/python/apache_beam/dataframe/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ def wrapper(self, *args, **kwargs):


class DeferredDataFrameOrSeries(frame_base.DeferredFrame):
def _render_indexes(self):
  """Describe the index level name(s) of this frame for use in ``__repr__``.

  Returns:
    ``"index=<name>"`` when the index has exactly one level, otherwise
    ``"indexes=[<name>, ...]"`` with one entry per level. A level whose name
    is ``None`` is shown as ``<unnamed>``; every other name is shown via
    ``repr``.
  """
  def _label(level_name):
    # Levels without a name get a fixed placeholder instead of 'None'.
    return repr(level_name) if level_name is not None else '<unnamed>'

  idx = self.index
  if idx.nlevels != 1:
    rendered = [_label(level_name) for level_name in idx.names]
    return 'indexes=[' + ', '.join(rendered) + ']'
  return 'index=' + _label(idx.name)

# ``__array__`` is produced by ``frame_base.wont_implement_method`` from
# ``pd.Series.__array__`` with the reason tag "non-deferred-result".
# NOTE(review): presumably calling it raises instead of materializing data --
# confirm against frame_base.
__array__ = frame_base.wont_implement_method(
pd.Series, '__array__', reason="non-deferred-result")
Expand Down Expand Up @@ -1036,6 +1044,11 @@ def _set_index(self, value):
@populate_not_implemented(pd.Series)
@frame_base.DeferredFrame._register_for(pd.Series)
class DeferredSeries(DeferredDataFrameOrSeries):
def __repr__(self):
  """Return a one-line summary: series name, dtype, and index name(s)."""
  # Fields are evaluated in the same order they appear in the output.
  fields = [
      f'name={self.name!r}',
      f'dtype={self.dtype}',
      self._render_indexes(),
  ]
  return 'DeferredSeries(' + ', '.join(fields) + ')'

@property # type: ignore
@frame_base.with_docs_from(pd.Series)
def name(self):
Expand Down Expand Up @@ -1942,6 +1955,11 @@ def repeat(self, repeats, axis):
@populate_not_implemented(pd.DataFrame)
@frame_base.DeferredFrame._register_for(pd.DataFrame)
class DeferredDataFrame(DeferredDataFrameOrSeries):
def __repr__(self):
  """Return a one-line summary: column labels and index name(s)."""
  column_labels = list(self.columns)
  fields = [f'columns={column_labels}', self._render_indexes()]
  return 'DeferredDataFrame(' + ', '.join(fields) + ')'

@property # type: ignore
@frame_base.with_docs_from(pd.DataFrame)
def columns(self):
Expand Down
111 changes: 111 additions & 0 deletions sdks/python/apache_beam/dataframe/frames_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2059,5 +2059,116 @@ def test_docs_defined(self, beam_type, pd_type):
f'operations:\n{docstring_missing}')


class ReprTest(unittest.TestCase):
  """Exact-string checks for ``repr()`` of deferred DataFrames and Series.

  Every case wraps a concrete pandas object in a ``ConstantExpression``,
  turns it into a deferred frame, and compares its ``repr`` to a literal.
  """
  @staticmethod
  def _defer(pandas_obj):
    """Wrap a concrete pandas object as a deferred frame."""
    return frame_base.DeferredFrame.wrap(
        expressions.ConstantExpression(pandas_obj))

  @staticmethod
  def _pivoted_frame():
    """Build a small pivoted DataFrame indexed by 'foofoofoo'."""
    source = pd.DataFrame({
        'foofoofoo': ['one', 'one', 'one', 'two', 'two', 'two'],
        'barbar': ['A', 'B', 'C', 'A', 'B', 'C'],
        'bazzy': [1, 2, 3, 4, 5, 6],
        'zoop': ['x', 'y', 'z', 'q', 'w', 't']
    })
    return source.pivot(index='foofoofoo', columns='barbar')

  def test_basic_dataframe(self):
    deferred = self._defer(GROUPBY_DF)
    expected = (
        "DeferredDataFrame(columns=['group', 'foo', 'bar', 'baz', 'bool', "
        "'str'], index=<unnamed>)")
    self.assertEqual(repr(deferred), expected)

  def test_dataframe_with_named_index(self):
    deferred = self._defer(GROUPBY_DF.set_index('group'))
    expected = (
        "DeferredDataFrame(columns=['foo', 'bar', 'baz', 'bool', 'str'], "
        "index='group')")
    self.assertEqual(repr(deferred), expected)

  def test_dataframe_with_partial_named_index(self):
    deferred = self._defer(GROUPBY_DF.set_index([GROUPBY_DF.index, 'group']))
    expected = (
        "DeferredDataFrame(columns=['foo', 'bar', 'baz', 'bool', 'str'], "
        "indexes=[<unnamed>, 'group'])")
    self.assertEqual(repr(deferred), expected)

  def test_dataframe_with_named_multi_index(self):
    deferred = self._defer(GROUPBY_DF.set_index(['str', 'group']))
    expected = (
        "DeferredDataFrame(columns=['foo', 'bar', 'baz', 'bool'], "
        "indexes=['str', 'group'])")
    self.assertEqual(repr(deferred), expected)

  def test_dataframe_with_multiple_column_levels(self):
    deferred = self._defer(self._pivoted_frame())
    expected = (
        "DeferredDataFrame(columns=[('bazzy', 'A'), ('bazzy', 'B'), "
        "('bazzy', 'C'), ('zoop', 'A'), ('zoop', 'B'), ('zoop', 'C')], "
        "index='foofoofoo')")
    self.assertEqual(repr(deferred), expected)

  def test_dataframe_with_multiple_column_and_multiple_index_levels(self):
    pivoted = self._pivoted_frame()
    # Stack an extra, unnamed level in front of the existing named index.
    pivoted.index = [['a', 'b'], pivoted.index]

    # pandas repr displays this:
    #             bazzy       zoop
    # barbar          A  B  C    A  B  C
    #   foofoofoo
    # a one           1  2  3    x  y  z
    # b two           4  5  6    q  w  t
    deferred = self._defer(pivoted)
    expected = (
        "DeferredDataFrame(columns=[('bazzy', 'A'), ('bazzy', 'B'), "
        "('bazzy', 'C'), ('zoop', 'A'), ('zoop', 'B'), ('zoop', 'C')], "
        "indexes=[<unnamed>, 'foofoofoo'])")
    self.assertEqual(repr(deferred), expected)

  def test_basic_series(self):
    series = self._defer(GROUPBY_DF['bool'])
    self.assertEqual(
        repr(series),
        "DeferredSeries(name='bool', dtype=bool, index=<unnamed>)")

  def test_series_with_named_index(self):
    series = self._defer(GROUPBY_DF.set_index('group')['str'])
    self.assertEqual(
        repr(series),
        "DeferredSeries(name='str', dtype=object, index='group')")

  def test_series_with_partial_named_index(self):
    reindexed = GROUPBY_DF.set_index([GROUPBY_DF.index, 'group'])
    series = self._defer(reindexed['bar'])
    expected = (
        "DeferredSeries(name='bar', dtype=float64, "
        "indexes=[<unnamed>, 'group'])")
    self.assertEqual(repr(series), expected)

  def test_series_with_named_multi_index(self):
    reindexed = GROUPBY_DF.set_index(['str', 'group'])
    series = self._defer(reindexed['baz'])
    self.assertEqual(
        repr(series),
        "DeferredSeries(name='baz', dtype=float64, indexes=['str', 'group'])")


if __name__ == '__main__':
unittest.main()

0 comments on commit 5fffad6

Please sign in to comment.