Skip to content

Commit

Permalink
Improved the __repr__ and __str__ methods for BiocFrames. (#59)
Browse files Browse the repository at this point in the history
* Greatly streamline the print method for BiocFrames.

---------
  • Loading branch information
LTLA authored Oct 31, 2023
1 parent b395dc5 commit 1fa879c
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 128 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Contributors

* Jayaram Kancherla [[email protected]](mailto:[email protected])
* Aaron Lun [[email protected]](mailto:[email protected])
91 changes: 42 additions & 49 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,12 @@ print(bframe)
```

## output
BiocFrame with 3 rows & 2 columns
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ ensembl <list> ┃ symbol <list> ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ ENS00001 │ MAP1A │
│ ENS00002 │ BIN1 │
│ ENS00003 │ ESR1 │
└────────────────┴───────────────┘
BiocFrame with 3 rows and 2 columns
ensembl symbol
<list> <list>
[0] ENS00001 MAP1A
[1] ENS00002 BIN1
[2] ENS00003 ESR1

You can specify complex representations as columns, for example

Expand All @@ -71,14 +69,12 @@ print(bframe2)
```

## output
BiocFrame with 3 rows & 3 columns
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ row_names ┃ ensembl <list> ┃ symbol <list> ┃ ranges <BiocFrame> ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ row1 │ ENS00001 │ MAP1A │ {'chr': 'chr1', 'start': 1000, 'end': 1100} │
│ row2 │ ENS00002 │ BIN1 │ {'chr': 'chr2', 'start': 1100, 'end': 4000} │
│ row3 │ ENS00002 │ ESR1 │ {'chr': 'chr3', 'start': 5000, 'end': 5500} │
└───────────┴────────────────┴───────────────┴─────────────────────────────────────────────┘
BiocFrame with 3 rows and 3 columns
ensembl symbol ranges
<list> <list> <BiocFrame>
row1 ENS00001 MAP1A chr1:1000:1100
row2 ENS00002 BIN1 chr2:1100:4000
row3 ENS00002 ESR1 chr3:5000:5500

### Properties

Expand Down Expand Up @@ -109,14 +105,12 @@ print(bframe)
```

## output
BiocFrame with 3 rows & 2 columns
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ column1 <list> ┃ column2 <list> ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ ENS00001 │ MAP1A │
│ ENS00002 │ BIN1 │
│ ENS00003 │ ESR1 │
└────────────────┴────────────────┘
BiocFrame with 3 rows and 2 columns
column1 column2
<list> <list>
[0] ENS00001 MAP1A
[1] ENS00002 BIN1
[2] ENS00003 ESR1

To add new columns,

Expand All @@ -126,14 +120,12 @@ print(bframe)
```

## output
BiocFrame with 3 rows & 3 columns
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ column1 <list> ┃ column2 <list> ┃ score <range> ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ ENS00001 │ MAP1A │ 2 │
│ ENS00002 │ BIN1 │ 3 │
│ ENS00003 │ ESR1 │ 4 │
└────────────────┴────────────────┴───────────────┘
BiocFrame with 3 rows and 3 columns
column1 column2 score
<list> <list> <range>
[0] ENS00001 MAP1A 2
[1] ENS00002 BIN1 3
[2] ENS00003 ESR1 4

### Subset `BiocFrame`

Expand All @@ -145,12 +137,10 @@ print(sliced)
```

## output
BiocFrame with 1 row & 1 column
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
┃ row_names ┃ column1 <list> ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
│ 1 │ ENS00002 │
└───────────┴────────────────┘
BiocFrame with 1 row and 1 column
column1
<list>
[0] ENS00002

This operation accepts several kinds of subscript: you can specify a boolean vector, a `slice` object, a list of indices, or row/column names to subset.

Expand Down Expand Up @@ -179,20 +169,23 @@ combined = combine(bframe1, bframe2)

# OR an object oriented approach

combined = bframe.combine(bframe2)
combined = bframe1.combine(bframe2)
```

## output
BiocFrame with 10 rows & 2
columns
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ odd <list> ┃ even <list> ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ 1 │ 0 │
│ 3 │ 2 │
│ ... │ ... │
│ 99 │ 88 │
└────────────┴─────────────┘
BiocFrame with 10 rows and 2 columns
odd even
<list> <list>
[0] 1 0
[1] 3 2
[2] 5 4
[3] 7 6
[4] 9 8
[5] 11 0
[6] 33 22
[7] 55 44
[8] 77 66
[9] 99 88

For more details, check out the BiocFrame class [reference](https://biocpy.github.io/BiocFrame/api/biocframe.html#biocframe.BiocFrame.BiocFrame).

Expand Down
3 changes: 1 addition & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ python_requires = >=3.8
# For more information, check out https://semver.org/.
install_requires =
importlib-metadata; python_version<"3.8"
rich
biocgenerics>=0.1.1
biocgenerics>=0.1.3
biocutils>=0.0.6

[options.packages.find]
Expand Down
184 changes: 107 additions & 77 deletions src/biocframe/BiocFrame.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union, Sequence
from warnings import warn

from biocgenerics.colnames import colnames as colnames_generic
Expand All @@ -9,7 +9,8 @@
from biocgenerics.combine_rows import combine_rows
from biocgenerics.rownames import rownames as rownames_generic
from biocgenerics.rownames import set_rownames
from biocutils import is_list_of_type, normalize_subscript
from biocgenerics import show_as_cell, format_table
from biocutils import is_list_of_type, normalize_subscript, print_truncated_list

from ._validators import validate_cols, validate_rows, validate_unique_list
from .Factor import Factor
Expand Down Expand Up @@ -201,89 +202,101 @@ def _validate(self):
self._number_of_rows = 0

def __repr__(self) -> str:
    """Return a single-line, constructor-like description of this frame.

    The string has the form
    ``BiocFrame(data={...}, number_of_rows=N[, row_names=...],
    column_names=...[, mcols=...][, metadata={...}])``. Optional parts are
    emitted only when the corresponding attribute is non-empty.

    Returns:
        The representation string.
    """

    def _format_items(mapping) -> str:
        # Render ``key: value`` pairs. List values go through
        # print_truncated_list; every other value uses its own repr().
        return ", ".join(
            repr(key)
            + ": "
            + (print_truncated_list(value) if isinstance(value, list) else repr(value))
            for key, value in mapping.items()
        )

    output = "BiocFrame(data={" + _format_items(self._data) + "}"

    output += ", number_of_rows=" + str(self.shape[0])
    if self._row_names:
        output += ", row_names=" + print_truncated_list(self._row_names)

    output += ", column_names=" + print_truncated_list(self._column_names)

    if self._mcols is not None and self._mcols.shape[1] > 0:
        # TODO: fix potential recursion here.
        output += ", mcols=" + repr(self._mcols)

    if len(self._metadata):
        # Format the metadata entries themselves; the previous code joined the
        # data-column blobs here, so metadata was never actually shown.
        output += ", metadata={" + _format_items(self._metadata) + "}"

    output += ")"
    return output
def __str__(self) -> str:
    """Return a multi-line, human-readable rendering of this frame.

    The first line states the number of rows and columns. If the frame has at
    least one row and one column, a table follows: a header with each column's
    name and ``<type>``, then one line per displayed row. Frames with more
    than 10 rows show only the first three and last three rows, separated by
    an ellipsis row. A footer lists the ``mcols`` column names and the
    ``metadata`` keys when either is non-empty.

    Returns:
        The formatted string.
    """
    output = f"BiocFrame with {self.dims[0]} row{'s' if self.shape[0] != 1 else ''}"
    output += f" and {self.dims[1]} column{'s' if self.dims[1] != 1 else ''}\n"

    total_rows = self.shape[0]
    has_table = bool(total_rows and self.shape[1])

    if has_table:
        # Show everything for small frames, otherwise the head and the tail.
        truncated = total_rows > 10
        if truncated:
            indices = [0, 1, 2, total_rows - 3, total_rows - 2, total_rows - 1]
        else:
            indices = range(total_rows)

        # Row labels: the row names if present, otherwise "[i]" positions.
        if self._row_names is None:
            row_labels = ["[" + str(i) + "]" for i in indices]
        else:
            row_labels = _slice_or_index(self._row_names, indices)
        if truncated:
            row_labels = row_labels[:3] + [""] + row_labels[3:]
        # Two blank labels line up with the name and type header lines.
        floating = ["", ""] + row_labels

        columns = []
        for name in self._column_names:
            values = self._data[name]
            cells = show_as_cell(values, indices)
            header = [name, "<" + type(values).__name__ + ">"]
            # Widest cell allowed before it is cut and suffixed with "...".
            width_limit = max(40, len(header[0]), len(header[1]))
            for pos, cell in enumerate(cells):
                if len(cell) > width_limit:
                    cells[pos] = cell[: width_limit - 3] + "..."
            if truncated:
                cells = cells[:3] + ["..."] + cells[3:]
            columns.append(header + cells)

        output += format_table(columns, floating_names=floating)

    footer = []
    if self.mcols is not None and self.mcols.shape[1]:
        mcols_names = print_truncated_list(
            self.mcols.column_names, sep=" ", include_brackets=False
        )
        footer.append("mcols (" + str(self.mcols.shape[1]) + "): " + mcols_names)
    if len(self.metadata):
        metadata_keys = print_truncated_list(
            list(self.metadata.keys()), sep=" ", include_brackets=False
        )
        footer.append("metadata (" + str(len(self.metadata)) + "): " + metadata_keys)

    if footer:
        if has_table:
            output += "\n------\n"
        output += "\n".join(footer)

    return output

@property
def shape(self) -> Tuple[int, int]:
Expand Down Expand Up @@ -1102,3 +1115,20 @@ def _colnames_bframe(x: BiocFrame):
@set_colnames.register(BiocFrame)
def _set_colnames_bframe(x: BiocFrame, names: List[str]):
    """``set_colnames`` implementation registered for ``BiocFrame``.

    Assigns ``names`` to ``x.column_names``; the frame is modified in place
    and nothing is returned.
    """
    x.column_names = names


@show_as_cell.register(BiocFrame)
def _show_as_cell_BiocFrame(x: BiocFrame, indices: Sequence[int]) -> List[str]:
    """``show_as_cell`` implementation registered for ``BiocFrame``.

    Every column of ``x`` is rendered with ``show_as_cell`` for the requested
    ``indices``. For each requested row, the per-column strings are then
    joined with ``":"`` in column order.

    Args:
        x: Frame whose rows are to be rendered.
        indices: Row positions to render.

    Returns:
        One string per entry of ``indices``.
    """
    # One list of per-column fragments for each requested row.
    fragments = [[] for _ in indices]

    for name in x._column_names:
        rendered = show_as_cell(x._data[name], indices)
        for pos, cell in enumerate(rendered):
            fragments[pos].append(cell)

    # Use dedicated loop names so the ``x`` parameter is not rebound.
    return [":".join(parts) for parts in fragments]

0 comments on commit 1fa879c

Please sign in to comment.