Skip to content

Commit

Permalink
switching laptops
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Oct 17, 2023
1 parent c9ba493 commit 4c903c0
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 57 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ You can easily access UCSC genomes or load a genome annotation from a GTF file u
```python
import genomicranges

gr = genomicranges.from_gtf(<PATH TO GTF>)
gr = genomicranges.parse_gtf(<PATH TO GTF>)
# OR
gr = genomicranges.from_ucsc(genome="hg19")
gr = genomicranges.read_ucsc(genome="hg19")
```
#### Pandas DataFrame

Expand Down
9 changes: 5 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

[metadata]
name = GenomicRanges
description = Container class to represent genomic locations and support genomic analysis.
author = jkanche
description = Container class to represent and operate over genomic regions and annotations.
author = Jayaram Kancherla
author_email = [email protected]
license = MIT
license_files = LICENSE.txt
Expand Down Expand Up @@ -50,9 +50,10 @@ package_dir =
install_requires =
importlib-metadata; python_version<"3.8"
pandas
biocframe>=0.3.9
biocframe>=0.3.13
numpy
prettytable
biocgenerics
biocutils

[options.packages.find]
where = src
Expand Down
127 changes: 78 additions & 49 deletions src/genomicranges/GenomicRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

from biocframe import BiocFrame
from biocframe.types import SlicerArgTypes
from biocgenerics.combine import combine
from biocgenerics.combine_cols import combine_cols
from biocgenerics.combine_rows import combine_rows
from biocutils import is_list_of_type
from numpy import concatenate, count_nonzero, ndarray, sum, zeros
from pandas import DataFrame, concat, isna
from prettytable import PrettyTable

from .interval import (
OVERLAP_QUERY_TYPES,
Expand All @@ -42,7 +45,7 @@


class GenomicRanges(BiocFrame):
"""`GenomicRanges` provides functionality to represent and operate over genomic regions and annotations.
"""`GenomicRanges` provides a container class to represent and operate over genomic regions and annotations.
**Note: Intervals are inclusive on both ends and start at 1.**
Expand Down Expand Up @@ -410,47 +413,66 @@ def mcols(self, return_type: Optional[Callable] = None) -> Any:
raise ValueError(f"{return_type} not supported, {str(e)}")

def __repr__(self) -> str:
table = PrettyTable(padding_width=2)
table.field_names = [str(col) for col in self.column_names]
from io import StringIO

from rich.console import Console
from rich.table import Table

table = Table(
title=f"GenomicRanges with {self.dims[0]} intervals & {self.dims[1] - 4} metadata columns",
show_header=True,
)
if self.row_names is not None:
table.add_column("row_names")

for col in self.column_names:
table.add_column(f"{str(col)} [italic]<{type(self.column(col)).__name__}>")

_rows = []
rows_to_show = 2
_top = self.shape[0]
if _top > rows_to_show:
_top = rows_to_show

# top three rows
# top two rows
for r in range(_top):
_row = self.row(r)
vals = list(_row.values())
res = [str(v) for v in vals]
if self.row_names:
res = [str(self.row_names[r])] + res
_rows.append(res)

if self.shape[0] > 2 * rows_to_show:
# add ...
_rows.append(["..." for _ in range(len(self.column_names))])
_dots = []
if self.row_names:
_dots = ["..."]

_dots.extend(["..." for _ in range(len(self.column_names))])
_rows.append(_dots)

_last = self.shape[0] - rows_to_show
_last = self.shape[0] - _top
if _last <= rows_to_show:
_last = self.shape[0] - _top

# last three rows
for r in range(_last, len(self)):
# last set of rows
for r in range(_last + 1, len(self)):
_row = self.row(r)
vals = list(_row.values())
res = [str(v) for v in vals]
if self.row_names:
res = [str(self.row_names[r])] + res
_rows.append(res)

table.add_rows(_rows)
for _row in _rows:
table.add_row(*_row)

pattern = (
f"Class GenomicRanges with {self.dims[0]} intervals and "
f"{self.dims[1] - 4} metadata columns \n"
f"contains row names?: {self.row_names is not None} \n"
f"{table.get_string()}"
)
console = Console(file=StringIO())
with console.capture() as capture:
console.print(table)

return pattern
return capture.get()

# for documentation, otherwise serves no real use.
def __getitem__(self, args: SlicerArgTypes) -> Union["GenomicRanges", dict, list]:
Expand Down Expand Up @@ -2196,46 +2218,24 @@ def invert_strand(self) -> "GenomicRanges":
metadata=self.metadata,
)

def concat(self, *granges: "GenomicRanges") -> "GenomicRanges":
"""Row-wise concatenate multiple `GenomicRanges` objects.
def combine(self, *other: "GenomicRanges") -> "GenomicRanges":
"""Combine multiple GenomicRanges objects by row.
Note: Fills missing columns with an array of `None`s.
Args:
granges (GenomicRanges): Objects to concatenate.
*other (GenomicRanges): GenomicRanges objects.
Raises:
TypeError: If any ``granges`` are not "GenomicRanges".
TypeError: If any of the objects is not of type GenomicRanges.
Returns:
GenomicRanges: A new concatenated `GenomicRanges` object.
BiocFrame: A combined BiocFrame.
"""
all_granges = [isinstance(gr, GenomicRanges) for gr in granges]

if not all(all_granges):
raise TypeError("all provided objects are not GenomicRanges objects")

all_columns = [gr.column_names for gr in granges]
all_columns.append(self.column_names)
all_unique_columns = list(
set([item for sublist in all_columns for item in sublist])
)

new_data = OrderedDict()
for col in all_unique_columns:
if col not in new_data:
new_data[col] = []

for gr in granges:
if col in gr.column_names:
new_data[col].extend(gr.column(col))
else:
new_data[col].extend([None] * len(gr))

if col in self.column_names:
new_data[col].extend(self.column(col))
else:
new_data[col].extend([None] * len(self))
if not is_list_of_type(other, GenomicRanges):
raise TypeError("All objects to combine must be GenomicRanges objects.")

return GenomicRanges(new_data, column_names=all_unique_columns)
return super().combine(*other)

@classmethod
def empty(cls):
Expand All @@ -2245,3 +2245,32 @@ def empty(cls):
same type as caller, in this case a `GenomicRanges`.
"""
return cls(number_of_rows=0)


@combine.register(GenomicRanges)
def _combine_gr(*x: GenomicRanges):
    """Implementation of the generic `combine` registered for `GenomicRanges`.

    Delegates to the ``combine`` method of the first element, passing the
    remaining elements as the objects to append.

    Args:
        *x (GenomicRanges): Objects to combine.

    Raises:
        ValueError: If ``is_list_of_type`` rejects ``x`` as a collection of
            `GenomicRanges` objects.

    Returns:
        Whatever ``GenomicRanges.combine`` returns for these inputs.
    """
    if is_list_of_type(x, GenomicRanges):
        head = x[0]
        return head.combine(*x[1:])

    raise ValueError("All elements to `combine` must be `GenomicRanges` objects.")


@combine_rows.register(GenomicRanges)
def _combine_rows_gr(*x: GenomicRanges):
    """Implementation of the generic `combine_rows` registered for `GenomicRanges`.

    Delegates to the ``combine`` method of the first element, passing the
    remaining elements as the objects to append.

    Args:
        *x (GenomicRanges): Objects to combine.

    Raises:
        ValueError: If ``is_list_of_type`` rejects ``x`` as a collection of
            `GenomicRanges` objects.

    Returns:
        Whatever ``GenomicRanges.combine`` returns for these inputs.
    """
    if is_list_of_type(x, GenomicRanges):
        head = x[0]
        return head.combine(*x[1:])

    raise ValueError(
        "All elements to `combine_rows` must be `GenomicRanges` objects."
    )


@combine_cols.register(GenomicRanges)
def _combine_cols_gr(*x: GenomicRanges):
    """Implementation of the generic `combine_cols` registered for `GenomicRanges`.

    This function never returns: it always raises one of the two errors
    listed below.

    Args:
        *x (GenomicRanges): Objects to combine.

    Raises:
        ValueError: If ``is_list_of_type`` rejects ``x`` as a collection of
            `GenomicRanges` objects.
        NotImplementedError: In every other case.
    """
    all_granges = is_list_of_type(x, GenomicRanges)

    # The type check takes precedence, so invalid input still reports a
    # ValueError rather than the "not implemented" error.
    if all_granges:
        raise NotImplementedError(
            "`combine_cols` is not implemented for `GenomicRanges` objects."
        )

    raise ValueError(
        "All elements to `combine_cols` must be `GenomicRanges` objects."
    )
2 changes: 1 addition & 1 deletion src/genomicranges/io/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def from_pandas(data: DataFrame) -> "GenomicRanges":
Must contain 'seqnames', 'starts' & 'ends' columns.
Returns:
GenomicRanges: object.
GenomicRanges: Object representing intervals.
"""

from ..GenomicRanges import GenomicRanges
Expand Down
2 changes: 1 addition & 1 deletion tests/test_gr_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_concat():
g_tgt = genomicranges.from_pandas(df_tgt)
assert g_tgt is not None

result = g_src.concat(g_tgt)
result = g_src.combine(g_tgt)

assert result is not None
assert result.shape[0] == 15

0 comments on commit 4c903c0

Please sign in to comment.