Skip to content

Commit

Permalink
Parses for GenomicRangesList (#44)
Browse files Browse the repository at this point in the history
- Add methods to parse RDS files containing `GenomicRangesList`
- Fix bug in reading strand information, which is mostly stored as RLE vectors (defined in the S4Vectors package).
- Update tests and documentation
  • Loading branch information
jkanche authored Jul 14, 2024
1 parent 8ac6418 commit 3433853
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Version 0.4.4

- Add methods to parse RDS files containing `GenomicRangesList`
- Fix bug in reading strand information, which is mostly stored as RLE vectors.
- Update tests and documentation

## Version 0.4.0 - 0.4.3

- Migrate to the new class implementations
Expand Down
52 changes: 45 additions & 7 deletions src/rds2py/granges.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from genomicranges import GenomicRanges, SeqInfo
from iranges import IRanges
from biocframe import BiocFrame
from genomicranges import GenomicRanges, GenomicRangesList, SeqInfo
from iranges import IRanges

from .parser import get_class
from .pdf import as_pandas_from_dframe
Expand Down Expand Up @@ -36,12 +36,16 @@ def as_granges(robj):

_seqnames = _as_list(robj["attributes"]["seqnames"])

_strand_obj = robj["attributes"]["strand"]["attributes"]["values"]
_strands = _strand_obj["data"]
_strands = robj["attributes"]["strand"]
_fstrand = None
if "attributes" in _strands:
if "levels" in _strands["attributes"]:
_levels_data = _strands["attributes"]["levels"]["data"]
_strands = [_levels_data[x] for x in _strands]
_lengths = _strands["attributes"]["lengths"]["data"]
_factors = _strands["attributes"]["values"]["data"]
_levels = _strands["attributes"]["values"]["attributes"]["levels"]["data"]
_strds = [_levels[x - 1] for x in _factors]
_fstrand = []
for i, x in enumerate(_lengths):
_fstrand.extend([_strds[i]] * x)

_seqinfo_seqnames = robj["attributes"]["seqinfo"]["attributes"]["seqnames"]["data"]
_seqinfo_seqlengths = robj["attributes"]["seqinfo"]["attributes"]["seqlengths"][
Expand Down Expand Up @@ -71,6 +75,7 @@ def as_granges(robj):
return GenomicRanges(
seqnames=_seqnames,
ranges=_ranges,
strand=_fstrand,
names=_gr_names,
mcols=_mcols,
seqinfo=_seqinfo,
Expand Down Expand Up @@ -111,3 +116,36 @@ def _as_list(robj):
_data = _final

return _data


def as_granges_list(robj):
    """Convert a parsed R object into a :py:class:`~genomicranges.GenomicRangesList.GenomicRangesList`.

    The flattened ranges stored under ``unlistData`` are parsed once with
    :py:func:`as_granges` and then cut into consecutive groups using the
    cumulative end positions stored under ``partitioning``.

    Args:
        robj:
            Object parsed from the `RDS` file.
            Usually the result of :py:func:`~rds2py.parser.read_rds`.

    Returns:
        A ``GenomicRangesList`` object.

    Raises:
        TypeError:
            If the R class of ``robj`` is neither ``CompressedGRangesList``
            nor ``GRangesList``.
    """
    _rclass = get_class(robj)
    if _rclass not in ("CompressedGRangesList", "GRangesList"):
        raise TypeError(f"obj is not genomic ranges list, but is `{_rclass}`.")

    _attrs = robj["attributes"]

    # All ranges of all groups, concatenated into one GenomicRanges.
    _flat = as_granges(_attrs["unlistData"])

    _partition = _attrs["partitioning"]["attributes"]
    _names = _partition["NAMES"]["data"]
    _ends = _partition["end"]["data"]

    # Each group starts where the previous one ended; the first starts at 0.
    _starts = [0, *_ends[:-1]]
    _pieces = [_flat[_begin:_stop] for _begin, _stop in zip(_starts, _ends)]

    return GenomicRangesList(ranges=_pieces, names=_names)
Binary file added tests/data/grangeslist.rds
Binary file not shown.
13 changes: 11 additions & 2 deletions tests/test_granges.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pytest

from rds2py.granges import as_granges
from rds2py.granges import as_granges, as_granges_list
from rds2py.parser import read_rds

from genomicranges import GenomicRanges
from genomicranges import GenomicRanges, GenomicRangesList

__author__ = "jkanche"
__copyright__ = "jkanche"
Expand All @@ -16,3 +16,12 @@ def test_granges():
gr = as_granges(robj=robj)

assert isinstance(gr, GenomicRanges)


def test_granges_list():
    """Check that the GRangesList fixture parses into a five-element GenomicRangesList."""
    parsed = read_rds("tests/data/grangeslist.rds")
    result = as_granges_list(robj=parsed)

    assert isinstance(result, GenomicRangesList)
    assert len(result) == 5

0 comments on commit 3433853

Please sign in to comment.