From 4e50ea0fc4442073210cca9167f7ed6c1df4db21 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sun, 14 Jul 2024 15:52:46 -0700 Subject: [PATCH] Implement range based methods for `GenomicRangesList` (#110) - Expose the `generic_accessor` method used internally by `GenomicRangesList` to call functions from the underlying GenomicRanges for each element. - Add class method to initialize `GenomicRangesList` from dictionary. - Update tests --- CHANGELOG.md | 6 +++ src/genomicranges/GenomicRanges.py | 4 +- src/genomicranges/GenomicRangesList.py | 59 +++++++++++++++++++------- tests/test_gr_to_grl.py | 2 +- 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7666a3c..6048776 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Version 0.4.26 + +- Expose the `generic_accessor` method used internally by `GenomicRangesList` to call functions from the underlying GenomicRanges for each element. +- Add class method to initialize `GenomicRangesList` from dictionary. +- Update tests + ## Version 0.4.25 - Method to split `GenomicRanges` by a list of groups. diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 2109f94..1057ff4 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -1568,9 +1568,11 @@ def narrow( return output def _group_indices_by_chrm(self, ignore_strand: bool = False) -> dict: - __strand = self._strand + __strand = self._strand.copy() if ignore_strand: __strand = np.zeros(len(self), dtype=np.int8) + # else: + # __strand[__strand == 0] = 1 _seqnames = [self._seqinfo._seqnames[i] for i in self._seqnames] grp_keys = np.char.add( diff --git a/src/genomicranges/GenomicRangesList.py b/src/genomicranges/GenomicRangesList.py index ad1ffe6..ae5b0b7 100644 --- a/src/genomicranges/GenomicRangesList.py +++ b/src/genomicranges/GenomicRangesList.py @@ -2,8 +2,8 @@ from warnings import warn import biocutils as ut -from biocframe import BiocFrame import numpy as np +from biocframe import BiocFrame from .GenomicRanges import GenomicRanges @@ -593,7 +593,9 @@ def groups(self, group: Union[str, int]) -> "GenomicRangesList": return self[group] - def _generic_accessor(self, prop: str, func: bool = False) -> Dict[str, list]: + def generic_accessor( + self, prop: str, func: bool = False, cast: bool = False + ) -> Union[Dict[str, list], "GenomicRangesList"]: _all_prop = {} _ranges = self.ranges _groups = self.names @@ -610,6 +612,10 @@ def _generic_accessor(self, prop: str, func: bool = False) -> Dict[str, list]: _all_prop[_key] = _val + if cast is True: + current_class_const = type(self) + return current_class_const.from_dict(_all_prop) + return _all_prop def element_nrows(self) -> Dict[str, List[str]]: @@ -619,7 +625,7 @@ def element_nrows(self) -> Dict[str, List[str]]: An integer vector where each value corresponds to the length of the contained GenomicRanges object. """ - return self._generic_accessor("__len__", func=True) + return self.generic_accessor("__len__", func=True) def is_empty(self) -> bool: """Whether ``GRangesList`` has no elements or if all its elements are empty. @@ -650,7 +656,7 @@ def seqnames(self) -> Dict[str, List[str]]: A list with the same length as keys in the object, each element in the list contains another list of sequence names. """ - return self._generic_accessor("seqnames") + return self.generic_accessor("seqnames") @property def start(self) -> Dict[str, List[int]]: @@ -660,7 +666,7 @@ def start(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("start") + return self.generic_accessor("start") @property def end(self) -> Dict[str, List[int]]: @@ -670,7 +676,7 @@ def end(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("end") + return self.generic_accessor("end") @property def width(self) -> Dict[str, List[int]]: @@ -680,7 +686,7 @@ def width(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("width") + return self.generic_accessor("width") @property def strand(self) -> Dict[str, List[int]]: @@ -690,7 +696,7 @@ def strand(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("strand") + return self.generic_accessor("strand") @property def seq_info(self) -> Dict[str, List[int]]: @@ -700,7 +706,7 @@ def seq_info(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("seq_info") + return self.generic_accessor("seq_info") @property def is_circular(self) -> Dict[str, List[int]]: @@ -710,7 +716,7 @@ def is_circular(self) -> Dict[str, List[int]]: A list with the same length as keys in the object, each element in the list contains another list values. """ - return self._generic_accessor("is_circular") + return self.generic_accessor("is_circular") def get_range_lengths(self) -> dict: """ @@ -836,9 +842,9 @@ def __getitem__( raise TypeError("Arguments to subset `GenomicRangesList` is not supported.") - ########################## - ######>> empty <<######### - ########################## + ####################################### + ######>> class initializers <<######### + ####################################### @classmethod def empty(cls, n: int): @@ -851,11 +857,20 @@ def empty(cls, n: int): return cls(ranges=GenomicRanges.empty(), range_lengths=_range_lengths) + @classmethod + def from_dict(cls, x: dict): + """Create a `GenomicRangesList` object from :py:class:`~dict`. + + Returns: + same type as caller, in this case a `GenomicRangesList`. + """ + return cls(ranges=list(x.values()), names=list(x.keys())) + ############################### ######>> to granges <<######### ############################### - def to_genomic_ranges(self) -> GenomicRanges: + def as_genomic_ranges(self) -> GenomicRanges: """Coerce object to a :py:class:`~genomicranges.GenomicRanges.GenomicRanges`. Returns: @@ -873,9 +888,21 @@ def to_genomic_ranges(self) -> GenomicRanges: return _combined_ranges - def to_granges(self) -> GenomicRanges: + def as_granges(self) -> GenomicRanges: """Alias to :py:meth:`~to_genomic_ranges`.""" - return self.to_genomic_ranges() + return self.as_genomic_ranges() + + #################################### + ######>> GRanges methods <<######### + #################################### + + def range(self) -> "GenomicRangesList": + """Calculate range bounds for each genomic element. + + Returns: + A new ``GenomicRangesList`` object with the range bounds. + """ + return self.generic_accessor("range", func=True, cast=True) @ut.combine_sequences.register(GenomicRangesList) diff --git a/tests/test_gr_to_grl.py b/tests/test_gr_to_grl.py index 17ce20a..932ff26 100644 --- a/tests/test_gr_to_grl.py +++ b/tests/test_gr_to_grl.py @@ -78,7 +78,7 @@ def test_to_granges(): ] ) - roundtrip = splits.to_genomic_ranges() + roundtrip = splits.as_genomic_ranges() assert roundtrip is not None assert isinstance(roundtrip, GenomicRanges)