From a28e7f1b33e65ca8b29608414a72209bdeedc8cf Mon Sep 17 00:00:00 2001
From: dmitrymyl
Date: Thu, 28 Oct 2021 20:23:22 +0300
Subject: [PATCH] Fixed incorrect strand assignment for "-" RNAs and improved
 subject_dropped.bed to liftOver coordinates.

---
Reviewer notes on this revision (text in this section is dropped by `git am`):
- Removed the three leftover debug print() calls and the commented-out
  previous implementation from build_orthologs.
- Documented drop_duplicates(): it compares neighbouring items only.
- This patch was rebuilt from a copy with collapsed whitespace: the
  indentation of context lines and the position of blank context lines
  are best guesses, and the post-image ids on the `index` lines still
  belong to the original revision. Apply with
  `git apply --ignore-whitespace` or regenerate with `git format-patch`.

 ortho2align/genomicranges.py | 25 ++++++++++++++++++++++++-
 ortho2align/pipeline.py      | 25 ++++++++++++++++++-------
 setup.py                     |  2 +-
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/ortho2align/genomicranges.py b/ortho2align/genomicranges.py
index 08b2d2e..81a689d 100644
--- a/ortho2align/genomicranges.py
+++ b/ortho2align/genomicranges.py
@@ -1210,7 +1210,7 @@ def _prepare_side(self, side='q'):
                            max(self.HSPs, key=lambda i: i.send).send)
             name = self.alignment.srange.name
             strand = nxor_strands(self.HSPs[0].sstrand,
-                                  self.alignment.srange.strand)
+                                  self.alignment.qrange.strand)
             blockSizes = [abs(hsp.send - hsp.sstart) for hsp in self.HSPs]
             blockPvals = [hsp.pval for hsp in self.HSPs]
             if strand == "+":
@@ -1850,6 +1850,29 @@ def to_bed6(self, fileobj):
                                      '.',
                                      str(grange.strand)]) + '\n')
 
+    def drop_duplicates(self):
+        """Returns a copy of the list without adjacent equal ranges.
+
+        Only neighbouring items are compared, so a duplicate is
+        dropped only when it directly follows an equal range. A list
+        with fewer than two items is returned as is (the same object).
+
+        Returns:
+            An instance of the same class built from the kept ranges
+            and from the remaining `init_args` attributes of `self`.
+        """
+        if len(self) <= 1:
+            return self
+        new_ranges = [self[0]]
+        for grange in self[1:]:
+            if grange == new_ranges[-1]:
+                continue
+            new_ranges.append(grange)
+        used_args = {'collection'}
+        kwargs = {attr: getattr(self, attr)
+                  for attr in set(self.init_args) - used_args}
+        return self.__class__(new_ranges, **kwargs)
+
 
 class GenomicRangesList(BaseGenomicRangesList):
     """Represents a list of `GenomicRange` instances.
diff --git a/ortho2align/pipeline.py b/ortho2align/pipeline.py
index 1079d5f..2add24a 100644
--- a/ortho2align/pipeline.py
+++ b/ortho2align/pipeline.py
@@ -884,13 +884,24 @@ def build_orthologs(alignments,
                          for group in subject_orthologs
                          for ortholog in group
                          if ortholog]
-    query_dropped = BaseGenomicRangesList([item[0]
-                                           for item in dropped_ranges
-                                           if item])
-    subject_dropped = BaseGenomicRangesList([srange
-                                             for item in dropped_ranges
-                                             if item
-                                             for srange in item[1]])
+    # For every dropped query range keep the range itself and, for each
+    # of its dropped subject ranges, whatever `find_neighbours` returns
+    # against the query range's 'lifted' relations.
+    query_dropped = []
+    subject_dropped = []
+    for item in dropped_ranges:
+        if len(item) != 2:
+            continue
+        query_dropped_range, subject_dropped_ranges = item
+        query_dropped.append(query_dropped_range)
+        for grange in subject_dropped_ranges:
+            subject_lifts = grange.find_neighbours(
+                query_dropped_range.relations['lifted'])
+            subject_dropped.append(subject_lifts)
+    query_dropped = BaseGenomicRangesList(query_dropped)
+    subject_dropped = BaseGenomicRangesList(srange
+                                            for group in subject_dropped
+                                            for srange in group).drop_duplicates()
     total_dropped = len(query_dropped)
 
     query_exception_list = BaseGenomicRangesList(query_exception_ranges)
diff --git a/setup.py b/setup.py
index f4bbd55..e0de5a0 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 
 setup(name='ortho2align',
-      version='1.0.1',
+      version='1.0.3',
       description='A lncRNA ortholog discovery tool based on syntenic regions and statistical assessment of alignment nonrandomness.',
       url='http://github.com/dmitrymyl/ortho2align',
       author='Dmitry Mylarshchikov',