From 14d2cbc7ba1875cc57cf88f937457ddbb795b592 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 15 Jul 2024 18:41:42 -0700 Subject: [PATCH 1/7] use get_seqnames() to access seqnames, avoids the warning messages --- src/genomicranges/GenomicRanges.py | 36 +++++++++++++++--------------- src/genomicranges/SeqInfo.py | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 6b86da4..392b629 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -208,7 +208,7 @@ def __init__( ) def _build_reverse_seqindex(self, seqinfo: SeqInfo): - self._reverse_seqindex = ut.reverse_index.build_reverse_index(seqinfo.seqnames) + self._reverse_seqindex = ut.reverse_index.build_reverse_index(seqinfo.get_seqnames()) def _remove_reverse_seqindex(self): del self._reverse_seqindex @@ -354,7 +354,7 @@ def __str__(self) -> str: header = ["seqnames", ""] _seqnames = [] for x in self._seqnames[indices]: - _seqnames.append(self._seqinfo.seqnames[x]) + _seqnames.append(self._seqinfo.get_seqnames()[x]) showed = _seqnames if insert_ellipsis: @@ -407,7 +407,7 @@ def __str__(self) -> str: + str(len(self._seqinfo)) + " sequences): " + ut.print_truncated_list( - self._seqinfo.seqnames, + self._seqinfo.get_seqnames(), sep=" ", include_brackets=False, transform=lambda y: y, @@ -453,9 +453,9 @@ def get_seqnames( """ if as_type == "factor": - return self._seqnames, self._seqinfo.seqnames + return self._seqnames, self._seqinfo.get_seqnames() elif as_type == "list": - return [self._seqinfo.seqnames[x] for x in self._seqnames] + return [self._seqinfo.get_seqnames()[x] for x in self._seqnames] else: raise ValueError("Argument 'as_type' must be 'factor' or 'list'.") @@ -481,7 +481,7 @@ def set_seqnames( if not isinstance(seqnames, np.ndarray): seqnames = np.asarray( - [self._seqinfo.seqnames.index(x) for x in list(seqnames)] + [self._seqinfo.get_seqnames().index(x) for x in list(seqnames)] ) output = self._define_output(in_place) @@ -570,7 +570,7 @@ def get_strand( vector is retuned. If ``factor``, a tuple width levels as a dictionary and - indices to ``seqinfo.seqnames`` is returned. + indices to ``seqinfo.get_seqnames()`` is returned. If ``list``, then codes are mapped to levels and returned. @@ -1684,7 +1684,7 @@ def range( rev_map = [] groups = [] - for seq in self._seqinfo.seqnames: + for seq in self._seqinfo.get_seqnames(): _iter_strands = [0] if ignore_strand is True else [1, -1, 0] for strd in _iter_strands: _key = f"{seq}{_granges_delim}{strd}" @@ -1738,7 +1738,7 @@ def gaps( all_grp_ranges = [] groups = [] - for i, chrm in enumerate(self._seqinfo.seqnames): + for i, chrm in enumerate(self._seqinfo.get_seqnames()): _iter_strands = [0] if ignore_strand is True else [1, -1, 0] for strd in _iter_strands: _key = f"{chrm}{_granges_delim}{strd}" @@ -1801,7 +1801,7 @@ def disjoin( rev_map = [] groups = [] - for seq in self._seqinfo.seqnames: + for seq in self._seqinfo.get_seqnames(): _iter_strands = [0] if ignore_strand is True else [1, -1, 0] for strd in _iter_strands: _key = f"{seq}{_granges_delim}{strd}" @@ -1930,7 +1930,7 @@ def setdiff(self, other: "GenomicRanges") -> "GenomicRanges": range_bounds = all_combined.range(ignore_strand=True) rb_ends = {} for _, val in range_bounds: - rb_ends[val.seqnames[0]] = val.end[0] + rb_ends[val.get_seqnames()[0]] = val.end[0] x_gaps = self.gaps(end=rb_ends) x_gaps_u = x_gaps.union(other) @@ -1960,7 +1960,7 @@ def intersect(self, other: "GenomicRanges") -> "GenomicRanges": range_bounds = all_combined.range(ignore_strand=True) rb_ends = {} for _, val in range_bounds: - rb_ends[val.seqnames[0]] = val.end[0] + rb_ends[val.get_seqnames()[0]] = val.end[0] _gaps = other.gaps(end=rb_ends) # _inter = self.setdiff(_gaps) @@ -2452,9 +2452,9 @@ def match(self, query: "GenomicRanges") -> List[List[int]]: for i in range(len(query)): try: - _seqname = query.seqnames[i] + _seqname = query.get_seqnames()[i] except Exception as _: - warn(f"'{query.seqnames[i]}' is not present in subject.") + warn(f"'{query.get_seqnames()[i]}' is not present in subject.") _strand = query._strand[i] @@ -2653,7 +2653,7 @@ def tile_by_range( val._ranges._start[0], val._ranges.end[0] - 1, twidth ) - seqnames.extend([val.seqnames[0]] * len(all_intervals)) + seqnames.extend([val.get_seqnames()[0]] * len(all_intervals)) strand.extend([int(val.strand[0])] * len(all_intervals)) starts.extend([x[0] for x in all_intervals]) widths.extend(x[1] for x in all_intervals) @@ -2715,7 +2715,7 @@ def tile( val._ranges._start[0], val._ranges.end[0] - 1, twidth ) - seqnames.extend([val.seqnames[0]] * len(all_intervals)) + seqnames.extend([val.get_seqnames()[0]] * len(all_intervals)) strand.extend([int(val.strand[0])] * len(all_intervals)) starts.extend([x[0] for x in all_intervals]) widths.extend(x[1] for x in all_intervals) @@ -2757,7 +2757,7 @@ def sliding_windows(self, width: int, step: int = 1) -> "GenomicRanges": step=step, ) - seqnames.extend([val.seqnames[0]] * len(all_intervals)) + seqnames.extend([val.get_seqnames()[0]] * len(all_intervals)) strand.extend([int(val.strand[0])] * len(all_intervals)) starts.extend([x[0] for x in all_intervals]) widths.extend(x[1] for x in all_intervals) @@ -2814,7 +2814,7 @@ def tile_genome( seqlen_ = seqlengths if isinstance(seqlengths, SeqInfo): - seqlen_ = dict(zip(seqlengths.seqnames, seqlengths.seqlengths)) + seqlen_ = dict(zip(seqlengths.get_seqnames(), seqlengths.seqlengths)) seqnames = [] strand = [] diff --git a/src/genomicranges/SeqInfo.py b/src/genomicranges/SeqInfo.py index 826af48..06e8be7 100644 --- a/src/genomicranges/SeqInfo.py +++ b/src/genomicranges/SeqInfo.py @@ -65,7 +65,7 @@ def __iter__(self): def __next__(self): if self._current_index < len(self._sinfo): - iter_row_index = self._sinfo.seqnames[self._current_index] + iter_row_index = self._sinfo._seqnames[self._current_index] iter_slice = self._sinfo[self._current_index] self._current_index += 1 From c45be41a979926381db49163774e96a68ba8c2d1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 01:42:19 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/genomicranges/GenomicRanges.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 392b629..a848041 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -208,7 +208,9 @@ def __init__( ) def _build_reverse_seqindex(self, seqinfo: SeqInfo): - self._reverse_seqindex = ut.reverse_index.build_reverse_index(seqinfo.get_seqnames()) + self._reverse_seqindex = ut.reverse_index.build_reverse_index( + seqinfo.get_seqnames() + ) def _remove_reverse_seqindex(self): del self._reverse_seqindex From 87cf5873ecad6bd22d0fddf3080030a6335a8aba Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 15 Jul 2024 21:05:10 -0700 Subject: [PATCH 3/7] strand-aware overlaps and search methods --- src/genomicranges/GenomicRanges.py | 103 +++++++++++++++++++++++++---- tests/test_gr_methods_search.py | 2 +- tests/test_gr_overlaps.py | 8 +-- 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 392b629..13a8867 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -2098,8 +2098,18 @@ def find_overlaps( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + _grp_split = group.split(_granges_delim) + if _grp_split[1] != "0": + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.find_overlaps( @@ -2112,7 +2122,7 @@ def find_overlaps( ) for j, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = _rev_map return rev_map @@ -2172,8 +2182,18 @@ def count_overlaps( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + _grp_split = group.split(_granges_delim) + if _grp_split[1] != "0": + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.find_overlaps( @@ -2186,7 +2206,7 @@ def count_overlaps( ) for j, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = len(_rev_map) return rev_map @@ -2246,8 +2266,18 @@ def subset_by_overlaps( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + _grp_split = group.split(_granges_delim) + if _grp_split[1] != "0": + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.find_overlaps( @@ -2260,7 +2290,7 @@ def subset_by_overlaps( ) for _, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map.extend(_rev_map) return self[list(set(rev_map))] @@ -2300,8 +2330,33 @@ def nearest( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + print(group) + _grp_split = group.split(_granges_delim) + print(_grp_split) + if _grp_split[1] != "0": + print("if part") + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + print("any_grp_fwd", _any_grp_fwd) + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + print("else") + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + print("any_grp_fwd", _any_grp_fwd) + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + print("_any_grp_rev", _any_grp_rev) + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.nearest( @@ -2309,7 +2364,7 @@ def nearest( ) for j, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = _rev_map return rev_map @@ -2345,8 +2400,18 @@ def precede( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + _grp_split = group.split(_granges_delim) + if _grp_split[1] != "0": + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.precede( @@ -2354,7 +2419,7 @@ def precede( ) for j, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = _rev_map return rev_map @@ -2390,16 +2455,30 @@ def follow( query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) for group, indices in query_chrm_grps.items(): + _subset = [] if group in subject_chrm_grps: - _sub_subset = self[subject_chrm_grps[group]] + _subset.extend(subject_chrm_grps[group]) + + print(group) + + _grp_split = group.split(_granges_delim) + if _grp_split[1] != "0": + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + if len(_subset) > 0: + _sub_subset = self[_subset] _query_subset = query[indices] res_idx = _sub_subset._ranges.follow( query=_query_subset._ranges, select=select, delete_index=False ) + print(res_idx) + for j, val in enumerate(res_idx): - _rev_map = [subject_chrm_grps[group][x] for x in val] + _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = _rev_map return rev_map diff --git a/tests/test_gr_methods_search.py b/tests/test_gr_methods_search.py index 8cc33f8..56e108b 100644 --- a/tests/test_gr_methods_search.py +++ b/tests/test_gr_methods_search.py @@ -40,7 +40,7 @@ def test_nearest(): query_hits = gr.nearest(q_gr) assert query_hits is not None - assert query_hits == [[4], [3], []] + assert query_hits == [[5], [1,2,3], [9]] def test_precede(): diff --git a/tests/test_gr_overlaps.py b/tests/test_gr_overlaps.py index b6eed30..e267492 100644 --- a/tests/test_gr_overlaps.py +++ b/tests/test_gr_overlaps.py @@ -49,7 +49,7 @@ def test_find_overlaps(): assert res is not None assert isinstance(res, list) - assert res == [[1, 2]] + assert res == [[1, 2, 3]] def test_find_overlaps_query_type(): @@ -59,7 +59,7 @@ def test_find_overlaps_query_type(): res = subject.find_overlaps(query, query_type="within") assert res is not None - assert res == [[1, 2]] + assert res == [[1, 2, 3]] def test_count_overlaps(): @@ -70,7 +70,7 @@ def test_count_overlaps(): assert res is not None assert isinstance(res, list) - assert res == [2] + assert res == [3] def test_subset_by_overlaps(): @@ -87,4 +87,4 @@ def test_subset_by_overlaps(): assert res is not None assert isinstance(res, GenomicRanges) - assert len(res) == 2 + assert len(res) == 3 From 9d851acfc5bd4cb35ef8ca87088cb9a999474468 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 04:05:46 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/genomicranges/GenomicRanges.py | 12 ++++++------ tests/test_gr_methods_search.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 63ac9a1..5bceb88 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -2103,7 +2103,7 @@ def find_overlaps( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" @@ -2187,7 +2187,7 @@ def count_overlaps( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" @@ -2271,7 +2271,7 @@ def subset_by_overlaps( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" @@ -2335,7 +2335,7 @@ def nearest( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + print(group) _grp_split = group.split(_granges_delim) print(_grp_split) @@ -2405,7 +2405,7 @@ def precede( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" @@ -2462,7 +2462,7 @@ def follow( _subset.extend(subject_chrm_grps[group]) print(group) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" diff --git a/tests/test_gr_methods_search.py b/tests/test_gr_methods_search.py index 56e108b..5f205b3 100644 --- a/tests/test_gr_methods_search.py +++ b/tests/test_gr_methods_search.py @@ -40,7 +40,7 @@ def test_nearest(): query_hits = gr.nearest(q_gr) assert query_hits is not None - assert query_hits == [[5], [1,2,3], [9]] + assert query_hits == [[5], [1, 2, 3], [9]] def test_precede(): From 91b25f7e6539419fcb173f717f819fc94b1317b5 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 15 Jul 2024 21:32:35 -0700 Subject: [PATCH 5/7] update tests --- src/genomicranges/GenomicRanges.py | 58 ++++++++++++++++++++++++------ tests/test_gr_binnedAvg.py | 2 +- tests/test_gr_methods_search.py | 2 +- tests/test_gr_overlaps.py | 11 ++++++ 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index 63ac9a1..f3b6a51 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -2099,16 +2099,31 @@ def find_overlaps( subject_chrm_grps = self._group_indices_by_chrm(ignore_strand=ignore_strand) query_chrm_grps = query._group_indices_by_chrm(ignore_strand=ignore_strand) + print("subkect keys", subject_chrm_grps.keys()) + print("query keys", query_chrm_grps.keys()) + for group, indices in query_chrm_grps.items(): + print(group) _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) _grp_split = group.split(_granges_delim) + print(_grp_split) if _grp_split[1] != "0": + print("if") _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + print("else") + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) if len(_subset) > 0: _sub_subset = self[_subset] @@ -2193,6 +2208,14 @@ def count_overlaps( _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) if len(_subset) > 0: _sub_subset = self[_subset] @@ -2277,6 +2300,14 @@ def subset_by_overlaps( _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) if len(_subset) > 0: _sub_subset = self[_subset] @@ -2336,24 +2367,17 @@ def nearest( if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - print(group) _grp_split = group.split(_granges_delim) - print(_grp_split) if _grp_split[1] != "0": - print("if part") _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" - print("any_grp_fwd", _any_grp_fwd) if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) else: - print("else") _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" - print("any_grp_fwd", _any_grp_fwd) if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" - print("_any_grp_rev", _any_grp_rev) if _any_grp_rev in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_rev]) @@ -2411,6 +2435,14 @@ def precede( _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) if len(_subset) > 0: _sub_subset = self[_subset] @@ -2460,14 +2492,20 @@ def follow( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - - print(group) _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" if _any_grp_fwd in subject_chrm_grps: _subset.extend(subject_chrm_grps[_any_grp_fwd]) + else: + _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}1" + if _any_grp_fwd in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_fwd]) + + _any_grp_rev = f"{_grp_split[0]}{_granges_delim}-1" + if _any_grp_rev in subject_chrm_grps: + _subset.extend(subject_chrm_grps[_any_grp_rev]) if len(_subset) > 0: _sub_subset = self[_subset] @@ -2477,8 +2515,6 @@ def follow( query=_query_subset._ranges, select=select, delete_index=False ) - print(res_idx) - for j, val in enumerate(res_idx): _rev_map = [_subset[x] for x in val] rev_map[indices[j]] = _rev_map diff --git a/tests/test_gr_binnedAvg.py b/tests/test_gr_binnedAvg.py index f2c4802..bdea117 100644 --- a/tests/test_gr_binnedAvg.py +++ b/tests/test_gr_binnedAvg.py @@ -44,4 +44,4 @@ def test_binned_average(): res = subject.binned_average(bins=query, scorename="score", outname="binned_score") assert res is not None - assert res.mcols.get_column("binned_score") == [2] + assert res.mcols.get_column("binned_score") == [3] diff --git a/tests/test_gr_methods_search.py b/tests/test_gr_methods_search.py index 56e108b..b7641c8 100644 --- a/tests/test_gr_methods_search.py +++ b/tests/test_gr_methods_search.py @@ -68,7 +68,7 @@ def test_follow(): query_hits = gr.follow(q_gr) assert query_hits is not None - assert query_hits == [[4], [], []] + assert query_hits == [[5], [], [9]] def test_distance(): diff --git a/tests/test_gr_overlaps.py b/tests/test_gr_overlaps.py index e267492..d557121 100644 --- a/tests/test_gr_overlaps.py +++ b/tests/test_gr_overlaps.py @@ -61,6 +61,17 @@ def test_find_overlaps_query_type(): assert res is not None assert res == [[1, 2, 3]] +def test_find_overlaps_rtrip(): + x = GenomicRanges(["chr1", "chr1"], IRanges([2, 9] , [7, 19]), strand=["+", "-"]) + y = GenomicRanges(["chr1"], IRanges([5], [10]), strand=["*"]) + + resxy = x.find_overlaps(y) + assert resxy is not None + assert resxy == [[0, 1]] + + resyx = y.find_overlaps(x) + assert resyx is not None + assert resyx == [[0], [0]] def test_count_overlaps(): assert subject is not None From c9c6c18f024a060c29b8ccd631abcb91047ec35a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 04:34:15 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/genomicranges/GenomicRanges.py | 4 ++-- tests/test_gr_overlaps.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/genomicranges/GenomicRanges.py b/src/genomicranges/GenomicRanges.py index ab671db..5023a59 100644 --- a/src/genomicranges/GenomicRanges.py +++ b/src/genomicranges/GenomicRanges.py @@ -2359,7 +2359,7 @@ def nearest( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" @@ -2485,7 +2485,7 @@ def follow( _subset = [] if group in subject_chrm_grps: _subset.extend(subject_chrm_grps[group]) - + _grp_split = group.split(_granges_delim) if _grp_split[1] != "0": _any_grp_fwd = f"{_grp_split[0]}{_granges_delim}0" diff --git a/tests/test_gr_overlaps.py b/tests/test_gr_overlaps.py index d557121..bb594ff 100644 --- a/tests/test_gr_overlaps.py +++ b/tests/test_gr_overlaps.py @@ -61,9 +61,10 @@ def test_find_overlaps_query_type(): assert res is not None assert res == [[1, 2, 3]] + def test_find_overlaps_rtrip(): - x = GenomicRanges(["chr1", "chr1"], IRanges([2, 9] , [7, 19]), strand=["+", "-"]) - y = GenomicRanges(["chr1"], IRanges([5], [10]), strand=["*"]) + x = GenomicRanges(["chr1", "chr1"], IRanges([2, 9], [7, 19]), strand=["+", "-"]) + y = GenomicRanges(["chr1"], IRanges([5], [10]), strand=["*"]) resxy = x.find_overlaps(y) assert resxy is not None @@ -73,6 +74,7 @@ def test_find_overlaps_rtrip(): assert resyx is not None assert resyx == [[0], [0]] + def test_count_overlaps(): assert subject is not None assert query is not None From 25dde358657b851ac54c03f0deff96e444f7654c Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 15 Jul 2024 22:06:16 -0700 Subject: [PATCH 7/7] update changelog --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1df51d7..a57a403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,11 @@ # Changelog -## Version 0.4.27 +## Version 0.4.27 - 0.4.28 - Implement `subtract` method, add tests. +- Use accessor methods to access properties especially `get_seqnames()` +- Modify search and overlap methods for strand-awareness. +- Update tests and documentation. ## Version 0.4.26