Skip to content

Commit

Permalink
overlaps method is aware of query type
Browse files Browse the repository at this point in the history
  • Loading branch information
jkanche committed Sep 24, 2024
1 parent 08559d8 commit b8ad1ab
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 20 deletions.
20 changes: 16 additions & 4 deletions src/iranges/IRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from biocutils import Names, combine_rows, combine_sequences, show_as_cell

from .interval import (
calc_gap_and_overlap,
calc_gap_and_overlap_position,
create_np_interval_vector,
)

Expand Down Expand Up @@ -1772,6 +1772,7 @@ def _generic_find_hits(
max_gap,
min_overlap,
select,
query_type = "any",
delete_index=False,
):
self._build_ncls_index()
Expand All @@ -1788,10 +1789,14 @@ def _generic_find_hits(
if select != "all" and len(all_overlaps[_q_idx]) > 0:
continue

_gap, _overlap = calc_gap_and_overlap(
_gap, _overlap, _position = calc_gap_and_overlap_position(
(query._start[_q_idx], query._start[_q_idx] + query._width[_q_idx]),
(self._start[_s_idx], self._start[_s_idx] + self._width[_s_idx]),
)

if query_type != "any" and query_type != _position:
continue

_append = True

if _gap is not None and _gap > max_gap:
Expand Down Expand Up @@ -1877,7 +1882,14 @@ def find_overlaps(
_tgap = 0 if max_gap == -1 else max_gap

all_overlaps = self._generic_find_hits(
query, _tgap, _tgap, max_gap, min_overlap, select, delete_index=delete_index
query,
_tgap,
_tgap,
max_gap,
min_overlap,
select,
query_type=query_type,
delete_index=delete_index,
)
return all_overlaps

Expand Down Expand Up @@ -2189,7 +2201,7 @@ def distance(self, query: "IRanges") -> np.ndarray:
for i in range(len(self)):
i_self = self[i]
i_query = query[i]
_gap, _overlap = calc_gap_and_overlap(
_gap, _overlap, _position = calc_gap_and_overlap_position(
(i_self.start[0], i_self.end[0]), (i_query.start[0], i_query.end[0])
)

Expand Down
47 changes: 31 additions & 16 deletions src/iranges/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,16 @@ def create_np_interval_vector(
cov[_start:_end] += value

if with_reverse_map:
_ = [
revmap[x].append(name if name is not None else counter + 1)
for x in range(_start, _end)
]
_ = [revmap[x].append(name if name is not None else counter + 1) for x in range(_start, _end)]

counter += 1
return cov[1:], revmap


def calc_gap_and_overlap(
def calc_gap_and_overlap_position(
first: Tuple[int, int], second: Tuple[int, int]
) -> Tuple[Optional[int], Optional[int]]:
"""Calculate gap and/or overlap between two intervals.
) -> Tuple[Optional[int], Optional[int], Optional[str]]:
"""Calculate gap and/or overlap between two intervals, including overlap position.
Args:
first:
Expand All @@ -88,15 +85,33 @@ def calc_gap_and_overlap(
second:
Interval containing start and end positions.
`end` is non-inclusive.
Returns:
A tuple of (gap, overlap, overlap_position):
- gap: The gap between the intervals if non-overlapping, else None.
- overlap: The overlap size if overlapping, else None.
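- overlap_position: How `second` overlaps `first`, or None if the intervals do not overlap.
One of: 'within' (`second` spans all of `first`), 'start' (`second` begins before `first` and ends inside it),
'end' (`second` begins inside `first` and ends after it), or 'any' (`second` lies inside `first` without spanning it).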
"""
if min(first[1], second[1]) > max(first[0], second[0]):
_overlap = min(first[1], second[1]) - max(first[0], second[0])
return (None, _overlap)
start_first, end_first = first
start_second, end_second = second

if end_first > start_second and end_second > start_first:
# Overlapping case
overlap = min(end_first, end_second) - max(start_first, start_second)

# Determine the overlap position
if start_second <= start_first and end_second >= end_first:
overlap_position = "within"
elif start_second < start_first:
overlap_position = "start"
elif end_second > end_first:
overlap_position = "end"
else:
overlap_position = "any"

_gap = None
if second[0] >= first[1]:
_gap = second[0] - first[1]
elif first[0] >= second[1]:
_gap = first[0] - second[1]
return None, overlap, overlap_position

return (_gap, None)
# Non-overlapping, calculate the gap
gap = max(start_first - end_second, start_second - end_first)
return gap, None, None

0 comments on commit b8ad1ab

Please sign in to comment.