diff --git a/src/iranges/IRanges.py b/src/iranges/IRanges.py index 17d2f85..b6097c6 100644 --- a/src/iranges/IRanges.py +++ b/src/iranges/IRanges.py @@ -9,7 +9,7 @@ from biocutils import Names, combine_rows, combine_sequences, show_as_cell from .interval import ( - calc_gap_and_overlap, + calc_gap_and_overlap_position, create_np_interval_vector, ) @@ -1772,6 +1772,7 @@ def _generic_find_hits( max_gap, min_overlap, select, + query_type = "any", delete_index=False, ): self._build_ncls_index() @@ -1788,10 +1789,14 @@ def _generic_find_hits( if select != "all" and len(all_overlaps[_q_idx]) > 0: continue - _gap, _overlap = calc_gap_and_overlap( + _gap, _overlap, _position = calc_gap_and_overlap_position( (query._start[_q_idx], query._start[_q_idx] + query._width[_q_idx]), (self._start[_s_idx], self._start[_s_idx] + self._width[_s_idx]), ) + + if query_type != "any" and query_type != _position: + continue + _append = True if _gap is not None and _gap > max_gap: @@ -1877,7 +1882,14 @@ def find_overlaps( _tgap = 0 if max_gap == -1 else max_gap all_overlaps = self._generic_find_hits( - query, _tgap, _tgap, max_gap, min_overlap, select, delete_index=delete_index + query, + _tgap, + _tgap, + max_gap, + min_overlap, + select, + query_type=query_type, + delete_index=delete_index, ) return all_overlaps @@ -2189,7 +2201,7 @@ def distance(self, query: "IRanges") -> np.ndarray: for i in range(len(self)): i_self = self[i] i_query = query[i] - _gap, _overlap = calc_gap_and_overlap( + _gap, _overlap, _position = calc_gap_and_overlap_position( (i_self.start[0], i_self.end[0]), (i_query.start[0], i_query.end[0]) ) diff --git a/src/iranges/interval.py b/src/iranges/interval.py index b7294ee..0905984 100644 --- a/src/iranges/interval.py +++ b/src/iranges/interval.py @@ -66,19 +66,16 @@ def create_np_interval_vector( cov[_start:_end] += value if with_reverse_map: - _ = [ - revmap[x].append(name if name is not None else counter + 1) - for x in range(_start, _end) - ] + _ = 
[revmap[x].append(name if name is not None else counter + 1) for x in range(_start, _end)] counter += 1 return cov[1:], revmap -def calc_gap_and_overlap( +def calc_gap_and_overlap_position( first: Tuple[int, int], second: Tuple[int, int] -) -> Tuple[Optional[int], Optional[int]]: - """Calculate gap and/or overlap between two intervals. +) -> Tuple[Optional[int], Optional[int], Optional[str]]: + """Calculate gap and/or overlap between two intervals, including overlap position. Args: first: @@ -88,15 +85,33 @@ def calc_gap_and_overlap( second: Interval containing start and end positions. `end` is non-inclusive. + + Returns: + A tuple of (gap, overlap, overlap_position): + - gap: The gap between the intervals if non-overlapping, else None. + - overlap: The overlap size if overlapping, else None. + - overlap_position: Where the overlap occurs relative to the first interval, or None if there is no overlap. + Options are: 'start', 'end', 'within', or 'any' (second interval lies inside the first without covering it). """ - if min(first[1], second[1]) > max(first[0], second[0]): - _overlap = min(first[1], second[1]) - max(first[0], second[0]) - return (None, _overlap) + start_first, end_first = first + start_second, end_second = second + + if end_first > start_second and end_second > start_first: + # Overlapping case + overlap = min(end_first, end_second) - max(start_first, start_second) + + # Determine the overlap position + if start_second <= start_first and end_second >= end_first: + overlap_position = "within" + elif start_second < start_first: + overlap_position = "start" + elif end_second > end_first: + overlap_position = "end" + else: + overlap_position = "any" - _gap = None - if second[0] >= first[1]: - _gap = second[0] - first[1] - elif first[0] >= second[1]: - _gap = first[0] - second[1] + return None, overlap, overlap_position - return (_gap, None) + # Non-overlapping, calculate the gap + gap = max(start_first - end_second, start_second - end_first) + return gap, None, None