Skip to content

Commit

Permalink
feat: amplicon hit building uses coordinate information to reduce sea…
Browse files Browse the repository at this point in the history
…rch space
  • Loading branch information
ameynert committed Dec 31, 2024
1 parent 7fe0202 commit f6b1708
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 13 deletions.
51 changes: 40 additions & 11 deletions prymer/offtarget/offtarget_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,20 +504,49 @@ def _to_amplicons(
if len(positive_hits) == 0 or len(negative_hits) == 0:
return []

# Sort the positive strand hits by start position and the negative strand hits by *end*
# position. The `max_len` cutoff is based on negative_hit.end - positive_hit.start + 1.
positive_hits_sorted = sorted(positive_hits, key=lambda h: h.start)
negative_hits_sorted = sorted(negative_hits, key=lambda h: h.end)

amplicons: list[Span] = []
for positive_hit, negative_hit in itertools.product(positive_hits, negative_hits):
if (
negative_hit.start > positive_hit.end
and negative_hit.end - positive_hit.start + 1 <= max_len

# Track the position of the previously examined negative hit.
prev_negative_hit_index = 0
for positive_hit in positive_hits_sorted:
# Check only negative hits starting with the previously examined one.
for negative_hit_index, negative_hit in enumerate(
negative_hits_sorted[prev_negative_hit_index:],
start=prev_negative_hit_index,
):
amplicons.append(
Span(
refname=positive_hit.refname,
start=positive_hit.start,
end=negative_hit.end,
strand=strand,
# TODO: Consider allowing overlapping positive and negative hits.
if (
negative_hit.start > positive_hit.end
and negative_hit.end - positive_hit.start + 1 <= max_len
):
# If the negative hit starts to the right of the positive hit, and the amplicon
# length is <= max_len, add it to the list of amplicon hits to be returned.
amplicons.append(
Span(
refname=positive_hit.refname,
start=positive_hit.start,
end=negative_hit.end,
strand=strand,
)
)
)

if negative_hit.end - positive_hit.start + 1 > max_len:
# Stop searching for negative hits to pair with this positive hit.
# All subsequent negative hits will have amplicon length > max_len
break

if negative_hit.end < positive_hit.start:
# This positive hit is genomically right of the current negative hit.
# All subsequent positive hits will also be genomically right of this negative
# hit, so we should start at the one after this. If this index is past the end
# of the list, the slice `negative_hits_sorted[prev_negative_hit_index:]` will
# be empty.
prev_negative_hit_index = negative_hit_index + 1

return amplicons

Expand Down
4 changes: 2 additions & 2 deletions tests/offtarget/test_offtarget.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,10 @@ def test_build_off_target_result(ref_fasta: Path) -> None:
hits_by_primer=hits_by_primer,
)

assert off_target_result.spans == [
assert set(off_target_result.spans) == {
Span(refname="chr1", start=100, end=299, strand=Strand.POSITIVE),
Span(refname="chr3", start=600, end=799, strand=Strand.NEGATIVE),
]
}


# Test that using the cache (or not) does not affect the results
Expand Down

0 comments on commit f6b1708

Please sign in to comment.