Skip to content

Commit

Permalink
refine short_circuit
Browse files Browse the repository at this point in the history
When the refine alignment creates a region with more than 5k variants from
base and comp samples, it will not attempt to benchmark. Instead, a warning
indicating the region was excluded is written to the refine.log.txt.
  • Loading branch information
ACEnglish committed Feb 8, 2024
1 parent b5d2788 commit 3367305
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
16 changes: 16 additions & 0 deletions truvari/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,22 @@ def compare_calls(self, base_variants, comp_variants, chunk_id=0):
fns.append(ret)
return fns

# 5k variants takes too long
if self.short_circuit and (len(base_variants) + len(comp_variants)) > 5000:
pos = []
cnt = 0
chrom = None
for i in base_variants:
cnt += 1
pos.extend(truvari.entry_boundaries(i))
chrom = i.chrom
for i in comp_variants:
cnt += 1
pos.extend(truvari.entry_boundaries(i))
chrom = i.chrom
logging.warning("Skipping region %s:%d-%d with %d variants", chrom, min(*pos), max(*pos), cnt)
return []

match_matrix = self.build_matrix(
base_variants, comp_variants, chunk_id)
if isinstance(match_matrix, list):
Expand Down
22 changes: 12 additions & 10 deletions truvari/phab.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,19 +101,14 @@ def incorporate(consensus_sequence, entry, correction):
return correction + (alt_len - ref_len)


def make_haplotypes(sequence, entries, o_samp, ref, start, sample, passonly=True, max_size=50000):
def make_haplotypes(sequence, entries, o_samp, ref, start, sample):
"""
Given a reference sequence, set of entries to incorporate, sample name, reference key, and reference start position
Make the two haplotypes
"""
haps = (list(sequence), list(sequence))
correction = [-start, -start]
for entry in entries:
if entry.alts is None \
or (entry.alleles_variant_types[-1] not in ['SNP', 'INDEL']) \
or (passonly and truvari.entry_is_filtered(entry)) \
or truvari.entry_size(entry) > max_size:
continue
if entry.samples[sample]['GT'][0] == 1:
correction[0] = incorporate(haps[0], entry, correction[0])
if len(entry.samples[sample]['GT']) > 1 and entry.samples[sample]['GT'][1] == 1:
Expand Down Expand Up @@ -144,15 +139,23 @@ def make_consensus(data, ref_fn, passonly=True, max_size=50000):

cur_key = None
cur_entries = []
# Can do the entry filtering here
# Won't need to pass to make_haplotypes
for entry, key in vcf_i:
if cur_key is None:
cur_key = key

if entry.alts is None \
or (entry.alleles_variant_types[-1] not in ['SNP', 'INDEL']) \
or (passonly and truvari.entry_is_filtered(entry)) \
or truvari.entry_size(entry) > max_size:
continue

if key != cur_key:
ref = f"{cur_key[0]}:{cur_key[1].begin}-{cur_key[1].end - 1}"
ref_seq = reference.fetch(ref)
ret[ref] = make_haplotypes(ref_seq, cur_entries, o_samp,
ref, cur_key[1].begin, sample,
passonly, max_size)
ref, cur_key[1].begin, sample)
cur_key = key
cur_entries = []
cur_entries.append(entry)
Expand All @@ -161,8 +164,7 @@ def make_consensus(data, ref_fn, passonly=True, max_size=50000):
ref = f"{cur_key[0]}:{cur_key[1].begin}-{cur_key[1].end - 1}"
ref_seq = reference.fetch(ref)
ret[ref] = make_haplotypes(ref_seq, cur_entries, o_samp,
ref, cur_key[1].begin, sample, passonly,
max_size)
ref, cur_key[1].begin, sample)

return ret
#pylint: enable=too-many-locals
Expand Down

0 comments on commit 3367305

Please sign in to comment.