Skip to content

Commit

Permalink
test coverage of collapse --chain for #196
Browse files Browse the repository at this point in the history
The test covers only a single site, but that is enough to exercise the --chain logic
  • Loading branch information
ACEnglish committed Feb 11, 2024
1 parent 1429779 commit 1cd03b2
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 13 deletions.
12 changes: 12 additions & 0 deletions repo_utils/answer_key/collapse/inputissue196_collapsed.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr21,length=46709983,md5=2f45a3455007b7e271509161e52954a9>
##INFO=<ID=END,Number=1,Type=Integer,Description="SV END position">
##INFO=<ID=SVTYPE,Number=A,Type=String,Description="Variant type">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length of ref and alt alleles">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=NumCollapsed,Number=1,Type=Integer,Description="Number of calls collapsed into this call by truvari">
##INFO=<ID=CollapseId,Number=1,Type=String,Description="Truvari uid to help tie output.vcf and output.collapsed.vcf entries together">
##INFO=<ID=NumConsolidated,Number=1,Type=Integer,Description="Number of samples consolidated into this call by truvari">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
chr21 14088112 chr21-14088113-DEL-223 A <DEL> . . END=14088558;SVTYPE=DEL;SVLEN=-223;NumCollapsed=2;NumConsolidated=0;CollapseId=1.0 GT 1|0
20 changes: 20 additions & 0 deletions repo_utils/answer_key/collapse/inputissue196_removed.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=chr21,length=46709983,md5=2f45a3455007b7e271509161e52954a9>
##INFO=<ID=END,Number=1,Type=Integer,Description="SV END position">
##INFO=<ID=SVTYPE,Number=A,Type=String,Description="Variant type">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length of ref and alt alleles">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=TruScore,Number=1,Type=Integer,Description="Truvari score for similarity of match">
##INFO=<ID=PctSeqSimilarity,Number=1,Type=Float,Description="Pct sequence similarity between this variant and its closest match">
##INFO=<ID=PctSizeSimilarity,Number=1,Type=Float,Description="Pct size similarity between this variant and its closest match">
##INFO=<ID=PctRecOverlap,Number=1,Type=Float,Description="Percent reciprocal overlap percent of the two calls' coordinates">
##INFO=<ID=StartDistance,Number=1,Type=Integer,Description="Distance of the base call's end from comparison call's start">
##INFO=<ID=EndDistance,Number=1,Type=Integer,Description="Distance of the base call's end from comparison call's end">
##INFO=<ID=SizeDiff,Number=1,Type=Float,Description="Difference in size of base and comp calls">
##INFO=<ID=GTMatch,Number=1,Type=Integer,Description="Base/Comparison genotypes AC difference">
##INFO=<ID=MatchId,Number=.,Type=String,Description="Tuple of base and comparison call ids which were matched">
##INFO=<ID=Multi,Number=0,Type=Flag,Description="Call is false due to non-multimatching">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
chr21 14088212 chr21-14088113-DEL-223 T <DEL> . . END=14088828;SVTYPE=DEL;SVLEN=-393;PctSeqSimilarity=0;PctSizeSimilarity=0.5674;PctRecOverlap=0.5624;SizeDiff=-170;StartDistance=-100;EndDistance=-270;GTMatch=.;TruScore=37;MatchId=1.0 GT 1|0
chr21 14088312 chr21-14088113-DEL-223 A <DEL> . . END=14089203;SVTYPE=DEL;SVLEN=-668;PctSeqSimilarity=0;PctSizeSimilarity=0.5883;PctRecOverlap=0.5796;SizeDiff=275;StartDistance=100;EndDistance=375;GTMatch=.;TruScore=38;MatchId=1.0 GT 1|0
10 changes: 10 additions & 0 deletions repo_utils/sub_tests/collapse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,13 @@ run test_collapse_intragth $truv collapse -i $INDIR/variants/bcftools_merged.vcf
if [ $test_collapse_intragth ]; then
collapse_assert intragth
fi


run test_collapse_chain $truv collapse -i $INDIR/variants/issue196_chain.vcf.gz \
-o $OD/inputissue196_collapsed.vcf \
-c $OD/inputissue196_removed.vcf \
--chain --pctseq 0 --pctsize 0.35
if [ $test_collapse_chain ]; then
collapse_assert issue196
fi

Binary file added repo_utils/test_files/variants/issue196_chain.vcf.gz
Binary file not shown.
Binary file not shown.
23 changes: 10 additions & 13 deletions truvari/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,7 @@ class CollapsedCalls():
match_id: str
matches: list = field(default_factory=list)
gt_consolidate_count: int = 0
genotype_mask: str = "" # bad

def combine(self, other):
"""
Put other's entries into this' collapsed_entries
"""
self.matches.append(other.entry)
self.matches.extend(other.matches)
genotype_mask: str = None # not actually a str

def calc_median_sizepos(self):
"""
Expand Down Expand Up @@ -94,29 +87,33 @@ def gt_conflict(self, other, which_gt):
self.genotype_mask |= o_mask
return False

def consolidate(self, other):
def combine(self, other):
"""
Add other's calls/matches to self's matches
"""
self.matches.append(other.entry)
self.matches.extend(other.matches)
self.gt_consolidate_count += other.gt_consolidate_count
if self.genotype_mask != "" and other.genotype_mask != "":
if self.genotype_mask is not None and other.genotype_mask is not None:
self.genotype_mask |= other.genotype_mask

def chain_collapse(cur_collapse, all_collapse, matcher):
"""
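Perform transitive matching of cur_collapse against all_collapse.
Compare cur_collapse's entry with the entries already matched into each
existing collapse. On the first match, fold cur_collapse into that collapse
and return True; return False if nothing matched.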
"""
for m_collap in all_collapse:
for other in m_collap.matches:
mat = matcher.build_match(cur_collapse.entry,
other.base,
other.comp,
m_collap.match_id,
skip_gt=True,
short_circuit=True)
if mat.state:
m_collap.consolidate(cur_collapse)
# The other's representative entry will report its
# similarity to the matched call that pulled it in
mat.base, mat.comp = mat.comp, mat.base
m_collap.matches.append(mat)
m_collap.combine(cur_collapse)
return True # you can just ignore it later
return False # You'll have to add it to your set of collapsed calls

Expand Down

0 comments on commit 1cd03b2

Please sign in to comment.