Skip to content

Commit

Permalink
Merge pull request #3 from amplab/master
Browse files Browse the repository at this point in the history
don't check allele match when evaluating SVs at exact breakpoint
  • Loading branch information
kwestbrooks committed May 19, 2014
2 parents a1e3bbe + b9f9078 commit dc5ed97
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 24 deletions.
2 changes: 2 additions & 0 deletions smashbenchmarking/vcf_eval/eval_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def get_var(variants):
return variants.all_variants[loc]
true_var = get_var(true_variants)
pred_var = get_var(pred_variants)
if true_var.var_type.startswith("SV"):
return true_var.var_type == pred_var.var_type
if ( true_var.var_type != pred_var.var_type ):
return False
return true_var.alt == pred_var.alt
Expand Down
24 changes: 0 additions & 24 deletions test/chrom_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,30 +202,6 @@ def test_false_neg_sv_ins(self):
self.falseNegative(stat_reporter,VARIANT_TYPE.SV_INS)
self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL)

def test_bad_sv_ins(self):
    """Check how a predicted long insertion at chr19:269751 is classified.

    The predicted VCF below carries three records: a deletion at 88013,
    a SNP at 89272, and a long homozygous insertion at 269751. It is
    evaluated against ``self.true_vars`` (built elsewhere in this test
    class; presumably in setUp -- confirm) with a tolerance of 100 passed
    for both SV-related arguments.

    The expected classification per variant type is asserted through the
    helper methods of this test class: SNP and INDEL_DEL are true
    positives, INDEL_INS and SV_DEL are true negatives, and SV_INS is
    reported as a bad call at a true site.
    """
    # NOTE: every header line carries a literal "\n" escape followed by a
    # real newline, so the parsed text contains blank lines between records.
    predicted_vcf_text = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
##source=TVsim\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr19 88013 . CTT C 20 PASS . GT 0/1\n
chr19 89272 . C T 20 PASS . GT 0/1\n
chr19 269751 . A AAAAGAAAGGCATGACCTATCCTTTTATGCCACCTGGATGGACCTCACAGGCACACTGCTTCATGAGAGAG 20 PASS . GT 1/1
"""

    # Wrap the text in a file-like object, parse it, and load the variants.
    predicted = Variants(
        vcf.Reader(StringIO.StringIO(predicted_vcf_text)),
        MAX_INDEL_LEN,
    )

    tolerance = 100

    # The second element of the returned pair (errors) is not inspected here.
    report, _errors = evaluate_variants(
        self.true_vars, predicted, tolerance, tolerance, None, None, None
    )

    self.truePositive(report, VARIANT_TYPE.SNP)
    self.trueNegative(report, VARIANT_TYPE.INDEL_INS)
    self.truePositive(report, VARIANT_TYPE.INDEL_DEL)
    self.badCallAtTrueSite(report, VARIANT_TYPE.SV_INS)
    self.trueNegative(report, VARIANT_TYPE.SV_DEL)

def test_sv_snp_collision(self):
pred_str = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
Expand Down
46 changes: 46 additions & 0 deletions test/eval_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,52 @@ def testChromEvaluateVariants(self):
# TODO test known false positive functionality
# TODO test genotype concordance
pass
def testChromEvaluateVariantsSV(self):
    """Exercise SV insertion matching in chrom_evaluate_variants.

    A single true insertion at chr1:6 is compared against four different
    predictions, and the SV_INS true-positive count is asserted each time:

    * same position, same allele        -> 1
    * same position, different allele   -> 1
    * position 4 instead of 6           -> 1
    * position 110 instead of 6         -> 0

    NB: SVs aren't rescued, just checked for within breakpoint tolerance.
    """
    truth_vcf_text = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    truth = vcf_to_ChromVariants(truth_vcf_text,'chr1')

    def sv_ins_true_positives(predicted_vcf_text):
        # Parse one prediction, evaluate it against the shared truth set,
        # and return the number of SV insertion true positives.
        predicted = vcf_to_ChromVariants(predicted_vcf_text,'chr1')
        stats = chrom_evaluate_variants(truth,predicted,100,100,get_reference(),50)
        return stats.num_tp[VARIANT_TYPE.SV_INS]

    # Prediction at the exact position with the exact same allele.
    same_position_same_allele = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    self.assertEqual(sv_ins_true_positives(same_position_same_allele),1)

    # Prediction at the exact position with a different allele (extra
    # trailing bases); still expected to count as a true positive.
    same_position_other_allele = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAATGC 20 PASS . GT 0/1
"""
    self.assertEqual(sv_ins_true_positives(same_position_other_allele),1)

    # Prediction shifted to position 4 with the exact allele; expected to
    # fall within tolerance.
    nearby_position_same_allele = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 4 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    self.assertEqual(sv_ins_true_positives(nearby_position_same_allele),1)

    # Prediction at position 110; expected to fall outside tolerance.
    distant_position = """##fileformat=VCFv4.0\n
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n
chr1 110 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1
"""
    self.assertEqual(sv_ins_true_positives(distant_position),0)


def testRescueChromEvalVariants(self):
pred_str = """##fileformat=VCFv4.0\n
Expand Down

0 comments on commit dc5ed97

Please sign in to comment.