diff --git a/smashbenchmarking/vcf_eval/eval_helper.py b/smashbenchmarking/vcf_eval/eval_helper.py index e525148..77cdbc0 100644 --- a/smashbenchmarking/vcf_eval/eval_helper.py +++ b/smashbenchmarking/vcf_eval/eval_helper.py @@ -89,6 +89,8 @@ def get_var(variants): return variants.all_variants[loc] true_var = get_var(true_variants) pred_var = get_var(pred_variants) + if true_var.var_type.startswith("SV"): + return true_var.var_type == pred_var.var_type if ( true_var.var_type != pred_var.var_type ): return False return true_var.alt == pred_var.alt diff --git a/test/chrom_stats.py b/test/chrom_stats.py index e450d68..cc4248a 100755 --- a/test/chrom_stats.py +++ b/test/chrom_stats.py @@ -202,30 +202,6 @@ def test_false_neg_sv_ins(self): self.falseNegative(stat_reporter,VARIANT_TYPE.SV_INS) self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL) - def test_bad_sv_ins(self): - pred_str = """##fileformat=VCFv4.0\n -##FORMAT=\n -##source=TVsim\n -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n -chr19 88013 . CTT C 20 PASS . GT 0/1\n -chr19 89272 . C T 20 PASS . GT 0/1\n -chr19 269751 . A AAAAGAAAGGCATGACCTATCCTTTTATGCCACCTGGATGGACCTCACAGGCACACTGCTTCATGAGAGAG 20 PASS . 
GT 1/1 -""" - - pred_io = StringIO.StringIO(pred_str) - pred_vcf = vcf.Reader(pred_io) - pred_vars = Variants(pred_vcf, MAX_INDEL_LEN) - - sv_eps = 100 - - stat_reporter, errors = evaluate_variants(self.true_vars, pred_vars, sv_eps, sv_eps, None, None, None) - - self.truePositive(stat_reporter,VARIANT_TYPE.SNP) - self.trueNegative(stat_reporter,VARIANT_TYPE.INDEL_INS) - self.truePositive(stat_reporter,VARIANT_TYPE.INDEL_DEL) - self.badCallAtTrueSite(stat_reporter,VARIANT_TYPE.SV_INS) - self.trueNegative(stat_reporter,VARIANT_TYPE.SV_DEL) - def test_sv_snp_collision(self): pred_str = """##fileformat=VCFv4.0\n ##FORMAT=\n diff --git a/test/eval_helper.py b/test/eval_helper.py index 2365d3d..154b1c6 100644 --- a/test/eval_helper.py +++ b/test/eval_helper.py @@ -124,6 +124,52 @@ def testChromEvaluateVariants(self): # TODO test known false positive functionality # TODO test genotype concordance pass + def testChromEvaluateVariantsSV(self): + #NB: SVs aren't rescued, just checked for within breakpoint tolerance + true_str = """##fileformat=VCFv4.0\n +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1 +""" + #SV with exact position, exact allele match + pred_str = """##fileformat=VCFv4.0\n +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 6 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1 +""" + true_vars = vcf_to_ChromVariants(true_str,'chr1') + pred_vars = vcf_to_ChromVariants(pred_str,'chr1') + cvs = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50) + self.assertEqual(cvs.num_tp[VARIANT_TYPE.SV_INS],1) + #SV with exact position, different allele match + pred_str = """##fileformat=VCFv4.0\n +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 6 . 
C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAATGC 20 PASS . GT 0/1 +""" + pred_vars = vcf_to_ChromVariants(pred_str,'chr1') + cvs = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50) + self.assertEqual(cvs.num_tp[VARIANT_TYPE.SV_INS],1) + #SV with position within tolerance, exact allele match + pred_str = """##fileformat=VCFv4.0\n +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 4 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1 +""" + + pred_vars = vcf_to_ChromVariants(pred_str,'chr1') + cvs = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50) + self.assertEqual(cvs.num_tp[VARIANT_TYPE.SV_INS],1) + #SV outside of tolerance + pred_str = """##fileformat=VCFv4.0\n +##FORMAT=\n +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001\n +chr1 110 . C CGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGACGTGAGATGAAAAA 20 PASS . GT 0/1 +""" + pred_vars = vcf_to_ChromVariants(pred_str,'chr1') + cvs = chrom_evaluate_variants(true_vars,pred_vars,100,100,get_reference(),50) + self.assertEqual(cvs.num_tp[VARIANT_TYPE.SV_INS],0) + def testRescueChromEvalVariants(self): pred_str = """##fileformat=VCFv4.0\n