From 5dd64f6d204f894967f3a998adf4659326265e35 Mon Sep 17 00:00:00 2001 From: Michael Schatz Date: Fri, 27 Oct 2017 18:30:31 -0400 Subject: [PATCH] prioritize sniffles variants over others --- src/scrubvcf.pl | 93 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 17 deletions(-) diff --git a/src/scrubvcf.pl b/src/scrubvcf.pl index 68e9a6d..5ee4816 100755 --- a/src/scrubvcf.pl +++ b/src/scrubvcf.pl @@ -36,7 +36,7 @@ { $STRIP_OVERLAP = 1; shift @ARGV; - my $gender = shift @ARGV; + my $gender = shift @ARGV or die "Must specific a gender (male or female) when stripping overlapping variants\n"; print STDERR "stripping overlapping calls (gender = $gender)\n"; if ($gender eq "male") { } ## nothing to do @@ -51,8 +51,9 @@ my $lastchr = undef; -my $lastpos0 = -1; my $lastref0 = ""; my $lastalt0 = ""; -my $lastpos1 = -1; my $lastref1 = ""; my $lastalt1 = ""; +my $lastpos0 = -1; my $lastref0 = ""; my $lastalt0 = ""; my $lastline0 = undef; my $lastsniffles0 = 0; +my $lastpos1 = -1; my $lastref1 = ""; my $lastalt1 = ""; my $lastline1 = undef; my $lastsniffles1 = 0; +my $lasthomo = 0; while (<>) { @@ -69,6 +70,7 @@ my $alt = $fields[4]; my $qual = $fields[5]; my $filter = $fields[6]; + my $info = $fields[7]; my $genotype = (split (/:/, $fields[9]))[0]; if ($STRIP_OVERLAP) @@ -85,6 +87,21 @@ next; } + ## see if we can print the last variants because we have moved to a different chromosome or moved passed them + if (defined $lastchr) + { + if ($lastchr ne $chr) + { + if (defined $lastline0) { print $lastline0; $lastline0 = undef; if ($lasthomo) { $lastline1 = undef; }} + if (defined $lastline1) { print $lastline1; $lastline1 = undef; } + } + else + { + if ((defined $lastline0) && ($pos > $lastpos0)) { print $lastline0; $lastline0 = undef; if ($lasthomo) { $lastline1 = undef; } } + if ((defined $lastline1) && ($pos > $lastpos1)) { print $lastline1; $lastline1 = undef; } + } + } + my $printvar = 1; my $type = "SUB"; @@ -95,33 +112,71 @@ if ($genotype eq "0|1") { $hap = 1; } elsif ($genotype eq "1|0") { $hap = 0; } + my $issniffles = 0; + if (index($info, "Sniffles") >= 0) { $issniffles = 1; } + if ((defined $lastchr) && ($chr eq $lastchr)) { - if (($hap == 0) || ($hap == 2)) { if ($pos <= $lastpos0) - { $printvar = 0; $types{"FAIL_$hap"}++; print STDERR " overlap detected on hap $hap (lastpos: $lastpos0 $lastref0 $lastalt0) at $_"; } } + if (($hap == 0) || ($hap == 2)) + { + if ($pos <= $lastpos0) + { + if ($issniffles && !$lastsniffles0) + { + ## rescued + $types{"OVERRULE_$hap"}++; + $lastline0 = undef; if ($lasthomo) { $lastline1 = undef; $lastpos1 = -1; } + print STDERR " overrule overlap detected on hap $hap (lastpos: $lastpos0 $lastref0 $lastalt0) at $_"; + } + else + { + $printvar = 0; + $types{"FAIL_$hap"}++; + if ($issniffles) { $types{"FAIL_SNIFFLES_$hap"}++; } + print STDERR " overlap detected on hap $hap (lastpos: $lastpos0 $lastref0 $lastalt0) at $_"; + } + } + } - if (($hap == 1) || ($hap == 2)) { if ($pos <= $lastpos1) - { $printvar = 0; $types{"FAIL_$hap"}++; print STDERR " overlap detected on hap $hap (lastpos: $lastpos1 $lastref1 $lastalt1) at $_"; } } + if ($printvar) + { + if (($hap == 1) || ($hap == 2)) + { + if ($pos <= $lastpos1) + { + if ($issniffles && !$lastsniffles1) + { + ## rescued + $types{"OVERRULE_$hap"}++; + $lastline1 = undef; if ($lasthomo) { $lastline0 = undef; $lastpos0 = -1; } + print STDERR " overrule overlap detected on hap $hap (lastpos: $lastpos1 $lastref1 $lastalt1) at $_"; + } + else + { + $printvar = 0; + $types{"FAIL_$hap"}++; + if ($issniffles) { $types{"FAIL_SNIFFLES_$hap"}++; } + print STDERR " overlap detected on hap $hap (lastpos: $lastpos1 $lastref1 $lastalt1) at $_"; + } + } + } + } } if ($printvar) { $reported++; $types{$type}++; - print $_; + #print $_; ## Dont print right away, since there might be a sniffles SV next - if ((!defined $lastchr) || ($lastchr ne $chr)) - { - # reset the chromosome - $lastchr = $chr; - $lastpos0 = -1; $lastref0 = ""; $lastalt0 = ""; - $lastpos1 = -1; $lastref1 = ""; $lastalt1 = ""; - } + $lastchr = $chr; my $newpos = $pos + length($ref) - 1; - if (($hap == 0) || ($hap == 2)) { $lastpos0 = $newpos; $lastref0 = $ref; $lastalt0 = $alt; } - if (($hap == 1) || ($hap == 2)) { $lastpos1 = $newpos; $lastref1 = $ref; $lastalt1 = $alt; } + if (($hap == 0) || ($hap == 2)) { $lastpos0 = $newpos; $lastref0 = $ref; $lastalt0 = $alt; $lastline0 = $_; $lastsniffles0 = $issniffles; } + if (($hap == 1) || ($hap == 2)) { $lastpos1 = $newpos; $lastref1 = $ref; $lastalt1 = $alt; $lastline1 = $_; $lastsniffles1 = $issniffles; } + + if ($hap == 2) { $lasthomo = 1; } else { $lasthomo = 0; } } } else @@ -136,6 +191,10 @@ } } +if (defined $lastline0) { print $lastline0; $lastline0 = undef; if ($lasthomo) { $lastline1 = undef; }} +if (defined $lastline1) { print $lastline1; $lastline1 = undef; } + + print STDERR "## Reported $reported of $all variants:\n"; foreach my $t (sort keys %types) {