Skip to content

Commit

Permalink
check variants against header before writing variants
Browse files Browse the repository at this point in the history
  • Loading branch information
SHuang-Broad committed Aug 1, 2018
1 parent 3a40531 commit 45f8106
Showing 1 changed file with 51 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,13 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.StringUtil;
import htsjdk.variant.variantcontext.CommonInfo;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import htsjdk.variant.vcf.*;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvType;
Expand All @@ -20,10 +19,7 @@
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -79,12 +75,15 @@ public static List<VariantContext> sortVariantsByCoordinate(final List<VariantCo

private static void writeVariants(final String fileName, final List<VariantContext> variantsArrayList,
final SAMSequenceDictionary referenceSequenceDictionary) {
final VCFHeader vcfHeader = getVcfHeader(referenceSequenceDictionary);
checkAllVariantsAgainstHeader(vcfHeader, variantsArrayList);

try (final OutputStream outputStream
= new BufferedOutputStream(BucketUtils.createFile(fileName))) {

final VariantContextWriter vcfWriter = getVariantContextWriter(outputStream, referenceSequenceDictionary);

vcfWriter.writeHeader(getVcfHeader(referenceSequenceDictionary));
vcfWriter.writeHeader(vcfHeader);
variantsArrayList.forEach(vcfWriter::add);
vcfWriter.close();

Expand All @@ -93,6 +92,49 @@ private static void writeVariants(final String fileName, final List<VariantConte
}
}

// TODO: 8/1/18 ultimately, this should have its own class, some sort of StructuralVariantValidator
/**
* Checks that all variants' annotations (currently only INFO) are in logged in VCF headers,
* so that output VCF is not technically corrupt.
*/
private static void checkAllVariantsAgainstHeader(final VCFHeader header, final List<VariantContext> variants) {

final Map<String, VariantContext> unregisteredAttributeAndExampleVariant = new HashMap<>();
for (final VariantContext variant : variants) {
final Map<String, Object> attributes = variant.getAttributes();
attributes.forEach((k,v) -> {
if ( !(header.hasInfoLine(k) || header.hasFormatLine(k)) ) {
unregisteredAttributeAndExampleVariant.putIfAbsent(k, variant);
} // TODO: 8/1/18 this is not checking each variant's annotation type or count, yet
});
if (variant.isFiltered()) {
final Set<String> filterIDs = header.getFilterLines().stream().map(VCFFilterHeaderLine::getID).collect(Collectors.toCollection(HashSet::new));
variant.getFilters().forEach(ft -> {
if ( !filterIDs.contains(ft) ) {
unregisteredAttributeAndExampleVariant.putIfAbsent(ft, variant);
}
});

}
}
if ( !unregisteredAttributeAndExampleVariant.isEmpty() ) {
List <String> simpleHeaderLines = new ArrayList<>(100);
header.getFilterLines().forEach(line -> simpleHeaderLines.add(line.toString()));
header.getFormatHeaderLines().forEach(line -> simpleHeaderLines.add(line.toString()));
header.getInfoHeaderLines().forEach(line -> simpleHeaderLines.add(line.toString()));
StringBuilder errorMessage = new StringBuilder("Caught variants that have attributes not registered in given header.\n Header lines: \n");
errorMessage.append( StringUtil.join("\n", simpleHeaderLines) );
unregisteredAttributeAndExampleVariant.forEach((k,v) -> {
errorMessage.append("missing: ");
errorMessage.append(k);
errorMessage.append("\texample: ");
errorMessage.append(v.toStringDecodeGenotypes());
errorMessage.append("\n");
});
throw new IllegalArgumentException(errorMessage.toString());
}
}

@VisibleForTesting
static VCFHeader getVcfHeader(final SAMSequenceDictionary referenceSequenceDictionary) {
final Set<VCFHeaderLine> headerLines = new HashSet<>(GATKSVVCFHeaderLines.getSymbAltAlleleLines());
Expand Down

0 comments on commit 45f8106

Please sign in to comment.