-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add cmd line to VCF generated by GATKSparkTool #4981
Changes from 3 commits
9057d0b
e0a0720
2ad08e2
2c55c15
c4a64f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,25 +3,26 @@ | |
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.util.IOUtil; | ||
import htsjdk.samtools.util.Locatable; | ||
import htsjdk.tribble.AbstractFeatureReader; | ||
import htsjdk.tribble.TribbleException; | ||
import htsjdk.variant.variantcontext.*; | ||
import htsjdk.variant.variantcontext.writer.Options; | ||
import htsjdk.variant.variantcontext.writer.VariantContextWriter; | ||
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; | ||
import htsjdk.variant.vcf.VCFConstants; | ||
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import htsjdk.variant.vcf.VCFSimpleHeaderLine; | ||
import htsjdk.variant.vcf.VCFStandardHeaderLines; | ||
import org.apache.commons.io.FilenameUtils; | ||
import org.apache.commons.lang3.ArrayUtils; | ||
import org.apache.commons.lang3.tuple.MutablePair; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.*; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; | ||
import org.broadinstitute.hellbender.utils.*; | ||
import org.broadinstitute.hellbender.utils.collections.Permutation; | ||
import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList; | ||
import org.broadinstitute.hellbender.utils.param.ParamUtils; | ||
|
||
import java.io.File; | ||
|
@@ -49,6 +50,23 @@ public static boolean isInformative(final double[] gls) { | |
return MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL; | ||
} | ||
|
||
/** | ||
* Builds the default VCF header lines that record which tool produced a VCF and how it was invoked. | ||
* Two lines are produced: a "source" line whose value is the tool name, and a structured line whose key is | ||
* the toolkit short name followed by "CommandLine" and whose fields are ID, Version, Date and CommandLine. | ||
* | ||
* @param toolkitShortName prefix of the structured line's key (the key is this value followed by "CommandLine") | ||
* @param toolName value of the "source" line and of the structured line's "ID" field | ||
* @param versionString value of the structured line's "Version" field | ||
* @param dataTime value of the structured line's "Date" field (NOTE(review): name is presumably a typo for "dateTime") | ||
* @param cmdLine value of the structured line's "CommandLine" field | ||
* @return A set of VCF header lines containing the tool name, version, date and command line. | ||
*/ | ||
public static Set<VCFHeaderLine> getDefaultVCFHeaderLines(final String toolkitShortName, final String toolName, | ||
final String versionString, final String dataTime, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dataTime -> dateTime There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, javadoc should include the param list with short descriptions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
final String cmdLine) { | ||
final Set<VCFHeaderLine> defaultVCFHeaderLines = new HashSet<>(); | ||
// Fields of the structured command-line header line; the map is sized for the four entries added below. | ||
final Map<String, String> simpleHeaderLineMap = new HashMap<>(4); | ||
simpleHeaderLineMap.put("ID", toolName); | ||
simpleHeaderLineMap.put("Version", versionString); | ||
simpleHeaderLineMap.put("Date", dataTime); | ||
simpleHeaderLineMap.put("CommandLine", cmdLine); | ||
// Plain key=value line naming the tool, followed by the structured line keyed by the toolkit short name. | ||
defaultVCFHeaderLines.add(new VCFHeaderLine("source", toolName)); | ||
defaultVCFHeaderLines.add(new VCFSimpleHeaderLine(String.format("%sCommandLine", toolkitShortName), simpleHeaderLineMap)); | ||
return defaultVCFHeaderLines; | ||
} | ||
|
||
/** | ||
* Creates a VariantContextWriter whose outputFile type is based on the extension of the output file name. | ||
* The default options set by VariantContextWriter are cleared before applying ALLOW_MISSING_FIELDS_IN_HEADER (if | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package org.broadinstitute.hellbender.engine.spark; | ||
|
||
import htsjdk.variant.vcf.VCFHeader; | ||
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import htsjdk.variant.vcf.VCFIDHeaderLine; | ||
import org.apache.spark.api.java.JavaSparkContext; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.hellbender.GATKBaseTest; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.cmdline.TestProgramGroup; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
import java.util.Set; | ||
|
||
/** | ||
* Unit test for the default VCF header lines exposed by GATKSparkTool. | ||
*/ | ||
public class GATKSparkToolUnitTest extends GATKBaseTest { | ||
|
||
/** | ||
* Minimal GATKSparkTool whose runTool does nothing; it exists only so the test below can run a tool | ||
* instance and query its default VCF header lines. | ||
*/ | ||
@CommandLineProgramProperties( | ||
summary = "TestGATKSparkToolWithVariants", | ||
oneLineSummary = "TestGATKSparkToolWithVariants", | ||
programGroup = TestProgramGroup.class | ||
) | ||
public static class TestGATKSparkToolWithVariants extends GATKSparkTool { | ||
private static final long serialVersionUID = 0L; | ||
|
||
@Override | ||
protected void runTool(JavaSparkContext ctx) { | ||
//Do-Nothing | ||
} | ||
} | ||
/** | ||
* Runs the no-op tool with the ADD_OUTPUT_VCF_COMMANDLINE argument set to "true", builds a VCFHeader from | ||
* the tool's default header lines, and checks that the "source" line value and the "GATKCommandLine" line ID | ||
* both equal the tool's simple class name, and that the command line entry's string form contains the | ||
* "CommandLine=", "Version=" and "Date=" fields. | ||
* NOTE(review): assertContains is not defined in this class; presumably inherited from GATKBaseTest - confirm. | ||
*/ | ||
@Test | ||
public void testGetDefaultToolVCFHeaderLines() { | ||
final TestGATKSparkToolWithVariants tool = new TestGATKSparkToolWithVariants(); | ||
final String[] args = {"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "true"}; | ||
tool.instanceMain(args); | ||
|
||
Set<VCFHeaderLine> stdHeaderLines = tool.getDefaultToolVCFHeaderLines(); | ||
VCFHeader hdr = new VCFHeader(stdHeaderLines); | ||
|
||
VCFHeaderLine sourceLine = hdr.getOtherHeaderLine("source"); | ||
Assert.assertEquals(sourceLine.getValue(), tool.getClass().getSimpleName()); | ||
|
||
VCFIDHeaderLine commandLine = (VCFIDHeaderLine) hdr.getOtherHeaderLine("GATKCommandLine"); | ||
Assert.assertEquals(commandLine.getID(), tool.getClass().getSimpleName()); | ||
|
||
String commandLineString = commandLine.toString(); | ||
assertContains(commandLineString,"CommandLine="); | ||
assertContains(commandLineString,"Version="); | ||
assertContains(commandLineString,"Date="); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, we might as well move this method up into `CommandLineProgram` now, adjacent to `getToolkitName`, along with a `DEFAULT_TOOLKIT_SHORT_NAME` static constant, and return that. Then we can remove the two identical implementations. Also, let's keep the second part of the TODO comment with the new code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refactored; not sure if this is what you meant?