Skip to content

Commit

Permalink
Merge pull request #226 from BimberLab/23.3_fb_java8
Browse files Browse the repository at this point in the history
Backport switch from java8->newer java to fix tests
  • Loading branch information
bbimber authored May 23, 2023
2 parents da136fe + c65f6e8 commit b2a83a5
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 238 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,6 @@ public void setMaxRamOverride(Integer maxRamOverride)
_maxRamOverride = maxRamOverride;
}

/**
 * Adds JAVA_HOME to this wrapper's environment, pointing at the java8 installation.
 * JAVA_HOME is taken to be two directory levels above the path returned by
 * SequencePipelineService.getJava8FilePath() (the parent of the folder that holds the executable).
 * If the configured path does not have two parent levels (for example a bare executable name),
 * a debug message is logged and the environment is left untouched.
 */
public void addJava8HomeToEnvironment()
{
    //since GATK requires java8, set JAVA_HOME to match this:
    File java8Exe = new File(SequencePipelineService.get().getJava8FilePath());

    // File.getParentFile() returns null when the path names no parent, so each level
    // must be checked before it is dereferenced
    File binDir = java8Exe.getParentFile();
    File javaHome = binDir == null ? null : binDir.getParentFile();
    if (javaHome == null)
    {
        getLogger().debug("unexpected path to java8, cannot determine JAVA_HOME: " + (binDir == null ? java8Exe : binDir).getPath());
        return;
    }

    String javaDir = javaHome.getPath();
    getLogger().debug("setting JAVA_HOME to java8 location: " + javaDir);
    addToEnvironment("JAVA_HOME", javaDir);
}

public boolean jarExists()
{
return getJAR(false) != null;
Expand All @@ -80,7 +65,7 @@ protected void ensureDictionary(File referenceFasta) throws PipelineJobException
public String getVersionString() throws PipelineJobException
{
List<String> args = new ArrayList<>();
args.add(SequencePipelineService.get().getJava8FilePath());
args.add(SequencePipelineService.get().getJavaFilepath());
args.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride));
args.add("-jar");
args.add(getJAR().getPath());
Expand All @@ -102,7 +87,7 @@ protected String getPackageName()
public List<String> getBaseArgs(@Nullable String toolName)
{
List<String> args = new ArrayList<>();
args.add(SequencePipelineService.get().getJava8FilePath());
args.add(SequencePipelineService.get().getJavaFilepath());
args.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride));
args.add("-jar");
args.add(getJAR().getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,38 +54,6 @@ public void setMaxRamOverride(Integer maxRamOverride)
_maxRamOverride = maxRamOverride;
}

/**
 * Adds JAVA_HOME to this wrapper's environment, pointing at the java8 installation.
 * JAVA_HOME is taken to be two directory levels above the path returned by
 * SequencePipelineService.getJava8FilePath() (the parent of the folder that holds the executable).
 * If the configured path does not have two parent levels (for example a bare executable name),
 * a debug message is logged and the environment is left untouched.
 */
protected void addJavaHomeToEnvironment()
{
    //since GATK requires java8, set JAVA_HOME to match this:
    File java8Exe = new File(SequencePipelineService.get().getJava8FilePath());

    // File.getParentFile() returns null when the path names no parent, so each level
    // must be checked before it is dereferenced
    File binDir = java8Exe.getParentFile();
    File javaHome = binDir == null ? null : binDir.getParentFile();
    if (javaHome == null)
    {
        getLogger().debug("unexpected path to java8, cannot determine JAVA_HOME: " + (binDir == null ? java8Exe : binDir).getPath());
        return;
    }

    String javaDir = javaHome.getPath();
    getLogger().debug("setting JAVA_HOME to java8 location: " + javaDir);
    addToEnvironment("JAVA_HOME", javaDir);
}

/**
 * Resolves the location of Queue.jar.
 * Lookup order: the "GATKPATH" software package path (returned as-is when set), then the
 * sequence tools package path, then the application tools directory. When none is configured,
 * a relative "Queue.jar" is returned.
 *
 * @return the File to use for Queue.jar; existence on disk is not checked here
 */
protected File getQueueJAR()
{
    // NOTE(review): the GATKPATH value is used directly as the returned file rather than as a
    // directory containing Queue.jar - confirm this is the intended configuration contract
    String gatkPath = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("GATKPATH");
    if (gatkPath != null)
    {
        return new File(gatkPath);
    }

    String toolsDir = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath(SequencePipelineService.SEQUENCE_TOOLS_PARAM);
    if (toolsDir == null)
    {
        toolsDir = PipelineJobService.get().getAppProperties().getToolsDirectory();
    }

    if (toolsDir == null)
    {
        return new File("Queue.jar");
    }

    return new File(toolsDir, "Queue.jar");
}

public boolean jarExists()
{
return getJAR() == null || !getJAR().exists();
Expand All @@ -100,69 +68,6 @@ protected void ensureDictionary(File referenceFasta) throws PipelineJobException
new CreateSequenceDictionaryWrapper(getLogger()).execute(referenceFasta, false);
}

/**
 * Invokes the wrapped JAR with "--version" using the java8 executable and the configured
 * java options, and returns the captured output.
 *
 * @return the trimmed output of the command, or null if the output is empty after trimming
 * @throws PipelineJobException declared by the signature; presumably raised when the command fails
 */
public String getVersionString() throws PipelineJobException
{
    List<String> command = new ArrayList<>();
    command.add(SequencePipelineService.get().getJava8FilePath());
    command.addAll(SequencePipelineService.get().getJavaOpts(_maxRamOverride));
    command.add("-jar");
    command.add(getJAR().getPath());
    command.add("--version");

    String rawOutput = executeWithOutput(command);
    return StringUtils.trimToNull(rawOutput);
}

/**
 * @return the minimum RAM requested per Queue job, or null when it has not been set
 */
public Integer getMinRamPerQueueJob()
{
    return this._minRamPerQueueJob;
}

/**
 * Stores the minimum RAM to reserve per Queue job; this value is consulted when the
 * scatter count is computed.
 *
 * @param minRamPerQueueJob the minimum RAM per job (may be null)
 */
public void setMinRamPerQueueJob(Integer minRamPerQueueJob)
{
    this._minRamPerQueueJob = minRamPerQueueJob;
}

/**
 * Computes the scatter count (number of parallel jobs) to hand to Queue.
 * Starts from the configured max threads and, when a positive minimum RAM per job has been set
 * and the SEQUENCEANALYSIS_MAX_RAM environment variable holds a number, lowers the count to
 * (max RAM / min RAM per job), never below 1.
 *
 * @return the scatter count; 1 when no max thread setting is available
 */
protected Integer getScatterForQueueJob()
{
    // NOTE: Queue will create n number of jobs, dividing memory evenly between them. Because it is possible
    // to submit a job w/ lower available RAM and comparably high CPUs, this could result in queue not having enough memory per job.
    // therefore do a quick check and potentially scale down scatter
    Integer maxThreads = SequencePipelineService.get().getMaxThreads(getLogger());
    if (maxThreads == null)
    {
        return 1;
    }

    // no usable per-job minimum means there is nothing to scale against
    if (_minRamPerQueueJob == null || _minRamPerQueueJob <= 0)
    {
        return maxThreads;
    }

    String maxRamSetting = StringUtils.trimToNull(System.getenv("SEQUENCEANALYSIS_MAX_RAM"));
    if (maxRamSetting == null)
    {
        return maxThreads;
    }

    try
    {
        Integer maxRamAllowed = ConvertHelper.convert(maxRamSetting, Integer.class);
        if (maxRamAllowed != null)
        {
            // integer division: how many jobs of the minimum size fit in the allowed RAM
            int adjusted = Math.max(maxRamAllowed / _minRamPerQueueJob, 1);
            if (adjusted < maxThreads)
            {
                getLogger().debug("lowering max threads to match available RAM. setting to: " + adjusted);
                maxThreads = adjusted;
            }
        }
    }
    // NOTE(review): ConvergenceException looks like an unintended type here; a failed conversion
    // presumably raises a conversion exception instead - confirm against ConvertHelper
    catch (ConvergenceException e)
    {
        getLogger().warn("non-numeric value for SEQUENCEANALYSIS_MAX_RAM: [" + maxRamSetting + "]");
    }

    return maxThreads;
}

protected List<String> getBaseArgs()
{
List<String> args = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public String getVersion() throws PipelineJobException
}

List<String> params = new LinkedList<>();
params.add(SequencePipelineService.get().getJava8FilePath());
params.add(SequencePipelineService.get().getJavaFilepath());
params.add("-jar");
params.add(getJar().getPath());
params.add(getToolName());
Expand Down Expand Up @@ -95,7 +95,7 @@ protected List<String> getBaseArgs(boolean basicArgsOnly) throws PipelineJobExce
}

List<String> params = new LinkedList<>();
params.add(SequencePipelineService.get().getJava8FilePath());
params.add(SequencePipelineService.get().getJavaFilepath());
params.addAll(SequencePipelineService.get().getJavaOpts());
params.add("-jar");
params.add(getJar().getPath());
Expand Down
8 changes: 4 additions & 4 deletions SequenceAnalysis/pipeline_code/sequence_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,10 @@ then
rm -Rf gatk-4*
rm -Rf $LKTOOLS_DIR/GenomeAnalysisTK4.jar

wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4.3.0.0.zip
unzip gatk-4.3.0.0.zip
wget $WGET_OPTS https://github.com/broadinstitute/gatk/releases/download/4.4.0.0/gatk-4.4.0.0.zip
unzip gatk-4.4.0.0.zip

cp ./gatk-4.3.0.0/gatk-package-4.3.0.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar
cp ./gatk-4.4.0.0/gatk-package-4.4.0.0-local.jar $LKTOOLS_DIR/GenomeAnalysisTK4.jar
else
echo "Already installed"
fi
Expand Down Expand Up @@ -695,7 +695,7 @@ then
rm -Rf $LKTOOLS_DIR/htsjdk-*
rm -Rf $LKTOOLS_DIR/libIntelDeflater.so

wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/2.27.4/picard.jar
wget $WGET_OPTS https://github.com/broadinstitute/picard/releases/download/3.0.0/picard.jar

cp -R ./picard.jar $LKTOOLS_DIR/
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import java.io.File;
import java.util.Arrays;
import java.util.Collections;

/**
* User: bimber
Expand All @@ -24,7 +25,7 @@
*/
public class IndelRealignerStep extends AbstractCommandPipelineStep<IndelRealignerWrapper> implements BamProcessingStep
{
public IndelRealignerStep(PipelineStepProvider provider, PipelineContext ctx)
public IndelRealignerStep(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new IndelRealignerWrapper(ctx.getLogger()));
}
Expand All @@ -33,17 +34,7 @@ public static class Provider extends AbstractPipelineStepProvider<IndelRealigner
{
public Provider()
{
super("IndelRealigner", "Indel Realigner", "GATK", "The step runs GATK's IndelRealigner tool. This tools performs local realignment to minmize the number of mismatching bases across all the reads.", Arrays.asList(
ToolParameterDescriptor.create("useQueue", "Use Queue?", "If checked, this tool will attempt to run using GATK queue, allowing parallelization using scatter/gather.", "checkbox", new JSONObject()
{{
put("checked", false);
}}, false),
//TODO: consider supporting:
//--maxReadsForRealignment
//--maxReadsForConsensuses

ToolParameterDescriptor.create("minRamPerQueueJob", "Min RAM Per Queue Job", "This only applies if queue is checked. If provided, the scatter count (number of jobs) for queue will be adjusted to ensure at least this amount of RAM, in GB, is available for each job", "ldk-integerfield", null, null)
), null, "http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_IndelRealigner.html");
super("IndelRealigner", "Indel Realigner", "GATK", "The step runs GATK's IndelRealigner tool. This tools performs local realignment to minmize the number of mismatching bases across all the reads.", Collections.emptyList(), null, "http://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_walkers_indels_IndelRealigner.html");
}

@Override
Expand All @@ -65,22 +56,7 @@ public Output processBam(Readset rs, File inputBam, ReferenceGenome referenceGen
getPipelineCtx().getLogger().debug("dict exists: " + preExistingDictionary + ", " + dictionary.getPath());

File outputBam = new File(outputDirectory, FileUtil.getBaseName(inputBam) + ".realigned.bam");
File created;
if (getProvider().getParameterByName("useQueue").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false))
{
Integer minRamPerQueueJob = getProvider().getParameterByName("minRamPerQueueJob").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
if (minRamPerQueueJob != null)
{
getWrapper().setMinRamPerQueueJob(minRamPerQueueJob);
}

created = getWrapper().executeWithQueue(inputBam, outputBam, referenceGenome.getWorkingFastaFile(), null);
}
else
{
created = getWrapper().execute(inputBam, outputBam, referenceGenome.getWorkingFastaFile(), null);
}

File created = getWrapper().execute(inputBam, outputBam, referenceGenome.getWorkingFastaFile(), null);
if (created != null)
{
output.setBAM(created);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,92 +89,6 @@ public File execute(File inputBam, @Nullable File outputBam, File referenceFasta
return processOutput(tempFiles, inputBam, outputBam, realignedBam);
}

/**
 * Runs GATK IndelRealigner through Queue, using the IndelRealignerRunner.scala qscript bundled
 * with the SequenceAnalysis module.
 * Target intervals are built first; when there are none, realignment is skipped and the result of
 * processOutput() with a null realigned BAM is returned.
 *
 * @param inputBam the BAM to realign
 * @param outputBam the desired output BAM; when null, "&lt;input base name&gt;.realigned.bam" in
 *                  getOutputDir(inputBam) is used
 * @param referenceFasta the reference genome FASTA
 * @param knownIndelsVcf optional VCF passed through to buildTargetIntervals()
 * @return the value returned by processOutput()
 * @throws PipelineJobException if the qscript cannot be located, the expected output is missing
 *                              after the run, or an IOException occurs
 */
public File executeWithQueue(File inputBam, File outputBam, File referenceFasta, @Nullable File knownIndelsVcf) throws PipelineJobException
{
    getLogger().info("Running GATK IndelRealigner using Queue for: " + inputBam.getName());
    addJavaHomeToEnvironment();

    List<File> tempFiles = new ArrayList<>();
    File workingBam = performSharedWork(inputBam, outputBam, referenceFasta, tempFiles);
    if (!workingBam.equals(inputBam))
    {
        tempFiles.add(workingBam);
    }

    File intervalsFile = buildTargetIntervals(referenceFasta, workingBam, knownIndelsVcf, getExpectedIntervalsFile(inputBam));
    if (intervalsFile == null)
    {
        getLogger().info("no intervals to realign, skipping");
        return processOutput(tempFiles, inputBam, outputBam, null);
    }

    try
    {
        String scriptPath = "external/qscript/IndelRealignerRunner.scala";
        Module module = ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class);
        FileResource r = (FileResource)module.getModuleResolver().lookup(Path.parse(scriptPath));

        // guard against a failed lookup so the caller gets a meaningful message rather than an NPE
        File scalaScript = r == null ? null : r.getFile();
        if (scalaScript == null)
            throw new FileNotFoundException("Not found: " + scriptPath);

        if (!scalaScript.exists())
            throw new FileNotFoundException("Not found: " + scalaScript.getPath());

        // Resolve the effective output file up front so a null outputBam is handled in one place
        // and is never dereferenced when building the -runDir argument below
        File realignedBam = outputBam == null ? new File(getOutputDir(inputBam), FileUtil.getBaseName(inputBam) + ".realigned.bam") : outputBam;

        List<String> args = new ArrayList<>();
        args.add(SequencePipelineService.get().getJava8FilePath());
        args.add("-classpath");
        args.add(getJAR().getPath());
        // NOTE(review): an earlier comment here said java opts should be ignored because queue's
        // scatter/gather causes issues, yet they are still passed - confirm which is intended
        args.addAll(SequencePipelineService.get().getJavaOpts());
        args.add("-jar");
        args.add(getQueueJAR().getPath());
        args.add("-S");
        args.add(scalaScript.getPath());
        args.add("-jobRunner");
        args.add("ParallelShell");
        args.add("-run");

        args.add("-R");
        args.add(referenceFasta.getPath());
        args.add("-I");
        args.add(workingBam.getPath());
        args.add("-targetIntervals");
        args.add(intervalsFile.getPath());

        // identical to outputBam's parent whenever outputBam was supplied
        args.add("-runDir");
        args.add(realignedBam.getParentFile().getPath());

        String tmpDir = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("JAVA_TMP_DIR");
        if (StringUtils.trimToNull(tmpDir) != null)
        {
            args.add("-tempDir");
            args.add(tmpDir);
        }

        args.add("-o");
        args.add(realignedBam.getPath());

        args.add("-startFromScratch");
        args.add("-scatterCount");
        args.add(getScatterForQueueJob().toString());

        execute(args);
        if (!realignedBam.exists())
        {
            throw new PipelineJobException("Expected output not found: " + realignedBam.getPath());
        }

        return processOutput(tempFiles, inputBam, outputBam, realignedBam);
    }
    catch (IOException e)
    {
        throw new PipelineJobException(e);
    }
}

private File processOutput(List<File> tempFiles, File inputBam, File outputBam, File realignedBam) throws PipelineJobException
{
if (!tempFiles.isEmpty())
Expand Down
6 changes: 0 additions & 6 deletions jbrowse/src/org/labkey/jbrowse/model/JsonFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -972,8 +972,6 @@ private void prepareLuceneIndex(Logger log) throws PipelineJobException
log.debug("Generating VCF full text index for file: " + getExpData().getFile().getName());

DISCVRSeqRunner runner = new DISCVRSeqRunner(log);
runner.addJava8HomeToEnvironment();

if (!runner.jarExists())
{
log.error("Unable to find DISCVRSeq.jar, skiping lucene index creation");
Expand All @@ -994,10 +992,6 @@ private void prepareLuceneIndex(Logger log) throws PipelineJobException
args.add(field);
}

// Always include this:
args.add("-AN");
args.add("SampleList");

runner.execute(args);
}

Expand Down

0 comments on commit b2a83a5

Please sign in to comment.