Skip to content

Commit

Permalink
Merge pull request #327 from LabKey/fb_merge_24.11_to_develop
Browse files Browse the repository at this point in the history
Merge discvr-24.11 to develop
  • Loading branch information
bbimber authored Dec 13, 2024
2 parents 2a39d2b + 7e67f1c commit f0522c6
Show file tree
Hide file tree
Showing 53 changed files with 831 additions and 961 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.labkey.api.sequenceanalysis.pipeline;

import org.jetbrains.annotations.Nullable;
import org.labkey.api.data.Container;

import java.io.File;
import java.util.Collection;
import java.util.List;

Expand All @@ -15,4 +17,9 @@ public interface JobResourceSettings
List<ToolParameterDescriptor> getParams();

Collection<String> getDockerVolumes(Container c);

/**
 * Translates a raw filepath into the volume that should be mounted in a docker
 * container (per the abstract declaration's javadoc, mostly for cases where an
 * NFS root should be mounted rather than a child folder).
 *
 * @param input the raw filepath to translate
 * @return the volume to mount, or null when this implementation has no translation
 */
default @Nullable File inferDockerVolume(File input)
{
// Default: no translation available for this settings implementation.
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance)

abstract public Collection<String> getDockerVolumes(Container c);

/**
* The purpose of this method is to assist with translating from raw filepath to the desired volume to mount in a docker container.
* This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder.
*/
abstract public @Nullable File inferDockerVolume(File input);

abstract public List<File> getSequenceJobInputFiles(PipelineJob job);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ default void validateScatter(ScatterGatherMethod method, PipelineJob job) throws

}

default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
/**
 * Hook allowing implementations to run extra merge work over the ordered
 * per-scatter outputs and job directories. NOTE(review): presumably invoked
 * during the gather/merge phase of a scatter/gather job — confirm against caller.
 * The default implementation is a no-op aside from a debug log line.
 */
default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
ctx.getLogger().debug("No additional merge tasks are implemented for: " + getClass().getName());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package org.labkey.api.sequenceanalysis.run;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
Expand All @@ -13,13 +13,24 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class DockerWrapper extends AbstractCommandWrapper
{
private final String _containerName;
private final PipelineContext _ctx;
private File _tmpDir = null;
private String _entryPoint = null;
private boolean _runPrune = true;
private String _alternateUserHome = null;
private final Map<String, String> _dockerEnvironment = new HashMap<>();

public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
{
Expand All @@ -28,12 +39,32 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
_ctx = ctx;
}

// When set, this value is exported into the container as the HOME environment
// variable (via the _dockerEnvironment map), overriding the default user home.
public void setAlternateUserHome(String alternateUserHome)
{
_alternateUserHome = alternateUserHome;
}

// Sets the local temp directory. When set, it is mounted into the container as /tmp
// (unless already covered by a configured docker volume) and exported as TMPDIR.
public void setTmpDir(File tmpDir)
{
_tmpDir = tmpDir;
}

// Optional override for the container entrypoint (passed as docker --entrypoint).
// NOTE(review): the visible diff interleaves old/new entrypoint handling in the
// run script — confirm how _entryPoint interacts with the /bin/bash entrypoint.
public void setEntryPoint(String entryPoint)
{
_entryPoint = entryPoint;
}

// Controls whether "docker image prune -f" is run after pulling the image.
// Defaults to true (see the _runPrune field initializer).
public void setRunPrune(boolean runPrune)
{
_runPrune = runPrune;
}

// Convenience overload: runs the container without any extra input files to
// inspect for additional volume mounts (delegates with inputFiles = null).
public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
{
executeWithDocker(containerArgs, workDir, tracker, null);
}

public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection<File> inputFiles) throws PipelineJobException
{
File localBashScript = new File(workDir, "docker.sh");
File dockerBashScript = new File(workDir, "dockerRun.sh");
Expand All @@ -45,70 +76,131 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
{
writer.println("#!/bin/bash");
writer.println("set -x");
writer.println("WD=`pwd`");
writer.println("HOME=`echo ~/`");
writer.println("set -e");

writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
writer.println("sudo $DOCKER pull " + _containerName);
writer.println("sudo $DOCKER run --rm=true \\");
writer.println("\t-v \"${WD}:/work\" \\");
writer.println("\t-v \"${HOME}:/homeDir\" \\");
_ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
writer.println("$DOCKER pull " + _containerName);
if (_runPrune)
{
writer.println("$DOCKER image prune -f");
}

writer.println("$DOCKER run --rm=true \\");
writer.println("\t--group-add keep-groups \\");

// NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly:
File homeDir = new File(System.getProperty("user.home"));
if (homeDir.exists())
{
if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith))
{
writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\");
}
else
{
_ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add");
}

_dockerEnvironment.put("USER_HOME", homeDir.getPath());
}

if (_alternateUserHome != null)
{
_dockerEnvironment.put("HOME", _alternateUserHome);
}

_ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
if (inputFiles != null)
{
inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
}

if (_tmpDir != null)
{
writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
// NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly:
if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith))
{
writer.println("\t-v '" + _tmpDir.getPath() + "':/tmp \\");
}
else
{
_ctx.getLogger().debug("tmpDir already present in docker volumes, omitting");
}

addToDockerEnvironment("TMPDIR", _tmpDir.getPath());
}

if (_entryPoint != null)
{
writer.println("\t--entrypoint \"" + _entryPoint + "\"\\");
}
writer.println("\t--entrypoint /bin/bash \\");
writer.println("\t-w /work \\");

writer.println("\t-w " + workDir.getPath() + " \\");
addToDockerEnvironment("WORK_DIR", workDir.getPath());

Integer maxRam = SequencePipelineService.get().getMaxRam();
if (maxRam != null)
{
writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
writer.println("\t--memory='" + maxRam + "g' \\");
}

for (String key : _dockerEnvironment.keySet())
{
writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\");
}
writer.println("\t" + _containerName + " \\");
writer.println("\t/work/" + dockerBashScript.getName());
writer.println("EXIT_CODE=$?");
writer.println("echo 'Docker run exit code: '$EXIT_CODE");
writer.println("exit $EXIT_CODE");
writer.println("\t" + dockerBashScript.getPath());
writer.println("DOCKER_EXIT_CODE=$?");
writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE");
writer.println("exit $DOCKER_EXIT_CODE");

dockerWriter.println("#!/bin/bash");
dockerWriter.println("set -x");
dockerWriter.println(StringUtils.join(containerArgs, " "));
dockerWriter.println("EXIT_CODE=$?");
dockerWriter.println("echo 'Exit code: '$?");
dockerWriter.println("exit $EXIT_CODE");
dockerWriter.println("BASH_EXIT_CODE=$?");
dockerWriter.println("echo 'Bash exit code: '$BASH_EXIT_CODE");
dockerWriter.println("exit $BASH_EXIT_CODE");
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

localBashScript.setExecutable(true);
dockerBashScript.setExecutable(true);
execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
}

public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
public void addToDockerEnvironment(String key, String value)
{
try
_dockerEnvironment.put(key, value);
}

private Collection<File> inspectInputFiles(Collection<File> inputFiles)
{
Set<File> toAdd = inputFiles.stream().map(f -> f.isDirectory() ? f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet());
if (!toAdd.isEmpty())
{
if (workingDirectory.equals(input.getParentFile()))
{
return input;
}
Set<File> paths = new HashSet<>();
toAdd.forEach(x -> {
_ctx.getLogger().debug("Adding volume for path: " + x.getPath());

File local = new File(workingDirectory, input.getName());
if (!local.exists())
{
getLogger().debug("Copying file locally: " + input.getPath());
FileUtils.copyFile(input, local);
}
File converted = SequencePipelineService.get().inferDockerVolume(x);
if (!x.equals(converted))
{
_ctx.getLogger().debug("added as: " + converted.getPath());
}

output.addIntermediateFile(local);
if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith))
{
paths.add(converted);
}
});

return local;
}
catch (IOException e)
{
throw new PipelineJobException(e);
return paths;
}

return Collections.emptySet();
}
}
18 changes: 1 addition & 17 deletions SequenceAnalysis/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -194,20 +194,4 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null &
<< "\ncontext.pipelineConfig=${configDir.getAbsolutePath().replace("\\", "\\\\")}"
}
}
}

project.tasks.register("copyJars", Copy)
{ CopySpec copy ->
copy.group = "Build"
copy.description = "Copy commons-math3 JAR to module's lib directory"

copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE)
copy.from(project.configurations.external)
copy.into new File("${project.labkey.explodedModuleLibDir}")
copy.include {
"**commons-math3-**.jar"
}
}

project.tasks.named('module').configure { dependsOn(project.tasks.copyJars) }
project.tasks.named('copyJars').configure { mustRunAfter(project.tasks.populateExplodedLib) }
}
14 changes: 14 additions & 0 deletions SequenceAnalysis/pipeline_code/extra_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,12 @@ then
unzip paragraph-v2.4a-binary.zip
rm paragraph-v2.4a-binary.zip

python3 -m pip install pysam intervaltree

cd ../
cp -R paragraph $LKTOOLS_DIR
ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph
ln -s ${LKTOOLS_DIR}/paragraph/bin/idxdepth ${LKTOOLS_DIR}/idxdepth
ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py
else
echo "Already installed"
Expand All @@ -215,3 +218,14 @@ then
else
echo "Already installed"
fi

# Install multiqc via a user-level pip install, unless already present.
# A non-empty FORCE_REINSTALL forces a clean reinstall.
# NOTE(review): "pip install --user" does not create ${LKTOOLS_DIR}/multiqc, so the
# -e sentinel check may never pass and this branch could re-run on every invocation
# — confirm whether a symlink into LKTOOLS_DIR is missing here.
if [[ ! -e ${LKTOOLS_DIR}/multiqc || ! -z $FORCE_REINSTALL ]];
then
echo "Cleaning up previous installs"
# Remove any prior downloads and installed copies before reinstalling
rm -Rf multiqc*
rm -Rf $LKTOOLS_DIR/multiqc*

python3 -m pip install --user multiqc
else
echo "Already installed"
fi
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
import org.labkey.sequenceanalysis.run.analysis.BamIterator;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillFromFastaStep;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep;
import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis;
Expand Down Expand Up @@ -365,6 +366,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());

//handlers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -472,16 +472,28 @@ public Collection<String> getDockerVolumes(Container c)
{
if (settings.isAvailable(c))
{
for (String volume : settings.getDockerVolumes(c))
{
volumeLines.add("-v '" + volume + "':'" + volume + "'");
}
return Collections.unmodifiableCollection(settings.getDockerVolumes(c));
}
}

return volumeLines;
}

/**
 * Asks each registered JobResourceSettings for a docker-volume translation of the
 * given path; the first non-null answer wins. If no settings implementation supplies
 * a mapping, the input path itself is returned unchanged.
 * NOTE(review): despite the @Nullable annotation this method never returns null —
 * confirm whether the annotation should be dropped here.
 */
@Override
public @Nullable File inferDockerVolume(File input)
{
// Lazy pipeline: inferDockerVolume is invoked per-settings only until the
// first non-null translation is found (same short-circuit as an early return).
return SequencePipelineServiceImpl.get().getResourceSettings().stream()
.map(settings -> settings.inferDockerVolume(input))
.filter(translated -> translated != null)
.findFirst()
.orElse(input);
}

@Override
public List<File> getSequenceJobInputFiles(PipelineJob job)
{
Expand Down Expand Up @@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings)
@Override
public Set<JobResourceSettings> getResourceSettings()
{
return _resourceSettings;
return Collections.unmodifiableSet(_resourceSettings);
}

@Override
Expand Down
Loading

0 comments on commit f0522c6

Please sign in to comment.