Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changes for better cluster orchestration #26

Merged
merged 18 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions Main/lib/perl/WorkflowSteps/MakeBlastPPdbNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -38,14 +39,18 @@ sub run {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $pdbFastaFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $pdbFastaFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
queryFastaFile = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
queryFastaFile = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFile = "$outputFileName"
fastaSubsetSize = $fastaSubsetSize
blastProgram = "blastp"
targetFastaFile = "$clusterWorkflowDataDir/$pdbFastaFile"
targetFastaFile = "$pdbFastaFileInNextflowWorkingDirOnCluster"
preConfiguredDatabase = false
targetDatabaseIndex = "NA"
}
Expand Down
10 changes: 7 additions & 3 deletions Main/lib/perl/WorkflowSteps/MakeBlatNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ sub run {
my $dbType = $self->getParamValue("dbType");
my $queryType = $self->getParamValue("queryType");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $increasedMemory = $self->getParamValue("increasedMemory");
my $initialMemory = $self->getParamValue("initialMemory");
Expand All @@ -41,15 +42,18 @@ sub run {
} else {
open(F, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";

my $queryFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $seqFile);
my $databaseInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $databasePath);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
queryFasta = "$seqFile"
queryFasta = "$queryFileInNextflowWorkingDirOnCluster"
fastaSubsetSize = $fastaSubsetSize
genomeFasta = "$databasePath"
genomeFasta = "$databaseInNextflowWorkingDirOnCluster"
dbType = "$dbType"
queryType = "$queryType"
outputDir = "$outputDir"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
}

Expand Down
11 changes: 8 additions & 3 deletions Main/lib/perl/WorkflowSteps/MakeEpitopeMappingNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ sub run {
my $peptideMatchResults = $self->getParamValue("peptideMatchResults");

my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $clusterWorkflowDataDir = $self->getClusterWorkflowDataDir();
my $executor = $self->getClusterExecutor();
Expand All @@ -42,13 +43,17 @@ sub run {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $iedbPeptidesTabFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $iedbPeptidesTabFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
refFasta = "$clusterWorkflowDataDir/$proteinSequenceFile"
peptidesTab = "$clusterWorkflowDataDir/$iedbPeptidesTabFile"
refFasta = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
peptidesTab = "$iedbPeptidesTabFileInNextflowWorkingDirOnCluster"
taxon = $speciesNcbiTaxonId
peptideMatchResults = "$peptideMatchResults"
results = "$clusterWorkflowDataDir/$resultsDirectory"
results = "$resultsDirectoryInNextflowWorkingDirOnCluster"
nonTaxaShortPeptideCutoff = $nonTaxaShortPeptideCutoff
}
process {
Expand Down
7 changes: 5 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakeExportpredNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -27,11 +28,13 @@ sub run {
} else {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";
my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
fastaSubsetSize = $fastaSubsetSize
}
Expand Down
7 changes: 5 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakeOrfFinderConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ sub run {
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $minPepLength = $self->getParamValue("minPepLength");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -29,11 +30,13 @@ sub run {
} else {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";
my $genomicSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $genomicSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$genomicSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$genomicSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
minPepLength = $minPepLength
fastaSubsetSize = $fastaSubsetSize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -29,10 +30,13 @@ sub run {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
fastaSubsetSize = $fastaSubsetSize
}
Expand Down
7 changes: 5 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakePsiPredNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFilePrefix = $self->getParamValue("outputFilePrefix");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $executor = $self->getClusterExecutor();
my $clusterConfigFile = "\$baseDir/conf/${executor}.config";
Expand All @@ -30,10 +31,12 @@ sub run {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);
my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFilePrefix = "$outputFilePrefix"
fastaSubsetSize = $fastaSubsetSize
}
Expand Down
8 changes: 6 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakeSegNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -29,10 +30,13 @@ sub run {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
fastaSubsetSize = $fastaSubsetSize
seqType = "aa"
Expand Down
7 changes: 5 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakeTRNAScanConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $trnascanOutputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -26,13 +27,15 @@ sub run {
if ($undo) {
$self->runCmd(0, "rm $workflowDataDir/$nextflowConfigFile");
} else {
my $genomicSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $genomicSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$genomicSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$genomicSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFile = "$trnascanOutputFileName"
fastaSubsetSize = $fastaSubsetSize
}
Expand Down
7 changes: 5 additions & 2 deletions Main/lib/perl/WorkflowSteps/MakeTmhmmNextflowConfig.pm
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sub run {
my $nextflowConfigFile = $self->getParamValue("nextflowConfigFile");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $outputFileName = $self->getParamValue("outputFileName");
my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $workflowDataDir = $self->getWorkflowDataDir();

Expand All @@ -34,12 +35,14 @@ sub run {
$self->runCmd(0, "rm $workflowDataDir/$nextflowConfigFile");
} else {
my $nextflowConfig = "$workflowDataDir/$nextflowConfigFile";
my $proteinSequenceFileInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $proteinSequenceFile);
my $resultsDirectoryInNextflowWorkingDirOnCluster = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);
open(F, ">$nextflowConfig") || die "Can't open task prop file '$nextflowConfig' for writing";

my $configString = <<NEXTFLOW;
params {
inputFilePath = "$clusterWorkflowDataDir/$proteinSequenceFile"
outputDir = "$clusterWorkflowDataDir/$resultsDirectory"
inputFilePath = "$proteinSequenceFileInNextflowWorkingDirOnCluster"
outputDir = "$resultsDirectoryInNextflowWorkingDirOnCluster"
outputFileName = "$outputFileName"
fastaSubsetSize = $fastaSubsetSize
}
Expand Down
1 change: 0 additions & 1 deletion Main/lib/perl/WorkflowSteps/NextflowResultsCache.pm
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ sub hasCacheFile {
sub copyTo {
my ($self, $test, $undo, $cacheDir, $resultsPath) = @_;


if($undo) {} #nothing to see here
else {
if ($test) {
Expand Down
15 changes: 15 additions & 0 deletions Main/lib/perl/WorkflowSteps/WorkflowStep.pm
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,21 @@ sub runSqlFetchOneRowFromOrgDb {
return @output;
}

# Returns the path of the nextflow working directory on the cluster for the
# given relative data dir path: the cluster workflow data dir, a "/", and the
# name produced by uniqueNameForNextflowWorkingDirectory().
sub getClusterNextflowWorkingDir {
  my ($self, $relativeDataDirPath) = @_;

  # look up the cluster-side data root first, then the working dir name
  my $clusterDataRoot = $self->getClusterWorkflowDataDir();
  my $workingDirName = $self->uniqueNameForNextflowWorkingDirectory($relativeDataDirPath);

  return "$clusterDataRoot/$workingDirName";
}

# Translates a file or directory path that lives under $relativeDataDirPath
# into the corresponding path inside the nextflow working directory on the
# cluster.
#
# Arguments:
#   $relativeDataDirPath   - the working dir path, used both to look up the
#                            cluster working dir and as the prefix to strip
#   $fileOrDirRelativePath - the path to translate; must start with
#                            $relativeDataDirPath
#
# Returns the cluster working dir joined by a single "/" to the remainder of
# the path, or the cluster working dir itself when the two paths are equal.
# Dies if $fileOrDirRelativePath does not start with $relativeDataDirPath.
sub relativePathToNextflowClusterPath {
  my ($self, $relativeDataDirPath, $fileOrDirRelativePath) = @_;
  my $clusterNextflowWorkingDir = $self->getClusterNextflowWorkingDir($relativeDataDirPath);

  # refuse to silently build a garbage path when the file is not under the working dir
  my $prefixLength = length($relativeDataDirPath);
  if (substr($fileOrDirRelativePath, 0, $prefixLength) ne $relativeDataDirPath) {
    die "Path '$fileOrDirRelativePath' is not under working dir '$relativeDataDirPath'";
  }

  # remove the relativeDataDirPath "prefix" from the fileOrDirRelativePath
  my $noPrefix = substr($fileOrDirRelativePath, $prefixLength);

  # drop the separator left behind by the prefix so the join below does not yield "//"
  $noPrefix =~ s{^/+}{};

  # the path is the working dir itself
  return $clusterNextflowWorkingDir if $noPrefix eq '';

  return $clusterNextflowWorkingDir . "/" . $noPrefix;
}

1;

23 changes: 0 additions & 23 deletions Main/lib/xml/workflow/HtsSNPsExperiment.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,33 +34,11 @@
<depends name="makeDataDir"/>
</step>

<!-- make an empty final dir, and mirror it to the cluster. the samples will each mirror their own file -->
<!-- to the cluster, so the mirroring can be done in pieces and in parallel -->

<step name="makeFinalDir" stepClass="ReFlow::StepClasses::MakeDataDir">
<paramValue name="dataDir">$$finalDir$$</paramValue>
<depends name="makeExperimentDatasetLoaderDir"/>
</step>

<step name="mirrorToCluster" stepClass="ReFlow::StepClasses::MirrorToComputeCluster" stepLoadTypes="toCluster">
<paramValue name="fileOrDirToMirror">$$dataDir$$</paramValue>
<depends name="makeSamplesOutputDir"/>
</step>

<step name="mirrorExperimentDatasetLoaderDirToCluster" stepClass="ReFlow::StepClasses::MirrorToComputeCluster" stepLoadTypes="toCluster">
<paramValue name="fileOrDirToMirror">$$dataDir$$/$$experimentDatasetName$$</paramValue>
<depends name="makeExperimentDatasetLoaderDir"/>
<depends name="mirrorToCluster"/>
</step>

<!-- load dataset manually so we can mirror the final dir before filling it with the sample files -->

<step name="mirrorFinalDirToCluster" stepClass="ReFlow::StepClasses::MirrorToComputeCluster" stepLoadTypes="toCluster">
<paramValue name="fileOrDirToMirror">$$finalDir$$</paramValue>
<depends name="makeFinalDir"/>
<depends name="mirrorExperimentDatasetLoaderDirToCluster"/>
</step>

<step name="insertDataset" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::DatasetLoaderInsertDataset">
<paramValue name="datasetName">$$experimentDatasetName$$</paramValue>
<paramValue name="datasetLoaderXmlFileName">$$organismDatasetLoaderXmlFile$$</paramValue>
Expand All @@ -73,7 +51,6 @@
<paramValue name="datasetName">$$experimentDatasetName$$</paramValue>
<paramValue name="datasetLoaderXmlFileName">$$organismDatasetLoaderXmlFile$$</paramValue>
<paramValue name="dataDir">$$dataDir$$/$$experimentDatasetName$$</paramValue>
<depends name="mirrorFinalDirToCluster"/>
</step>

<step name="insertExtDb" stepClass="ApiCommonWorkflow::Main::WorkflowSteps::DatasetLoaderInsertExtDb" stepLoadTypes="plugin">
Expand Down
6 changes: 0 additions & 6 deletions Main/lib/xml/workflow/OrganismSpecific.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,6 @@
<paramValue name="dataDir">$$dataDir$$</paramValue>
</step>

<step name="mirrorToCluster" stepClass="ReFlow::StepClasses::MirrorToComputeCluster" stepLoadTypes="toCluster">
<paramValue name="fileOrDirToMirror">$$dataDir$$</paramValue>
<depends name="makeDataDir"/>
</step>

<subgraph name="genericProfiles" xmlFile="generated/$$projectName$$/$$organismAbbrev$$/genericProfile.xml" excludeIfXmlFileDoesNotExist="true">
<paramValue name="parentDataDir">$$dataDir$$</paramValue>
<paramValue name="organismDatasetLoaderXmlFile">$$organismDatasetLoaderXmlFile$$</paramValue>
Expand Down Expand Up @@ -59,7 +54,6 @@
<paramValue name="organismAbbrev">$$organismAbbrev$$</paramValue>
<paramValue name="relativeWebServicesDir">$$relativeWebServicesDir$$</paramValue>
<paramValue name="proteinsFile">$$proteinsFile$$</paramValue>
<depends name="mirrorToCluster"/>
</subgraph>


Expand Down
Loading