From 6ddf877f35679d2f40b5d3386c1cba2189eb48cf Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Thu, 14 Feb 2019 10:00:00 +0100
Subject: [PATCH 01/22] update submodule

---
 Sarek-data | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sarek-data b/Sarek-data
index 03b5a18b2b..9087faa53d 160000
--- a/Sarek-data
+++ b/Sarek-data
@@ -1 +1 @@
-Subproject commit 03b5a18b2bdba3dac6307e27a5b5c7e5fec3bd54
+Subproject commit 9087faa53d25fca90c1a84a48cfaf7cbed496317

From 62e5aa20dcc197207713d885678afb808601fc14 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Thu, 14 Feb 2019 11:03:44 +0100
Subject: [PATCH 02/22] add current changes [skip ci]

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ca27045ac..1f4109bc65 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - [#694](https://github.com/SciLifeLab/Sarek/pull/694) - Add monochrome and grey logos for light or dark background
 - [#698](https://github.com/SciLifeLab/Sarek/pull/698) - Add btb profile for munin server
 - [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Add font-ttf-dejavu-sans-mono `2.37` and fontconfig `2.12.6` to container
+- [#XXX](https://github.com/SciLifeLab/Sarek/pull/XXX) - Add `MULTIPLE` as a test

 ### `Changed`

@@ -30,6 +31,7 @@
 - [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Update FastQC to `0.11.8`
 - [#705](https://github.com/SciLifeLab/Sarek/pull/705) - Change `--TMP_DIR` by `--tmp-dir` for GATK `4.0.9.0` BaseRecalibrator
 - [#706](https://github.com/SciLifeLab/Sarek/pull/706) - Update TravisCI testing
+- [#XXX](https://github.com/SciLifeLab/Sarek/pull/XXX) - Update `Sarek-data` submodule with multiple patients TSV file

 ### `Fixed`

From 1e6ad90a5ffebd796b38857f0c57ef20f8e71c45 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Thu, 14 Feb 2019 15:03:50 +0100
Subject: [PATCH 03/22] add test for MULTIPLE

---
 scripts/test.sh | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/test.sh b/scripts/test.sh
index ae591fd2c9..9288df2c6b 100755
--- a/scripts/test.sh
+++ b/scripts/test.sh
@@ -125,6 +125,12 @@ then
   clean_repo
 fi

+if [[ MULTIPLE =~ $TEST ]]
+then
+  run_wrapper --somatic --sample Sarek-data/testdata/tsv/tiny-multiple.tsv --variantCalling --tools FreeBayes,HaplotypeCaller,Manta,Mutect2 --noReports
+  run_wrapper --somatic --sample Sarek-data/testdata/tsv/tiny-multiple.tsv --variantCalling --tools Manta,Strelka --noReports --strelkaBP
+fi
+
 if [[ BUILDCONTAINERS =~ $TEST ]] && [[ $PROFILE == docker ]]
 then
   ./scripts/do_all.sh --genome $GENOME

From f3e650cd86bcf4e6765115d4965283f261204172 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Thu, 14 Feb 2019 16:41:22 +0100
Subject: [PATCH 04/22] fix multi TSV sample

---
 somaticVC.nf | 44 +++-----------------------------------------
 1 file changed, 3 insertions(+), 41 deletions(-)

diff --git a/somaticVC.nf b/somaticVC.nf
index 138fe911d3..458052ca29 100644
--- a/somaticVC.nf
+++ b/somaticVC.nf
@@ -69,10 +69,6 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) {
   referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list")
 }

-// TODO
-// FreeBayes does not need recalibrated BAMs, but we need to test whether
-// the channels are set up correctly when we disable it
-
 tsvPath = ''
 if (params.sample) tsvPath = params.sample
 else
tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" @@ -101,33 +97,17 @@ if (params.verbose) bamFiles = bamFiles.view { Files : [${it[3].fileName}, ${it[4].fileName}]" } -// assume input is recalibrated, ignore explicitBqsrNeeded -(recalibratedBam, recalTables) = bamFiles.into(2) - -recalTables = recalTables.map{ it + [null] } // null recalibration table means: do not use --BQSR - -recalTables = recalTables.map { [it[0]] + it[2..-1] } // remove status - -if (params.verbose) recalibratedBam = recalibratedBam.view { +if (params.verbose) bamFiles = bamFiles.view { "Recalibrated BAM for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" } -// Here we have a recalibrated bam set, but we need to separate the bam files based on patient status. -// The sample tsv config file which is formatted like: "subject status sample lane fastq1 fastq2" -// cf fastqFiles channel, I decided just to add _status to the sample name to have less changes to do. -// And so I'm sorting the channel if the sample match _0, then it's a normal sample, otherwise tumor. -// Then combine normal and tumor to get each possibilities -// ie. normal vs tumor1, normal vs tumor2, normal vs tumor3 -// then copy this channel into channels for each variant calling -// I guess it will still work even if we have multiple normal samples - // separate recalibrateBams by status bamsNormal = Channel.create() bamsTumor = Channel.create() -recalibratedBam +bamFiles .choice(bamsTumor, bamsNormal) {it[1] == 0 ? 1 : 0} bamsNormal = bamsNormal.ifEmpty{exit 1, "No normal sample defined, check TSV file: ${tsvFile}"} @@ -214,18 +194,7 @@ if (params.verbose) bedIntervals = bedIntervals.view { " Interv: ${it.baseName}" } -(bamsNormalTemp, bamsNormal, bedIntervals) = generateIntervalsForVC(bamsNormal, bedIntervals) -(bamsTumorTemp, bamsTumor, bedIntervals) = generateIntervalsForVC(bamsTumor, bedIntervals) - -bamsAll = bamsNormal.combine(bamsTumor) - -// Since idPatientNormal and idPatientTumor are the same -// It's removed from bamsAll Channel (same for genderNormal) -// /!\ It is assumed that every sample are from the same patient -bamsAll = bamsAll.map { - idPatientNormal, idSampleNormal, bamNormal, baiNormal, idPatientTumor, idSampleTumor, bamTumor, baiTumor -> - [idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor] -} +bamsAll = bamsNormal.join(bamsTumor) // Manta and Strelka (bamsForManta, bamsForStrelka, bamsForStrelkaBP, bamsAll) = bamsAll.into(4) @@ -816,13 +785,6 @@ def defineToolList() { ] } -def generateIntervalsForVC(bams, intervals) { - def (bamsNew, bamsForVC) = bams.into(2) - def (intervalsNew, vcIntervals) = intervals.into(2) - def bamsForVCNew = bamsForVC.combine(vcIntervals) - return [bamsForVCNew, bamsNew, intervalsNew] -} - def grabRevision() { // Return the same string executed from github or not return workflow.revision ?: workflow.commitId ?: workflow.scriptId.substring(0,10) From f5c265abf8c8a92e0b58ac00e92b8c1818a18408 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 14 Feb 2019 16:48:14 +0100 Subject: [PATCH 05/22] update CHANGELOG --- CHANGELOG.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec915c138f..0015e7f7ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,22 +9,23 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` - [#712](https://github.com/SciLifeLab/Sarek/pull/712), [#718](https://github.com/SciLifeLab/Sarek/pull/718) - Added possibilities to run Sarek with `conda` +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Annotation documentation +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Helper script to download `snpeff` and `VEP` cache files +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - New `--annotation_cache`, `--snpEff_cache`, `--vep_cache` parameters +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Possibility to use cache wen annotating with `snpEff` and `VEP` +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Update `Sarek-data` submodule with multiple patients TSV file ### `Changed` - [#710](https://github.com/SciLifeLab/Sarek/pull/710) - Improve release checklist and script - [#711](https://github.com/SciLifeLab/Sarek/pull/711) - Improve configuration priorities -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `vepCacheVersion` is now defined in `conf/genomes.config` or `conf/igenomes.config` -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `snpeff` and `vep` containers are now built with conda - [#716](https://github.com/SciLifeLab/Sarek/pull/716) - Update paths to containers and iGenomes +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `snpeff` and `vep` containers are now built with conda +- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `vepCacheVersion` is now defined in `conf/genomes.config` or `conf/igenomes.config` +- [#722](https://github.com/SciLifeLab/Sarek/pull/722) - Add path to ASCAT `.gc` file in `igenomes.config` +- [#722](https://github.com/SciLifeLab/Sarek/pull/722) - Update `Sarek-data` submodule - [#724](https://github.com/SciLifeLab/Sarek/pull/724) - Improved AwsBatch configuration -### `Added` -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Possibility to use cache wen annotating with `snpEff` and `VEP` -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - New `--annotation_cache`, `--snpEff_cache`, `--vep_cache` parameters -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Helper script to download `snpeff` and `VEP` cache files -- [#719](https://github.com/SciLifeLab/Sarek/pull/719) - Annotation documentation - ### `Removed` - [#715](https://github.com/SciLifeLab/Sarek/pull/715) - Remove `defReferencesFiles` function from `buildReferences.nf` - [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `snpEff` base container is no longer used @@ -32,6 +33,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` - [#720](https://github.com/SciLifeLab/Sarek/pull/720) - bamQC is now run on the recalibrated bams, and not after MarkDuplicates +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Fix multi sample TSV file [#691](https://github.com/SciLifeLab/Sarek/issues/691) ## [2.2.2] - 2018-12-19 @@ -58,7 +60,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- [#702](https://github.com/SciLifeLab/Sarek/pull/702) - Update FastQC to `0.11.8` - [#705](https://github.com/SciLifeLab/Sarek/pull/705) - Change `--TMP_DIR` by `--tmp-dir` for GATK `4.0.9.0` BaseRecalibrator - [#706](https://github.com/SciLifeLab/Sarek/pull/706) - Update TravisCI testing -- [#XXX](https://github.com/SciLifeLab/Sarek/pull/XXX) - Update `Sarek-data` submodule with multiple patients TSV file ### `Fixed` From 3624ff50bbf58c66d4598d20c04b4251b187ec59 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 15 Feb 2019 15:17:15 +0100 Subject: [PATCH 06/22] change output configuration for VCFs --- annotate.nf | 26 +++++++++++--------------- conf/travis.config | 8 ++++++++ germlineVC.nf | 6 +++--- lib/SarekUtils.groovy | 21 ++++++++++----------- somaticVC.nf | 22 ++++++++-------------- 5 files changed, 40 insertions(+), 43 deletions(-) diff --git a/annotate.nf b/annotate.nf index 73eef9eb4c..ddff03f4a7 100644 --- a/annotate.nf +++ b/annotate.nf @@ -70,15 +70,15 @@ vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { // we annote all available vcfs by default that we can find in the VariantCalling directory Channel.empty().mix( - Channel.fromPath("${directoryMap.haplotypecaller}/*.vcf.gz") + Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.haplotypecaller}/*.vcf.gz") .flatten().map{vcf -> ['haplotypecaller', vcf]}, - Channel.fromPath("${directoryMap.manta}/*SV.vcf.gz") + Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.manta}/*SV.vcf.gz") .flatten().map{vcf -> ['manta', vcf]}, - Channel.fromPath("${directoryMap.mutect2}/*.vcf.gz") + Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.mutect2}/*.vcf.gz") .flatten().map{vcf -> ['mutect2', vcf]}, - Channel.fromPath("${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only + Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only .flatten().map{vcf -> ['strelka', vcf]}, - Channel.fromPath("${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates + Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates .flatten().map{vcf -> ['strelkabp', vcf]} ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1 @@ -147,20 +147,18 @@ if (params.verbose) vcfReport = vcfReport.view { "Files : [${it.fileName}]" } -snpEff_cache = params.snpEff_cache ? params.snpEff_cache : "null" - process RunSnpeff { tag {"${variantCaller} - ${vcf}"} publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${vcf.simpleName}_snpEff.csv") "${directoryMap.snpeffReports.minus(params.outDir+'/')}/${it}" + if (it == "${vcf.simpleName}_snpEff.csv") "${params.outDir}/Reports/${directoryMap.snpeff}/${it}" else if (it == "${vcf.simpleName}_snpEff.ann.vcf") null - else "${directoryMap.snpeff.minus(params.outDir+'/')}/${it}" + else "${params.outDir}/Annotation/${directoryMap.snpeff}/${it}" } input: set variantCaller, file(vcf) from vcfForSnpeff - file dataDir from Channel.fromPath(snpEff_cache, type: 'dir') + file dataDir from Channel.value(params.snpEff_cache ? params.snpEff_cache : "null") val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb) output: @@ -204,19 +202,17 @@ if('merge' in tools) { ) } -vep_cache = params.vep_cache ? 
params.vep_cache : "null" - process RunVEP { tag {"${variantCaller} - ${vcf}"} publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${vcf.simpleName}_VEP.summary.html") "${directoryMap.vep.minus(params.outDir+'/')}/${it}" + if (it == "${vcf.simpleName}_VEP.summary.html") "${params.outDir}/Annotation/${directoryMap.vep}/${it}" else null } input: set annotator, variantCaller, file(vcf), file(idx) from vcfForVep - file dataDir from Channel.fromPath(vep_cache, type: 'dir') + file dataDir from Channel.value(params.vep_cache ? params.vep_cache : "null") val cache_version from Channel.value(params.genomes[params.genome].vepCacheVersion) output: @@ -260,7 +256,7 @@ vcfToCompress = snpeffVCF.mix(vepVCF) process CompressVCF { tag {"${annotator} - ${vcf}"} - publishDir "${directoryMap."$finalannotator"}", mode: params.publishDirMode + publishDir "${params.outDir}/Annotation/${directoryMap."$finalannotator"}", mode: params.publishDirMode input: set annotator, variantCaller, file(vcf) from vcfToCompress diff --git a/conf/travis.config b/conf/travis.config index 3bf97ca817..8865582d65 100644 --- a/conf/travis.config +++ b/conf/travis.config @@ -19,3 +19,11 @@ process { cpus = params.max_cpus memory = params.max_memory } + +withName:RunVEP { + maxForks = 1 +} + +withName:RunSnpeff { + maxForks = 1 +} diff --git a/germlineVC.nf b/germlineVC.nf index 7c32373d6e..ac40dbb398 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -318,7 +318,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${directoryMap."$variantCaller"}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap."$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -356,7 +356,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir directoryMap.strelka, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelka}", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka @@ -409,7 +409,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir directoryMap.manta, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 9b69a3aa17..87fc5587a4 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -137,23 +137,22 @@ class SarekUtils { return [ 'duplicateMarked' : "${outDir}/Preprocessing/DuplicateMarked", 'recalibrated' : "${outDir}/Preprocessing/Recalibrated", - 'ascat' : "${outDir}/VariantCalling/Ascat", - 'freebayes' : "${outDir}/VariantCalling/FreeBayes", - 'gvcf-hc' : "${outDir}/VariantCalling/HaplotypeCallerGVCF", - 'haplotypecaller' : "${outDir}/VariantCalling/HaplotypeCaller", - 'manta' : "${outDir}/VariantCalling/Manta", - 'mutect2' : "${outDir}/VariantCalling/MuTect2", - 'strelka' : "${outDir}/VariantCalling/Strelka", - 'strelkabp' : "${outDir}/VariantCalling/StrelkaBP", - 'snpeff' : "${outDir}/Annotation/SnpEff", - 'vep' : "${outDir}/Annotation/VEP", + 'ascat' : "Ascat", + 'freebayes' : 
"FreeBayes", + 'gvcf-hc' : "HaplotypeCallerGVCF", + 'haplotypecaller' : "HaplotypeCaller", + 'manta' : "Manta", + 'mutect2' : "MuTect2", + 'strelka' : "Strelka", + 'strelkabp' : "StrelkaBP", + 'snpeff' : "SnpEff", + 'vep' : "VEP", 'bamQC' : "${outDir}/Reports/bamQC", 'bcftoolsStats' : "${outDir}/Reports/BCFToolsStats", 'fastQC' : "${outDir}/Reports/FastQC", 'markDuplicatesQC' : "${outDir}/Reports/MarkDuplicates", 'multiQC' : "${outDir}/Reports/MultiQC", 'samtoolsStats' : "${outDir}/Reports/SamToolsStats", - 'snpeffReports' : "${outDir}/Reports/SnpEff", 'vcftools' : "${outDir}/Reports/VCFTools", 'version' : "${outDir}/Reports/ToolsVersion" ] diff --git a/somaticVC.nf b/somaticVC.nf index 1b9e13dbbc..b0d84ba2e3 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -92,13 +92,7 @@ if (tsvPath) { startMessage() if (params.verbose) bamFiles = bamFiles.view { - "BAMs to process:\n\ - ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ - Files : [${it[3].fileName}, ${it[4].fileName}]" -} - -if (params.verbose) bamFiles = bamFiles.view { - "Recalibrated BAM for variant Calling:\n\ + "BAMs for variant Calling:\n\ ID : ${it[0]}\tStatus: ${it[1]}\tSample: ${it[2]}\n\ Files : [${it[3].fileName}, ${it[4].fileName}]" } @@ -286,7 +280,7 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${directoryMap."$variantCaller"}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap."$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge @@ -321,7 +315,7 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunStrelka { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.strelka, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelka}", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka @@ -378,7 +372,7 @@ if (params.verbose) strelkaOutput = strelkaOutput.view { process RunManta { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.manta, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta @@ -432,7 +426,7 @@ if (params.verbose) mantaOutput = mantaOutput.view { process RunSingleManta { tag {idSample + " - Tumor-Only"} - publishDir directoryMap.manta, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -491,7 +485,7 @@ bamsForStrelkaBP = bamsForStrelkaBP.map { process RunStrelkaBP { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.strelkabp, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelkabp}", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from bamsForStrelkaBP @@ -583,7 +577,7 @@ alleleCountOutput = alleleCountOutput.map { process 
RunConvertAlleleCounts { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.ascat, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.ascat}", mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput @@ -605,7 +599,7 @@ process RunConvertAlleleCounts { process RunAscat { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir directoryMap.ascat, mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.ascat}", mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput From 884a1099edccc18a17beeb4b619f014a44d02b86 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 18 Feb 2019 16:12:23 +0100 Subject: [PATCH 07/22] update documentation [skip ci] --- docs/OUTPUT.md | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/docs/OUTPUT.md b/docs/OUTPUT.md index eb0040ca1f..a2039b721c 100644 --- a/docs/OUTPUT.md +++ b/docs/OUTPUT.md @@ -29,7 +29,7 @@ Some of the Manta VCF files are not always succeed in going through the VEP filt The HTML summary files show general statistics and quality-related measures. In the header of the annotated VCF files one can find the VEP/Ensembl version used for annotation, also the version numbers for additional databases like Clinvar or dbSNP used in the "VEP" line. -The format of the [consequence annotations][VEP-predictions] is also in the VCF header describing the INFO field. +The format of the [consequence annotations][VEP-predictions] is also in the VCF header describing the INFO field. In the moment it contains: * Consequence: impact of the variation, if there is any * Codons: the codon change, i.e. cGt/cAt @@ -53,11 +53,12 @@ The preprocessing is following the [GATK Best Practices][GATK-BP] to obtain alig This is the place for the BAM file delivered to users: besides the duplicatemarked files the recalibration tables are also stored (`*.recal.table`), these can be used to create base recalibrated files. The `.tsv` file is autogenerated also, these can be used by Sarek for further processing and/or variant calling. -The BAM file headers contain the details about the actual command-line arguments for mapping, merging, use `samtools view -H ` to view the used reference, read groups etc. +The BAM file headers contain the details about the actual command-line arguments for mapping, merging, use `samtools view -H ` to view the used reference, read groups etc. ### Recalibrated: -This directory is usually empty, it is the location for the final recalibrated files in the preprocessing pipeline: recalibrated BAMs are usually 2-3 times larger than the duplicatemarked files. To re-generate recalibrated BAMs you have to apply the recalibration table delivered to the `NonRecalibrated` directory either by calling Sarek, or doing this [recalibration step][BQSR-link] yourself. +This directory is usually empty, it is the location for the final recalibrated files in the preprocessing pipeline: recalibrated BAMs are usually 2-3 times larger than the duplicatemarked files. +To re-generate recalibrated BAMs you have to apply the recalibration table delivered to the `NonRecalibrated` directory either by calling Sarek, or doing this [recalibration step][BQSR-link] yourself. 
--- ## Reports: @@ -65,7 +66,7 @@ This directory is usually empty, it is the location for the final recalibrated f The `Reports` directory is the place for collecting outputs for different quality control (QC) software; going through these files can help us to decide whether the sequencing and the workflow was successful, or further steps are needed to get meaningful results. The main entry point it the [MultiQC][multiqc-link] directory: the HTML index file aggregates and visualizes all the software use for QC. -### MultiQC +### MultiQC To assess the quality of the sequencing and workflow the best start is to view at the `Reports/MultiQC/multiqc_report.html` file of the `MultiQC` directory, where the statistics and graphics of all the software below should be presented. The actual graphs and the tables are configurable, and generally much easier to view than the raw output of the individual software. The subsequent QC compartments are: @@ -73,25 +74,30 @@ The subsequent QC compartments are: * bamQC: [Qualimap][qualimap-link] examines sequencing alignment data in SAM/BAM files according to the features of the mapped reads and provides an overall view of the data provides quality control statistics about aligned BAM files * BCFToolsStats: [bcftools][bcftools] measuring non-reference allele frequency, depth distribution, stats by quality and per-sample counts, singleton stats, etc. of VCF files. * [FastQC][fastqc]: provides statistics about the raw FASTQ files only. -* MarkDuplicates: a [Picard][picard-md] tool to tag PCR/optical duplicates from aligned BAM data -* SamToolsStats: [samtools][samtools] collection of statistics from BAM files +* MarkDuplicates: a [Picard][picard-md] tool to tag PCR/optical duplicates from aligned BAM data. +* SamToolsStats: [samtools][samtools] collection of statistics from BAM files. --- -## VariantCallings: +## VariantCalling: -All the raw results regarding variant-calling are collected in this directory. Not all the software below are producing VCF files, also both somatic and germline +All the raw results regarding variant-calling are collected in this directory. +Not all the software below are producing VCF files, also both somatic and germline variants are collected in this directory. -* [Ascat][ascat]: is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. This direcory contains the graphical output of the software, CNV, ploidy and sample purity estimations. -* [FreeBayes][freebayes]: is for Bayesian haplotype-based genetic polymorphism discovery and genotyping. The single VCF file generated by FreeBayes -is huge, it is recommended to flatten and filter this VCF, i.e. using the provided [SpeedSeq][speedseq] filter +* [Ascat][ascat]: is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. +This directory contains the graphical output of the software, CNV, ploidy and sample purity estimations. +* [FreeBayes][freebayes]: is for Bayesian haplotype-based genetic polymorphism discovery and genotyping. +The single VCF file generated by FreeBayes is huge, it is recommended to flatten and filter this VCF, i.e. using the provided [SpeedSeq][speedseq] filter. 
* [HaplotypeCaller][haplotypecaller] is the in-house germline caller of the Broad Institute, the non-recalibrated variant files are there to check the -germline variations and compare the two samples (tumour and normal) for possible mixup -* HaplotypeCallerGVCF: germline calls in [gVCF format][genomicvcf] even for the tumour sample: this format makes possible the joint analysis of a cohort -* [Manta][manta]: is a structural variant caller supported by Illumina. There are several output files, corresponding to germline (diploid) calls, candidate calls and somatic files. +germline variations and compare the two samples (tumour and normal) for possible mixup. +* HaplotypeCallerGVCF: germline calls in [gVCF format][genomicvcf] even for the tumour sample: this format makes possible the joint analysis of a cohort. +* [Manta][manta]: is a structural variant caller supported by Illumina. +There are several output files, corresponding to germline (diploid) calls, candidate calls and somatic files. Manta provides a candidate list for small indels also that can be fed to Strelka. -* [MuTect2][mutect2] is the current somatic caller of GATK for both SNPs and indels. Recommended to keep only lines with the "PASS" filter. -* [Strelka2][strelka2] is somatic SNP and indel caller supported by Illumina. Strelka gives filtered and unfiltered calls for SNPs and indels separately, together with germline calls. +* [MuTect2][mutect2] is the current somatic caller of GATK for both SNPs and indels. +Recommended to keep only lines with the "PASS" filter. +* [Strelka2][strelka2] is somatic SNP and indel caller supported by Illumina. +Strelka gives filtered and unfiltered calls for SNPs and indels separately, together with germline calls. [ascat]:https://www.crick.ac.uk/research/a-z-researchers/researchers-v-y/peter-van-loo/software/ [bcftools]: http://www.htslib.org/doc/bcftools.html From 4cba4fee46e7ccf9b6cf03e05e158f06237bb370 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 18 Feb 2019 16:12:55 +0100 Subject: [PATCH 08/22] test annotation on only one vcf file --- scripts/test.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/test.sh b/scripts/test.sh index 9288df2c6b..25affda709 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -94,7 +94,7 @@ if [[ ALL,GERMLINE =~ $TEST ]] then # Added Strelka to germline test (no Strelka best practices test for this small data) and not asking for reports run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal --variantCalling --tools HaplotypeCaller,Strelka --noReports - run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal --variantCalling --tools HaplotypeCaller,Strelka --bed `pwd`/Sarek-data/testdata/target.bed --noReports + run_wrapper --germline --sampleDir Sarek-data/testdata/tiny/normal --variantCalling --tools HaplotypeCaller,Strelka --bed Sarek-data/testdata/target.bed --noReports run_wrapper --germline --step recalibrate --noReports clean_repo fi @@ -120,8 +120,7 @@ then then ANNOTATOR=merge,snpEFF,VEP fi - run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF Sarek-data/testdata/vcf/Strelka_1234N_variants.vcf.gz --noReports - run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF Sarek-data/testdata/vcf/Strelka_1234N_variants.vcf.gz,Sarek-data/testdata/vcf/Strelka_9876T_variants.vcf.gz + run_wrapper --annotate --tools ${ANNOTATOR} --annotateVCF Sarek-data/testdata/vcf/Strelka_1234N_variants.vcf.gz clean_repo fi From d7f15ecc8ca7e2a100923f01c929803854cd18c1 Mon Sep 17 00:00:00 2001 From: MaxUlysse 
Date: Mon, 18 Feb 2019 16:13:37 +0100 Subject: [PATCH 09/22] annotated VCFs are now ordered by idPatient (if it exists) --- annotate.nf | 117 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 45 deletions(-) diff --git a/annotate.nf b/annotate.nf index ddff03f4a7..a45dc11b5b 100644 --- a/annotate.nf +++ b/annotate.nf @@ -46,10 +46,9 @@ if (workflow.profile == 'awsbatch') { if(!params.awsqueue) exit 1, "Provide the job queue for aws batch!" } - -tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{it.trim().toLowerCase()} : [] annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : [] +tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] directoryMap = SarekUtils.defineDirectoryMap(params.outDir) toolList = defineToolList() @@ -68,30 +67,30 @@ vcfToAnnotate = Channel.create() vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { -// we annote all available vcfs by default that we can find in the VariantCalling directory +// Sarek, by default, annotates all available vcfs that it can find in the VariantCalling directory +// Excluding vcfs from FreeBayes, and g.vcf from HaplotypeCaller +// Basically it's: VariantCalling/*/{HaplotypeCaller,Manta,MuTect2,Strelka,StrelkaBP}/*.vcf.gz +// Without *SmallIndels.vcf.gz from Manta, and *.genome.vcf.gz from Strelka +// This small snipet `vcf.minus(vcf.fileName)[-2]` catches idPatient +// This field is used to output final annotated VCFs in the correct directory Channel.empty().mix( - Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.haplotypecaller}/*.vcf.gz") - .flatten().map{vcf -> ['haplotypecaller', vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.manta}/*SV.vcf.gz") - .flatten().map{vcf -> ['manta', vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.mutect2}/*.vcf.gz") - .flatten().map{vcf -> ['mutect2', vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.strelka}/*{somatic,variants}*.vcf.gz") // Strelka only - .flatten().map{vcf -> ['strelka', vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/${directoryMap.strelkabp}/*{somatic,variants}*.vcf.gz") // Strelka with Manta indel candidates - .flatten().map{vcf -> ['strelkabp', vcf]} + Channel.fromPath("${params.outDir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz") + .flatten().map{vcf -> ['haplotypecaller', vcf.minus(vcf.fileName)[-2], vcf]}, + Channel.fromPath("${params.outDir}/VariantCalling/*/Manta/*SV.vcf.gz") + .flatten().map{vcf -> ['manta', vcf.minus(vcf.fileName)[-2], vcf]}, + Channel.fromPath("${params.outDir}/VariantCalling/*/MuTect2/*.vcf.gz") + .flatten().map{vcf -> ['mutect2', vcf.minus(vcf.fileName)[-2], vcf]}, + Channel.fromPath("${params.outDir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz") // Strelka only + .flatten().map{vcf -> ['strelka', vcf.minus(vcf.fileName)[-2], vcf]}, + Channel.fromPath("${params.outDir}/VariantCalling/*/StrelkaBP/*{somatic,variant}*.vcf.gz") // Strelka with Manta indel candidates + .flatten().map{vcf -> ['strelkabp', vcf.minus(vcf.fileName)[-2], vcf]} ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 } } else if (annotateTools == []) { // alternatively, annotate user-submitted VCFs - list = "" - annotateVCF.each{ list += ",${it}" } - list = list.substring(1) - if (StringUtils.countMatches("${list}", ",") == 0) vcfToAnnotate = Channel.fromPath("${list}") - .map{vcf -> ['userspecified', vcf]} - else vcfToAnnotate = Channel.fromPath("{$list}") - .map{vcf -> ['userspecified', vcf]} + vcfToAnnotate = Channel.fromPath(annotateVCF) + .map{vcf -> ['userspecified', '', vcf]} } else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() @@ -101,17 +100,20 @@ vcfNotToAnnotate.close() (vcfForBCFtools, vcfForVCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(4) vcfForVep = vcfForVep.map { - variantCaller, vcf -> - ["vep", variantCaller, vcf, null] + variantCaller, idPatient, vcf -> + ["VEP", variantCaller, idPatient, vcf, null] } process RunBcftoolsStats { - tag {vcf} + tag { idPatient != "" + ? "${idPatient} - ${vcf}" + : "${vcf}" + } publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode input: - set variantCaller, file(vcf) from vcfForBCFtools + set variantCaller, idPatient, file(vcf) from vcfForBCFtools output: file ("*.bcf.tools.stats.out") into bcfReport @@ -127,12 +129,15 @@ if (params.verbose) bcfReport = bcfReport.view { } process RunVcftools { - tag {vcf} + tag { idPatient != "" + ? "${idPatient} - ${variantCaller} - ${vcf}" + : "${variantCaller} - ${vcf}" + } publishDir directoryMap.vcftools, mode: params.publishDirMode input: - set variantCaller, file(vcf) from vcfForVCFtools + set variantCaller, idPatient, file(vcf) from vcfForVCFtools output: file ("${vcf.simpleName}.*") into vcfReport @@ -148,22 +153,25 @@ if (params.verbose) vcfReport = vcfReport.view { } process RunSnpeff { - tag {"${variantCaller} - ${vcf}"} + tag { idPatient != "" + ? "${idPatient} - ${variantCaller} - ${vcf}" + : "${variantCaller} - ${vcf}" + } publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${vcf.simpleName}_snpEff.csv") "${params.outDir}/Reports/${directoryMap.snpeff}/${it}" - else if (it == "${vcf.simpleName}_snpEff.ann.vcf") null - else "${params.outDir}/Annotation/${directoryMap.snpeff}/${it}" + if (it == "${vcf.simpleName}_snpEff.ann.vcf") null + else if (idPatient != "") "Annotation/${idPatient}/snpEff/${it}" + else "Annotation/snpEff/${it}" } input: - set variantCaller, file(vcf) from vcfForSnpeff + set variantCaller, idPatient, file(vcf) from vcfForSnpeff file dataDir from Channel.value(params.snpEff_cache ? params.snpEff_cache : "null") val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb) output: set file("${vcf.simpleName}_snpEff.genes.txt"), file("${vcf.simpleName}_snpEff.csv"), file("${vcf.simpleName}_snpEff.summary.html") into snpeffOutput - set val("snpeff"), variantCaller, file("${vcf.simpleName}_snpEff.ann.vcf") into snpeffVCF + set val("snpEff"), variantCaller, idPatient, file("${vcf.simpleName}_snpEff.ann.vcf") into snpeffVCF when: 'snpeff' in tools || 'merge' in tools @@ -203,26 +211,32 @@ if('merge' in tools) { } process RunVEP { - tag {"${variantCaller} - ${vcf}"} + tag { idPatient != "" + ? 
"${idPatient} - ${variantCaller} - ${vcf}" + : "${variantCaller} - ${vcf}" + } publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${vcf.simpleName}_VEP.summary.html") "${params.outDir}/Annotation/${directoryMap.vep}/${it}" + if (it == "${vcf.simpleName}_VEP.summary.html") { + if (idPatient != "") "Annotation/${idPatient}/VEP/${it}" + else "Annotation/VEP/${it}" + } else null } input: - set annotator, variantCaller, file(vcf), file(idx) from vcfForVep + set annotator, variantCaller, idPatient, file(vcf), file(idx) from vcfForVep file dataDir from Channel.value(params.vep_cache ? params.vep_cache : "null") val cache_version from Channel.value(params.genomes[params.genome].vepCacheVersion) output: - set finalannotator, variantCaller, file("${vcf.simpleName}_VEP.ann.vcf") into vepVCF + set finalAnnotator, variantCaller, idPatient, file("${vcf.simpleName}_VEP.ann.vcf") into vepVCF file("${vcf.simpleName}_VEP.summary.html") into vepReport when: 'vep' in tools || 'merge' in tools script: - finalannotator = annotator == "snpeff" ? 'merge' : 'vep' + finalAnnotator = annotator == "snpEff" ? 'merge' : 'VEP' genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome cache = (params.vep_cache && params.annotation_cache) ? "--dir_cache \${PWD}/${dataDir}" : "--dir_cache /.vep" """ @@ -254,18 +268,25 @@ if (params.verbose) vepReport = vepReport.view { vcfToCompress = snpeffVCF.mix(vepVCF) process CompressVCF { - tag {"${annotator} - ${vcf}"} + tag { idPatient != "" + ? "${idPatient} - ${annotator} - ${vcf}" + : "${annotator} - ${vcf}" + } - publishDir "${params.outDir}/Annotation/${directoryMap."$finalannotator"}", mode: params.publishDirMode + publishDir params.outDir, mode: params.publishDirMode, saveAs: { + idPatient != "" + ? "Annotation/${idPatient}/${finalAnnotator}/${it}" + : "Annotation/${finalAnnotator}/${it}" + } input: - set annotator, variantCaller, file(vcf) from vcfToCompress + set annotator, variantCaller, idPatient, file(vcf) from vcfToCompress output: - set annotator, variantCaller, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput) + set annotator, variantCaller, idPatient, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (vcfCompressed, vcfCompressedoutput) script: - finalannotator = annotator == "merge" ? "vep" : annotator + finalAnnotator = annotator == "merge" ? 
"VEP" : annotator """ bgzip < ${vcf} > ${vcf}.gz tabix ${vcf}.gz @@ -273,9 +294,15 @@ process CompressVCF { } if (params.verbose) vcfCompressedoutput = vcfCompressedoutput.view { - "${it[0]} VCF:\n" + - "File : ${it[2].fileName}\n" + - "Index : ${it[3].fileName}" + if (it[2] != "") { + "${it[2]} - ${it[0]} VCF:\n" + + "File : ${it[3].fileName}\n" + + "Index : ${it[4].fileName}" + } else { + "${it[0]} VCF:\n" + + "File : ${it[3].fileName}\n" + + "Index : ${it[4].fileName}" + } } /* From f9c1403025b0f2ffeb6f2f632a322489d086241c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 18 Feb 2019 17:27:06 +0100 Subject: [PATCH 10/22] fix annoying stuff --- annotate.nf | 52 ++++++++++++------------------------------- germlineVC.nf | 18 +++++++-------- lib/SarekUtils.groovy | 10 --------- somaticVC.nf | 30 +++++++++++++------------ 4 files changed, 39 insertions(+), 71 deletions(-) diff --git a/annotate.nf b/annotate.nf index a45dc11b5b..1fb8d21ec2 100644 --- a/annotate.nf +++ b/annotate.nf @@ -69,9 +69,9 @@ vcfNotToAnnotate = Channel.create() if (annotateVCF == []) { // Sarek, by default, annotates all available vcfs that it can find in the VariantCalling directory // Excluding vcfs from FreeBayes, and g.vcf from HaplotypeCaller -// Basically it's: VariantCalling/*/{HaplotypeCaller,Manta,MuTect2,Strelka,StrelkaBP}/*.vcf.gz +// Basically it's: VariantCalling/*/{HaplotypeCaller,Manta,MuTect2,Strelka}/*.vcf.gz // Without *SmallIndels.vcf.gz from Manta, and *.genome.vcf.gz from Strelka -// This small snipet `vcf.minus(vcf.fileName)[-2]` catches idPatient +// The small snipet `vcf.minus(vcf.fileName)[-2]` catches idPatient // This field is used to output final annotated VCFs in the correct directory Channel.empty().mix( Channel.fromPath("${params.outDir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz") @@ -80,17 +80,16 @@ if (annotateVCF == []) { .flatten().map{vcf -> ['manta', vcf.minus(vcf.fileName)[-2], vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/*/MuTect2/*.vcf.gz") .flatten().map{vcf -> ['mutect2', vcf.minus(vcf.fileName)[-2], vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz") // Strelka only + Channel.fromPath("${params.outDir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz") .flatten().map{vcf -> ['strelka', vcf.minus(vcf.fileName)[-2], vcf]}, - Channel.fromPath("${params.outDir}/VariantCalling/*/StrelkaBP/*{somatic,variant}*.vcf.gz") // Strelka with Manta indel candidates - .flatten().map{vcf -> ['strelkabp', vcf.minus(vcf.fileName)[-2], vcf]} ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 0 : 1 } } else if (annotateTools == []) { -// alternatively, annotate user-submitted VCFs +// Annotate user-submitted VCFs +// If user-submitted, Sarek assume that the idPatient should be assumed automatically vcfToAnnotate = Channel.fromPath(annotateVCF) - .map{vcf -> ['userspecified', '', vcf]} + .map{vcf -> ['userspecified', vcf.minus(vcf.fileName)[-2], vcf]} } else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() @@ -105,10 +104,7 @@ vcfForVep = vcfForVep.map { } process RunBcftoolsStats { - tag { idPatient != "" - ? "${idPatient} - ${vcf}" - : "${vcf}" - } + tag {"${idPatient} - ${vcf}"} publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode @@ -129,10 +125,7 @@ if (params.verbose) bcfReport = bcfReport.view { } process RunVcftools { - tag { idPatient != "" - ? 
"${idPatient} - ${variantCaller} - ${vcf}" - : "${variantCaller} - ${vcf}" - } + tag {"${idPatient} - ${variantCaller} - ${vcf}"} publishDir directoryMap.vcftools, mode: params.publishDirMode @@ -153,15 +146,11 @@ if (params.verbose) vcfReport = vcfReport.view { } process RunSnpeff { - tag { idPatient != "" - ? "${idPatient} - ${variantCaller} - ${vcf}" - : "${variantCaller} - ${vcf}" - } + tag {"${idPatient} - ${variantCaller} - ${vcf}"} publishDir params.outDir, mode: params.publishDirMode, saveAs: { if (it == "${vcf.simpleName}_snpEff.ann.vcf") null - else if (idPatient != "") "Annotation/${idPatient}/snpEff/${it}" - else "Annotation/snpEff/${it}" + else "Annotation/${idPatient}/snpEff/${it}" } input: @@ -211,16 +200,10 @@ if('merge' in tools) { } process RunVEP { - tag { idPatient != "" - ? "${idPatient} - ${variantCaller} - ${vcf}" - : "${variantCaller} - ${vcf}" - } + tag {"${idPatient} - ${variantCaller} - ${vcf}"} publishDir params.outDir, mode: params.publishDirMode, saveAs: { - if (it == "${vcf.simpleName}_VEP.summary.html") { - if (idPatient != "") "Annotation/${idPatient}/VEP/${it}" - else "Annotation/VEP/${it}" - } + if (it == "${vcf.simpleName}_VEP.summary.html") "Annotation/${idPatient}/VEP/${it}" else null } @@ -268,16 +251,9 @@ if (params.verbose) vepReport = vepReport.view { vcfToCompress = snpeffVCF.mix(vepVCF) process CompressVCF { - tag { idPatient != "" - ? "${idPatient} - ${annotator} - ${vcf}" - : "${annotator} - ${vcf}" - } + tag {"${idPatient} - ${annotator} - ${vcf}"} - publishDir params.outDir, mode: params.publishDirMode, saveAs: { - idPatient != "" - ? "Annotation/${idPatient}/${finalAnnotator}/${it}" - : "Annotation/${finalAnnotator}/${it}" - } + publishDir "${params.outDir}/Annotation/${idPatient}/${finalAnnotator}/${it}", mode: params.publishDirMode input: set annotator, variantCaller, idPatient, file(vcf) from vcfToCompress diff --git a/germlineVC.nf b/germlineVC.nf index ac40dbb398..3e1d7e6748 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -318,11 +318,12 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "-" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap."$variantCaller"}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${"$variantCaller"}", mode: params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + file(targetBED) from Channel.value(params.targetBED ? 
params.targetBED : "null") output: // we have this funny *_* pattern to avoid copying the raw calls to publishdir @@ -335,12 +336,10 @@ process ConcatVCF { if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" else if (variantCaller == 'gvcf-hc') outputFile = "haplotypecaller_${idSampleNormal}.g.vcf" else outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" - if(params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${params.targetBED}" + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" else // WGS concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " - """ concatenateVCFs.sh ${concatOptions} """ @@ -356,10 +355,11 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunSingleStrelka { tag {idSample} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelka}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Strelka", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleStrelka + file(targetBED) from Channel.value(params.targetBED ? params.targetBED : "null") set file(genomeFile), file(genomeIndex) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex @@ -372,15 +372,15 @@ process RunSingleStrelka { script: """ - if [ ! -s "${params.targetBED}" ]; then - # do WGS + if [ ! -s "${targetBED}" ]; then + # WGS configureStrelkaGermlineWorkflow.py \ --bam ${bam} \ --referenceFasta ${genomeFile} \ --runDir Strelka else # WES or targeted - bgzip --threads ${task.cpus} -c ${params.targetBED} > call_targets.bed.gz + bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz tabix call_targets.bed.gz configureStrelkaGermlineWorkflow.py \ --bam ${bam} \ @@ -409,7 +409,7 @@ if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { process RunSingleManta { tag {idSample + " - Single Diploid"} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Manta", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index 87fc5587a4..dbfa0b46db 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -137,16 +137,6 @@ class SarekUtils { return [ 'duplicateMarked' : "${outDir}/Preprocessing/DuplicateMarked", 'recalibrated' : "${outDir}/Preprocessing/Recalibrated", - 'ascat' : "Ascat", - 'freebayes' : "FreeBayes", - 'gvcf-hc' : "HaplotypeCallerGVCF", - 'haplotypecaller' : "HaplotypeCaller", - 'manta' : "Manta", - 'mutect2' : "MuTect2", - 'strelka' : "Strelka", - 'strelkabp' : "StrelkaBP", - 'snpeff' : "SnpEff", - 'vep' : "VEP", 'bamQC' : "${outDir}/Reports/bamQC", 'bcftoolsStats' : "${outDir}/Reports/BCFToolsStats", 'fastQC' : "${outDir}/Reports/FastQC", diff --git a/somaticVC.nf b/somaticVC.nf index b0d84ba2e3..06a02812ba 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -280,11 +280,12 @@ if (params.verbose) vcfsToMerge = vcfsToMerge.view { process ConcatVCF { tag {variantCaller + "_" + idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap."$variantCaller"}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/${"$variantCaller"}", mode: 
params.publishDirMode input: set variantCaller, idPatient, idSampleNormal, idSampleTumor, file(vcFiles) from vcfsToMerge file(genomeIndex) from Channel.value(referenceMap.genomeIndex) + file(targetBED) from Channel.value(params.targetBED ? params.targetBED : "null") output: // we have this funny *_* pattern to avoid copying the raw calls to publishdir @@ -297,7 +298,7 @@ process ConcatVCF { outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" if(params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${params.targetBED}" + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" else // WGS concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " @@ -315,10 +316,11 @@ if (params.verbose) vcfConcatenated = vcfConcatenated.view { process RunStrelka { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelka}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Strelka", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForStrelka + file(targetBED) from Channel.value(params.targetBED ? params.targetBED : "null") set file(genomeFile), file(genomeIndex), file(genomeDict) from Channel.value([ referenceMap.genomeFile, referenceMap.genomeIndex, @@ -332,7 +334,7 @@ process RunStrelka { script: """ - if [ ! -s "${params.targetBED}" ]; then + if [ ! -s "${targetBED}" ]; then # do WGS configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ @@ -341,7 +343,7 @@ process RunStrelka { --runDir Strelka else # WES or targeted - bgzip --threads ${task.cpus} -c ${params.targetBED} > call_targets.bed.gz + bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz tabix call_targets.bed.gz configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ @@ -372,7 +374,7 @@ if (params.verbose) strelkaOutput = strelkaOutput.view { process RunManta { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Manta", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from bamsForManta @@ -426,7 +428,7 @@ if (params.verbose) mantaOutput = mantaOutput.view { process RunSingleManta { tag {idSample + " - Tumor-Only"} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.manta}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Manta", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamsForSingleManta @@ -485,7 +487,7 @@ bamsForStrelkaBP = bamsForStrelkaBP.map { process RunStrelkaBP { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.strelkabp}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/Strelka", mode: params.publishDirMode input: set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from bamsForStrelkaBP @@ -512,13 +514,13 @@ process RunStrelkaBP { python Strelka/runWorkflow.py -m local -j ${task.cpus} mv 
Strelka/results/variants/somatic.indels.vcf.gz \ - Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz mv Strelka/results/variants/somatic.indels.vcf.gz.tbi \ - Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi mv Strelka/results/variants/somatic.snvs.vcf.gz \ - Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi \ - Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi """ } @@ -577,7 +579,7 @@ alleleCountOutput = alleleCountOutput.map { process RunConvertAlleleCounts { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.ascat}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/ASCAT", mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCountOutput @@ -599,7 +601,7 @@ process RunConvertAlleleCounts { process RunAscat { tag {idSampleTumor + "_vs_" + idSampleNormal} - publishDir "${params.outDir}/VariantCalling/${idPatient}/${directoryMap.ascat}", mode: params.publishDirMode + publishDir "${params.outDir}/VariantCalling/${idPatient}/ASCAT", mode: params.publishDirMode input: set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOutput From 8e6d7b20cf29dd32282797596b897f139c110a55 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 18 Feb 2019 17:30:21 +0100 Subject: [PATCH 11/22] fix output --- annotate.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/annotate.nf b/annotate.nf index 1fb8d21ec2..721f63c6dd 100644 --- a/annotate.nf +++ b/annotate.nf @@ -253,7 +253,7 @@ vcfToCompress = snpeffVCF.mix(vepVCF) process CompressVCF { tag {"${idPatient} - ${annotator} - ${vcf}"} - publishDir "${params.outDir}/Annotation/${idPatient}/${finalAnnotator}/${it}", mode: params.publishDirMode + publishDir "${params.outDir}/Annotation/${idPatient}/${finalAnnotator}", mode: params.publishDirMode input: set annotator, variantCaller, idPatient, file(vcf) from vcfToCompress From 5410f0da7c4eef63b53ce10b24e328850338ff18 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 09:50:00 +0100 Subject: [PATCH 12/22] fix --targetBED --- germlineVC.nf | 40 ++++++++++++++++------------------------ somaticVC.nf | 32 +++++++++++--------------------- 2 files changed, 27 insertions(+), 45 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index 3e1d7e6748..b5fdc98c7c 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -371,31 +371,23 @@ process RunSingleStrelka { when: 'strelka' in tools && !params.onlyQC script: + if (params.targetBED) { + beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" + options = "--exome --callRegions call_targets.bed.gz" + } """ - if [ ! 
-s "${targetBED}" ]; then - # WGS - configureStrelkaGermlineWorkflow.py \ - --bam ${bam} \ - --referenceFasta ${genomeFile} \ - --runDir Strelka - else - # WES or targeted - bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz - tabix call_targets.bed.gz - configureStrelkaGermlineWorkflow.py \ - --bam ${bam} \ - --referenceFasta ${genomeFile} \ - --exome \ - --callRegions call_targets.bed.gz \ - --runDir Strelka - fi - - # always run this part - python Strelka/runWorkflow.py -m local -j ${task.cpus} - mv Strelka/results/variants/genome.*.vcf.gz Strelka_${idSample}_genome.vcf.gz - mv Strelka/results/variants/genome.*.vcf.gz.tbi Strelka_${idSample}_genome.vcf.gz.tbi - mv Strelka/results/variants/variants.vcf.gz Strelka_${idSample}_variants.vcf.gz - mv Strelka/results/variants/variants.vcf.gz.tbi Strelka_${idSample}_variants.vcf.gz.tbi + ${beforeScript} + configureStrelkaGermlineWorkflow.py \ + --bam ${bam} \ + --referenceFasta ${genomeFile} \ + ${options} \ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + mv Strelka/results/variants/genome.*.vcf.gz Strelka_${idSample}_genome.vcf.gz + mv Strelka/results/variants/genome.*.vcf.gz.tbi Strelka_${idSample}_genome.vcf.gz.tbi + mv Strelka/results/variants/variants.vcf.gz Strelka_${idSample}_variants.vcf.gz + mv Strelka/results/variants/variants.vcf.gz.tbi Strelka_${idSample}_variants.vcf.gz.tbi """ } diff --git a/somaticVC.nf b/somaticVC.nf index 06a02812ba..afac5b755f 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -333,30 +333,20 @@ process RunStrelka { when: 'strelka' in tools && !params.onlyQC script: + if (params.targetBED) { + beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" + options = "--exome --callRegions call_targets.bed.gz" + } """ - if [ ! 
-s "${targetBED}" ]; then - # do WGS - configureStrelkaSomaticWorkflow.py \ - --tumor ${bamTumor} \ - --normal ${bamNormal} \ - --referenceFasta ${genomeFile} \ - --runDir Strelka - else - # WES or targeted - bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz - tabix call_targets.bed.gz - configureStrelkaSomaticWorkflow.py \ - --tumor ${bamTumor} \ - --normal ${bamNormal} \ - --referenceFasta ${genomeFile} \ - --exome \ - --callRegions call_targets.bed.gz \ - --runDir Strelka - fi + ${beforeScript} + configureStrelkaSomaticWorkflow.py \ + --tumor ${bamTumor} \ + --normal ${bamNormal} \ + --referenceFasta ${genomeFile} \ + ${options} \ + --runDir Strelka python Strelka/runWorkflow.py -m local -j ${task.cpus} - # always run this part - mv Strelka/results/variants/somatic.indels.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz mv Strelka/results/variants/somatic.indels.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi mv Strelka/results/variants/somatic.snvs.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz From 1cdc30379d4a706cc6c79613dd4310d17af6957d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 10:20:21 +0100 Subject: [PATCH 13/22] spacing and alignements --- annotate.nf | 12 +++-------- germlineVC.nf | 40 +++++++++++++++++------------------ somaticVC.nf | 58 +++++++++++++++++++++++++-------------------------- 3 files changed, 52 insertions(+), 58 deletions(-) diff --git a/annotate.nf b/annotate.nf index 721f63c6dd..2dffd05b06 100644 --- a/annotate.nf +++ b/annotate.nf @@ -270,15 +270,9 @@ process CompressVCF { } if (params.verbose) vcfCompressedoutput = vcfCompressedoutput.view { - if (it[2] != "") { - "${it[2]} - ${it[0]} VCF:\n" + - "File : ${it[3].fileName}\n" + - "Index : ${it[4].fileName}" - } else { - "${it[0]} VCF:\n" + - "File : ${it[3].fileName}\n" + - "Index : ${it[4].fileName}" - } + "${it[2]} - ${it[0]} VCF:\n" + + "File : ${it[3].fileName}\n" + + "Index : ${it[4].fileName}" } /* diff --git a/germlineVC.nf b/germlineVC.nf index b5fdc98c7c..6ea64e9f93 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -326,7 +326,7 @@ process ConcatVCF { file(targetBED) from Channel.value(params.targetBED ? 
params.targetBED : "null") output: - // we have this funny *_* pattern to avoid copying the raw calls to publishdir + // we have this funny *_* pattern to avoid copying the raw calls to publishdir set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated @@ -336,13 +336,13 @@ process ConcatVCF { if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" else if (variantCaller == 'gvcf-hc') outputFile = "haplotypecaller_${idSampleNormal}.g.vcf" else outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" - if(params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" - else // WGS - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " - """ - concatenateVCFs.sh ${concatOptions} - """ + if(params.targetBED) // targeted + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" + else // WGS + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " + """ + concatenateVCFs.sh ${concatOptions} + """ } if (params.verbose) vcfConcatenated = vcfConcatenated.view { @@ -375,20 +375,20 @@ process RunSingleStrelka { beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" options = "--exome --callRegions call_targets.bed.gz" } - """ - ${beforeScript} + """ + ${beforeScript} configureStrelkaGermlineWorkflow.py \ - --bam ${bam} \ - --referenceFasta ${genomeFile} \ + --bam ${bam} \ + --referenceFasta ${genomeFile} \ ${options} \ - --runDir Strelka - - python Strelka/runWorkflow.py -m local -j ${task.cpus} - mv Strelka/results/variants/genome.*.vcf.gz Strelka_${idSample}_genome.vcf.gz - mv Strelka/results/variants/genome.*.vcf.gz.tbi Strelka_${idSample}_genome.vcf.gz.tbi - mv Strelka/results/variants/variants.vcf.gz Strelka_${idSample}_variants.vcf.gz - mv Strelka/results/variants/variants.vcf.gz.tbi Strelka_${idSample}_variants.vcf.gz.tbi - """ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + mv Strelka/results/variants/genome.*.vcf.gz Strelka_${idSample}_genome.vcf.gz + mv Strelka/results/variants/genome.*.vcf.gz.tbi Strelka_${idSample}_genome.vcf.gz.tbi + mv Strelka/results/variants/variants.vcf.gz Strelka_${idSample}_variants.vcf.gz + mv Strelka/results/variants/variants.vcf.gz.tbi Strelka_${idSample}_variants.vcf.gz.tbi + """ } if (params.verbose) singleStrelkaOutput = singleStrelkaOutput.view { diff --git a/somaticVC.nf b/somaticVC.nf index afac5b755f..be1aab2871 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -219,18 +219,18 @@ process RunMutect2 { script: """ - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R ${genomeFile}\ - -I ${bamTumor} -tumor ${idSampleTumor} \ - -I ${bamNormal} -normal ${idSampleNormal} \ - -L ${intervalBed} \ - -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R ${genomeFile}\ + -I ${bamTumor} -tumor ${idSampleTumor} \ + -I ${bamNormal} -normal ${idSampleNormal} \ + -L ${intervalBed} \ + -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf """ } -// --germline_resource af-only-gnomad.vcf.gz \ -// --normal_panel pon.vcf.gz \ -// --dbsnp ${dbsnp} \ +// --germline_resource af-only-gnomad.vcf.gz \ +// --normal_panel pon.vcf.gz \ +// --dbsnp ${dbsnp} \ mutect2Output = mutect2Output.groupTuple(by:[0,1,2,3]) @@ -288,22 +288,22 @@ process 
ConcatVCF { file(targetBED) from Channel.value(params.targetBED ? params.targetBED : "null") output: - // we have this funny *_* pattern to avoid copying the raw calls to publishdir + // we have this funny *_* pattern to avoid copying the raw calls to publishdir set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated - // TODO DRY with ConcatVCF + // TODO DRY with ConcatVCF when: ( 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC script: outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" - if(params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" - else // WGS - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " + if(params.targetBED) // targeted + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" + else // WGS + concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " - """ - concatenateVCFs.sh ${concatOptions} + """ + concatenateVCFs.sh ${concatOptions} """ } @@ -337,21 +337,21 @@ process RunStrelka { beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" options = "--exome --callRegions call_targets.bed.gz" } - """ - ${beforeScript} + """ + ${beforeScript} configureStrelkaSomaticWorkflow.py \ --tumor ${bamTumor} \ --normal ${bamNormal} \ - --referenceFasta ${genomeFile} \ + --referenceFasta ${genomeFile} \ ${options} \ - --runDir Strelka - - python Strelka/runWorkflow.py -m local -j ${task.cpus} - mv Strelka/results/variants/somatic.indels.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz - mv Strelka/results/variants/somatic.indels.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi - mv Strelka/results/variants/somatic.snvs.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz - mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi - """ + --runDir Strelka + + python Strelka/runWorkflow.py -m local -j ${task.cpus} + mv Strelka/results/variants/somatic.indels.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz + mv Strelka/results/variants/somatic.indels.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi + mv Strelka/results/variants/somatic.snvs.vcf.gz Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz + mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi + """ } if (params.verbose) strelkaOutput = strelkaOutput.view { From 1abb513561074717beca53dee3a7ac1f272938db Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 10:23:29 +0100 Subject: [PATCH 14/22] finally fix targetBED --- germlineVC.nf | 3 +++ somaticVC.nf | 3 +++ 2 files changed, 6 insertions(+) diff --git a/germlineVC.nf b/germlineVC.nf index 6ea64e9f93..eaa7494a7d 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -374,6 +374,9 @@ process RunSingleStrelka { if (params.targetBED) { beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" options = "--exome --callRegions call_targets.bed.gz" + } else { + beforeScript = "" + options = "" } """ ${beforeScript} diff --git a/somaticVC.nf b/somaticVC.nf index be1aab2871..b51de85ab3 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -336,6 +336,9 @@ process 
RunStrelka { if (params.targetBED) { beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" options = "--exome --callRegions call_targets.bed.gz" + } else { + beforeScript = "" + options = "" } """ ${beforeScript} From eb3dfb1649e4f374b84a6c88a905435c97b0b7fe Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 11:07:55 +0100 Subject: [PATCH 15/22] this time it's for real, targetBED is fixed --- germlineVC.nf | 9 ++++----- somaticVC.nf | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index eaa7494a7d..e91a974bb3 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -46,7 +46,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project Date: Tue, 19 Feb 2019 11:08:20 +0100 Subject: [PATCH 16/22] spacing / alignement / code polishing --- annotate.nf | 8 ++++---- buildContainers.nf | 2 +- buildReferences.nf | 2 +- main.nf | 4 +--- runMultiQC.nf | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/annotate.nf b/annotate.nf index 2dffd05b06..2a18c8fb1c 100644 --- a/annotate.nf +++ b/annotate.nf @@ -43,7 +43,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " @@ -214,7 +213,6 @@ if (params.verbose) bamQCmappedReport = bamQCmappedReport.view { Dir : [${it.fileName}]" } - // Sort bam whether they are standalone or should be merged // Borrowed code from https://github.com/guigolab/chip-nf diff --git a/runMultiQC.nf b/runMultiQC.nf index d0f24b5e1d..5bf719e977 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -40,7 +40,7 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project Date: Tue, 19 Feb 2019 11:45:59 +0100 Subject: [PATCH 17/22] remove defineDirectoryMap() --- annotate.nf | 5 ++--- germlineVC.nf | 23 +++++++---------------- lib/SarekUtils.groovy | 16 ---------------- main.nf | 31 +++++++++++++++---------------- runMultiQC.nf | 21 ++++++++++----------- somaticVC.nf | 29 +++++++++-------------------- 6 files changed, 43 insertions(+), 82 deletions(-) diff --git a/annotate.nf b/annotate.nf index 2a18c8fb1c..6dafd5ec3e 100644 --- a/annotate.nf +++ b/annotate.nf @@ -50,7 +50,6 @@ annotateTools = params.annotateTools ? params.annotateTools.split(',').collect{i annotateVCF = params.annotateVCF ? params.annotateVCF.split(',').collect{it.trim()} : [] tools = params.tools ? 
params.tools.split(',').collect{it.trim().toLowerCase()} : [] -directoryMap = SarekUtils.defineDirectoryMap(params.outDir) toolList = defineToolList() if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information' @@ -106,7 +105,7 @@ vcfForVep = vcfForVep.map { process RunBcftoolsStats { tag {"${idPatient} - ${vcf}"} - publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/BCFToolsStats", mode: params.publishDirMode input: set variantCaller, idPatient, file(vcf) from vcfForBCFtools @@ -127,7 +126,7 @@ if (params.verbose) bcfReport = bcfReport.view { process RunVcftools { tag {"${idPatient} - ${variantCaller} - ${vcf}"} - publishDir directoryMap.vcftools, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/VCFTools", mode: params.publishDirMode input: set variantCaller, idPatient, file(vcf) from vcfForVCFtools diff --git a/germlineVC.nf b/germlineVC.nf index e91a974bb3..ce192c1e7f 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -51,7 +51,6 @@ if (workflow.profile == 'awsbatch') { tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] -directoryMap = SarekUtils.defineDirectoryMap(params.outDir) referenceMap = defineReferenceMap() toolList = defineToolList() @@ -68,7 +67,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { tsvPath = '' if (params.sample) tsvPath = params.sample -else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" +else tsvPath = "${params.outDir}/Preprocessing/Recalibrated/recalibrated.tsv" // Set up the bamFiles channel @@ -335,12 +334,9 @@ process ConcatVCF { if (variantCaller == 'haplotypecaller') outputFile = "${variantCaller}_${idSampleNormal}.vcf" else if (variantCaller == 'gvcf-hc') outputFile = "haplotypecaller_${idSampleNormal}.g.vcf" else outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" - if (params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" - else // WGS - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " + options = params.targetBED ? "-t ${targetBED}" : "" """ - concatenateVCFs.sh ${concatOptions} + concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${concatOptions} """ } @@ -370,13 +366,8 @@ process RunSingleStrelka { when: 'strelka' in tools && !params.onlyQC script: - if (params.targetBED) { - beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" - options = "--exome --callRegions call_targets.bed.gz" - } else { - beforeScript = "" - options = "" - } + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? 
"--exome --callRegions call_targets.bed.gz" : "" """ ${beforeScript} configureStrelkaGermlineWorkflow.py \ @@ -467,7 +458,7 @@ vcfForQC = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/BCFToolsStats", mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -490,7 +481,7 @@ bcfReport.close() process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/VCFTools", mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy index dbfa0b46db..0d38fe7dfa 100644 --- a/lib/SarekUtils.groovy +++ b/lib/SarekUtils.groovy @@ -132,22 +132,6 @@ class SarekUtils { return true } - // Define map of directories - static def defineDirectoryMap(outDir) { - return [ - 'duplicateMarked' : "${outDir}/Preprocessing/DuplicateMarked", - 'recalibrated' : "${outDir}/Preprocessing/Recalibrated", - 'bamQC' : "${outDir}/Reports/bamQC", - 'bcftoolsStats' : "${outDir}/Reports/BCFToolsStats", - 'fastQC' : "${outDir}/Reports/FastQC", - 'markDuplicatesQC' : "${outDir}/Reports/MarkDuplicates", - 'multiQC' : "${outDir}/Reports/MultiQC", - 'samtoolsStats' : "${outDir}/Reports/SamToolsStats", - 'vcftools' : "${outDir}/Reports/VCFTools", - 'version' : "${outDir}/Reports/ToolsVersion" - ] - } - // Channeling the TSV file containing BAM. // Format is: "subject gender status sample bam bai" static def extractBams(tsvFile, mode) { diff --git a/main.nf b/main.nf index 6e5a53f3f4..8dbc6a6785 100644 --- a/main.nf +++ b/main.nf @@ -53,7 +53,6 @@ if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! 
Use --project gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.duplicateMarked}/${bam}\t${directoryMap.duplicateMarked}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/Preprocessing/DuplicateMarked/${bam}\t${params.outDir}/Preprocessing/DuplicateMarked/${bai}\n" }.collectFile( - name: 'duplicateMarked.tsv', sort: true, storeDir: directoryMap.duplicateMarked + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outDir}/Preprocessing/DuplicateMarked" ) duplicateMarkedBams = duplicateMarkedBams.map { @@ -322,7 +321,7 @@ if (params.verbose) duplicateMarkedBams = duplicateMarkedBams.view { process CreateRecalibrationTable { tag {idPatient + "-" + idSample} - publishDir directoryMap.duplicateMarked, mode: params.publishDirMode, overwrite: false + publishDir "${params.outDir}/Preprocessing/DuplicateMarked", mode: params.publishDirMode, overwrite: false input: set idPatient, status, idSample, file(bam), file(bai) from mdBam // realignedBam @@ -362,9 +361,9 @@ process CreateRecalibrationTable { // Create a TSV file to restart from this step recalibrationTableTSV.map { idPatient, status, idSample, bam, bai, recalTable -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.duplicateMarked}/${bam}\t${directoryMap.duplicateMarked}/${bai}\t${directoryMap.duplicateMarked}/${recalTable}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/Preprocessing/DuplicateMarked/${bam}\t${params.outDir}/Preprocessing/DuplicateMarked/${bai}\t${params.outDir}/Preprocessing/DuplicateMarked/${recalTable}\n" }.collectFile( - name: 'duplicateMarked.tsv', sort: true, storeDir: directoryMap.duplicateMarked + name: 'duplicateMarked.tsv', sort: true, storeDir: "${params.outDir}/Preprocessing/DuplicateMarked" ) recalibrationTable = mdBamToJoin.join(recalibrationTable, by:[0,1,2]) @@ -380,7 +379,7 @@ if (params.verbose) recalibrationTable = recalibrationTable.view { process RecalibrateBam { tag {idPatient + "-" + idSample} - publishDir directoryMap.recalibrated, mode: params.publishDirMode + publishDir "${params.outDir}/Preprocessing/Recalibrated", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai), file(recalibrationReport) from recalibrationTable @@ -412,9 +411,9 @@ process RecalibrateBam { // Creating a TSV file to restart from this step recalibratedBamTSV.map { idPatient, status, idSample, bam, bai -> gender = patientGenders[idPatient] - "${idPatient}\t${gender}\t${status}\t${idSample}\t${directoryMap.recalibrated}/${bam}\t${directoryMap.recalibrated}/${bai}\n" + "${idPatient}\t${gender}\t${status}\t${idSample}\t${params.outDir}/Preprocessing/Recalibrated/${bam}\t${params.outDir}/Preprocessing/Recalibrated/${bai}\n" }.collectFile( - name: 'recalibrated.tsv', sort: true, storeDir: directoryMap.recalibrated + name: 'recalibrated.tsv', sort: true, storeDir: "${params.outDir}/Preprocessing/Recalibrated" ) if (params.verbose) recalibratedBam = recalibratedBam.view { @@ -430,7 +429,7 @@ if (params.verbose) recalibratedBam = recalibratedBam.view { process RunSamtoolsStats { tag {idPatient + "-" + idSample} - publishDir directoryMap.samtoolsStats, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/SamToolsStats", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForSamToolsStats @@ -451,7 +450,7 @@ if (params.verbose) samtoolsStatsReport = samtoolsStatsReport.view { process 
RunBamQCrecalibrated { tag {idPatient + "-" + idSample} - publishDir directoryMap.bamQC, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/bamQC", mode: params.publishDirMode input: set idPatient, status, idSample, file(bam), file(bai) from bamForBamQC diff --git a/runMultiQC.nf b/runMultiQC.nf index 5bf719e977..fcd1972e76 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -43,7 +43,6 @@ if (workflow.profile == 'awsbatch') { if (!params.awsqueue) exit 1, "Provide the job queue for aws batch!" } -directoryMap = SarekUtils.defineDirectoryMap(params.outDir) /* ================================================================================ = P R O C E S S E S = @@ -53,10 +52,10 @@ directoryMap = SarekUtils.defineDirectoryMap(params.outDir) startMessage() process GetVersionAll { - publishDir directoryMap.multiQC, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/MultiQC", mode: params.publishDirMode input: - file(versions) from Channel.fromPath("${directoryMap.version}/*").collect().ifEmpty(file ("empty")) + file(versions) from Channel.fromPath("${params.outDir}/Reports/ToolsVersion/*").collect().ifEmpty(file ("empty")) output: file ("tool_versions_mqc.yaml") into versionsForMultiQC @@ -92,17 +91,17 @@ if (params.verbose && !params.noReports) versionsForMultiQC = versionsForMultiQC reportsForMultiQC = Channel.empty() .mix( - Channel.fromPath("${directoryMap.bamQC}/*", type: 'dir'), - Channel.fromPath("${directoryMap.bcftoolsStats}/*"), - Channel.fromPath("${directoryMap.fastQC}/*/*"), - Channel.fromPath("${directoryMap.markDuplicatesQC}/*"), - Channel.fromPath("${directoryMap.samtoolsStats}/*"), - Channel.fromPath("${directoryMap.snpeffReports}/*"), - Channel.fromPath("${directoryMap.vcftools}/*"), + Channel.fromPath("${params.outDir}/Reports/bamQC/*", type: 'dir'), + Channel.fromPath("${params.outDir}/Reports/BCFToolsStats/*"), + Channel.fromPath("${params.outDir}/Reports/FastQC/*/*"), + Channel.fromPath("${params.outDir}/Reports/MarkDuplicates/*"), + Channel.fromPath("${params.outDir}/Reports/SamToolsStats/*"), + Channel.fromPath("${params.outDir}/Annotation/*/snpEff/*.csv"), + Channel.fromPath("${params.outDir}/Reports/VCFTools/*"), ).collect() process RunMultiQC { - publishDir directoryMap.multiQC, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/MultiQC", mode: params.publishDirMode input: file (multiqcConfig) from createMultiQCconfig() diff --git a/somaticVC.nf b/somaticVC.nf index 2dc78b0853..c959af9bf9 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -58,7 +58,6 @@ if (workflow.profile == 'awsbatch') { tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase()} : [] -directoryMap = SarekUtils.defineDirectoryMap(params.outDir) referenceMap = defineReferenceMap() toolList = defineToolList() @@ -71,7 +70,7 @@ if (params.test && params.genome in ['GRCh37', 'GRCh38']) { tsvPath = '' if (params.sample) tsvPath = params.sample -else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv" +else tsvPath = "${params.outDir}/Preprocessing/Recalibrated/recalibrated.tsv" // Set up the bamFiles channel @@ -296,14 +295,9 @@ process ConcatVCF { script: outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" - - if (params.targetBED) // targeted - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} -t ${targetBED}" - else // WGS - concatOptions = "-i ${genomeIndex} -c ${task.cpus} -o ${outputFile} " - + options = params.targetBED ? 
"-t ${targetBED}" : "" """ - concatenateVCFs.sh ${concatOptions} + concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} """ } @@ -333,13 +327,8 @@ process RunStrelka { when: 'strelka' in tools && !params.onlyQC script: - if (params.targetBED) { - beforeScript = "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" - options = "--exome --callRegions call_targets.bed.gz" - } else { - beforeScript = "" - options = "" - } + beforeScript = params.targetBED ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" + options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" """ ${beforeScript} configureStrelkaSomaticWorkflow.py \ @@ -653,7 +642,7 @@ vcfForQC = Channel.empty().mix( process RunBcftoolsStats { tag {vcf} - publishDir directoryMap.bcftoolsStats, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/BCFToolsStats", mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForBCFtools @@ -676,7 +665,7 @@ bcfReport.close() process RunVcftools { tag {vcf} - publishDir directoryMap.vcftools, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/VCFTools", mode: params.publishDirMode input: set variantCaller, file(vcf) from vcfForVCFtools @@ -697,7 +686,7 @@ if (params.verbose) vcfReport = vcfReport.view { vcfReport.close() process GetVersionAlleleCount { - publishDir directoryMap.version, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/ToolsVersion", mode: params.publishDirMode output: file("v_*.txt") when: 'ascat' in tools && !params.onlyQC @@ -708,7 +697,7 @@ process GetVersionAlleleCount { } process GetVersionASCAT { - publishDir directoryMap.version, mode: params.publishDirMode + publishDir "${params.outDir}/Reports/ToolsVersion", mode: params.publishDirMode output: file("v_*.txt") when: 'ascat' in tools && !params.onlyQC From 57e86f8e308e17a62d653a651d1249208a686391 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 11:46:50 +0100 Subject: [PATCH 18/22] update CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cc8ebbd6f..0a8e38bf4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,17 +28,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- [#722](https://github.com/SciLifeLab/Sarek/pull/722) - Update `Sarek-data` submodule - [#723](https://github.com/SciLifeLab/Sarek/pull/723), [#725](https://github.com/SciLifeLab/Sarek/pull/725) - Update docs - [#724](https://github.com/SciLifeLab/Sarek/pull/724) - Improved AwsBatch configuration +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - VCFs and Annotated VCFs are now ordered by Patient, then tools +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Strelka Best Practices output is now prefixed with `StrelkaBP_` +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Improved usage of `targetBED` params ### `Removed` - [#715](https://github.com/SciLifeLab/Sarek/pull/715) - Remove `defReferencesFiles` function from `buildReferences.nf` - [#719](https://github.com/SciLifeLab/Sarek/pull/719) - `snpEff` base container is no longer used - [#721](https://github.com/SciLifeLab/Sarek/pull/721) - Remove COSMIC docs +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Remove `defineDirectoryMap()` ### `Fixed` - [#720](https://github.com/SciLifeLab/Sarek/pull/720) - bamQC is now run on the recalibrated bams, and not after MarkDuplicates - [#726](https://github.com/SciLifeLab/Sarek/pull/726) - Fix Ascat ref file input (one file can't be a set) - [#727](https://github.com/SciLifeLab/Sarek/pull/727) - bamQC outputs are no longer overwritten (name of dir is now the file instead of sample) - [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Fix multi sample TSV file [#691](https://github.com/SciLifeLab/Sarek/issues/691) +- [#728](https://github.com/SciLifeLab/Sarek/pull/728) - Fix issue with annotation that was consuming `cache` channels ## [2.2.2] - 2018-12-19 From 736ee5666dbfdb2a20f27183581717e4e6f327a8 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 13:08:59 +0100 Subject: [PATCH 19/22] typo --- germlineVC.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/germlineVC.nf b/germlineVC.nf index ce192c1e7f..21416a874f 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -336,7 +336,7 @@ process ConcatVCF { else outputFile = "${variantCaller}_${idSampleTumor}_vs_${idSampleNormal}.vcf" options = params.targetBED ? 
"-t ${targetBED}" : "" """ - concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${concatOptions} + concatenateVCFs.sh -i ${genomeIndex} -c ${task.cpus} -o ${outputFile} ${options} """ } From 73b1c6f69231ba875813c1bff4dbbc980376d68d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 17:20:44 +0100 Subject: [PATCH 20/22] fix ifEmpty for AWSBatch --- runMultiQC.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runMultiQC.nf b/runMultiQC.nf index fcd1972e76..2aec568485 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -55,7 +55,7 @@ process GetVersionAll { publishDir "${params.outDir}/Reports/MultiQC", mode: params.publishDirMode input: - file(versions) from Channel.fromPath("${params.outDir}/Reports/ToolsVersion/*").collect().ifEmpty(file ("empty")) + file(versions) from Channel.fromPath("${params.outDir}/Reports/ToolsVersion/*").collect().ifEmpty(null) output: file ("tool_versions_mqc.yaml") into versionsForMultiQC From a98814429dadc60fdfc95cfcaed72aca77062f49 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 17:28:04 +0100 Subject: [PATCH 21/22] fix issue with AWS Batch cc @KochTobi --- annotate.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/annotate.nf b/annotate.nf index 6dafd5ec3e..0a60414209 100644 --- a/annotate.nf +++ b/annotate.nf @@ -74,13 +74,13 @@ if (annotateVCF == []) { // This field is used to output final annotated VCFs in the correct directory Channel.empty().mix( Channel.fromPath("${params.outDir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz") - .flatten().map{vcf -> ['haplotypecaller', vcf.minus(vcf.fileName)[-2], vcf]}, + .flatten().map{vcf -> ['haplotypecaller', vcf.minus(vcf.fileName)[-2].toString(), vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/*/Manta/*SV.vcf.gz") - .flatten().map{vcf -> ['manta', vcf.minus(vcf.fileName)[-2], vcf]}, + .flatten().map{vcf -> ['manta', vcf.minus(vcf.fileName)[-2].toString(), vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/*/MuTect2/*.vcf.gz") - .flatten().map{vcf -> ['mutect2', vcf.minus(vcf.fileName)[-2], vcf]}, + .flatten().map{vcf -> ['mutect2', vcf.minus(vcf.fileName)[-2].toString(), vcf]}, Channel.fromPath("${params.outDir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz") - .flatten().map{vcf -> ['strelka', vcf.minus(vcf.fileName)[-2], vcf]}, + .flatten().map{vcf -> ['strelka', vcf.minus(vcf.fileName)[-2].toString(), vcf]}, ).choice(vcfToAnnotate, vcfNotToAnnotate) { annotateTools == [] || (annotateTools != [] && it[0] in annotateTools) ? 
0 : 1 } @@ -88,7 +88,7 @@ if (annotateVCF == []) { // Annotate user-submitted VCFs // If user-submitted, Sarek assume that the idPatient should be assumed automatically vcfToAnnotate = Channel.fromPath(annotateVCF) - .map{vcf -> ['userspecified', vcf.minus(vcf.fileName)[-2], vcf]} + .map{vcf -> ['userspecified', vcf.minus(vcf.fileName)[-2].toString(), vcf]} } else exit 1, "specify only tools or files to annotate, not both" vcfNotToAnnotate.close() From aca1b687cdf0554b6af6959cb7cd0cf59bf26ffa Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 19 Feb 2019 17:31:36 +0100 Subject: [PATCH 22/22] typo --- annotate.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/annotate.nf b/annotate.nf index 0a60414209..02768b4553 100644 --- a/annotate.nf +++ b/annotate.nf @@ -70,7 +70,7 @@ if (annotateVCF == []) { // Excluding vcfs from FreeBayes, and g.vcf from HaplotypeCaller // Basically it's: VariantCalling/*/{HaplotypeCaller,Manta,MuTect2,Strelka}/*.vcf.gz // Without *SmallIndels.vcf.gz from Manta, and *.genome.vcf.gz from Strelka -// The small snipet `vcf.minus(vcf.fileName)[-2]` catches idPatient +// The small snippet `vcf.minus(vcf.fileName)[-2]` catches idPatient // This field is used to output final annotated VCFs in the correct directory Channel.empty().mix( Channel.fromPath("${params.outDir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz")