From 440d2f93b642490a9d0e4e3514f6416784eb2171 Mon Sep 17 00:00:00 2001 From: Rike Date: Sun, 17 Jul 2022 17:22:55 +0200 Subject: [PATCH 1/6] have freebayes only use a single core, parallel implementation isn't adding anything in our setup --- conf/base.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/base.config b/conf/base.config index 5c87237b50..a98a1dec58 100644 --- a/conf/base.config +++ b/conf/base.config @@ -53,4 +53,7 @@ process { memory = { check_max( 60.GB * task.attempt, 'memory' ) } time = { check_max( 48.h * task.attempt, 'time' ) } } + withName: 'FREEBAYES' { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + } } From 9f6ee7b4182de3ccd88864ae67c5e03e36f78fed Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 12:05:24 +0200 Subject: [PATCH 2/6] resources that worked for matched ICGC WGS data --- conf/base.config | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index a98a1dec58..7b588f2dd9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -49,11 +49,28 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName: 'BWAMEM2_MEM' { - memory = { check_max( 60.GB * task.attempt, 'memory' ) } + withName: 'BWAMEM1_MEM|BWAMEM2_MEM' { + cpus = { check_max( 24 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 48.h * task.attempt, 'time' ) } } - withName: 'FREEBAYES' { + withName: 'FASTP'{ + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + } + withName:'FASTQC|FASTP|MOSDEPTH|SAMTOOLS_CONVERT|SAMTOOLS_MERGE'{ + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + } + withName:'APPLYBQSR|APPLYBQSR_SPARK|BASERECALIBRATOR|SAMTOOLS_STATS'{ + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + } + withName:'APPLYBQSR|APPLYBQSR_SPARK|BASERECALIBRATOR|GATHERBQSRREPORTS'{ + memory = { check_max( 46.GB * task.attempt, 'memory' ) } + } + withName: 'MARKDUPLICATES'{ + memory = { check_max( 300.GB * 
task.attempt, 'memory' ) } + } + withName: 'FREEBAYES|SAMTOOLS_STATS|SAMTOOLS_INDEX|UNZIP' { cpus = { check_max( 1 * task.attempt, 'cpus' ) } } + } From 1fcd53e88fdae567365b5c255b0908bb36ce0569 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 12:05:51 +0200 Subject: [PATCH 3/6] reduce resource requests in local modules --- modules/local/build_intervals/main.nf | 1 - modules/local/create_intervals_bed/main.nf | 1 - 2 files changed, 2 deletions(-) diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 38fbb36eec..77f4a5b826 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -1,6 +1,5 @@ process BUILD_INTERVALS { tag "$fasta_fai" - label 'process_medium' conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 08bb6eb5e6..50376259e5 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,6 +1,5 @@ process CREATE_INTERVALS_BED { tag "$intervals" - label 'process_medium' conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From e91c85ba32e34c90bbeaae118ec70131d58d0092 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 12:09:22 +0200 Subject: [PATCH 4/6] fix naming --- conf/base.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index 7b588f2dd9..13decc5358 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,13 +60,13 @@ process { withName:'FASTQC|FASTP|MOSDEPTH|SAMTOOLS_CONVERT|SAMTOOLS_MERGE'{ memory = { check_max( 4.GB * task.attempt, 'memory' ) } } - withName:'APPLYBQSR|APPLYBQSR_SPARK|BASERECALIBRATOR|SAMTOOLS_STATS'{ + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|SAMTOOLS_STATS'{ cpus = { check_max( 4 * task.attempt, 'cpus' ) } } - withName:'APPLYBQSR|APPLYBQSR_SPARK|BASERECALIBRATOR|GATHERBQSRREPORTS'{ + withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS'{ memory = { check_max( 46.GB * task.attempt, 'memory' ) } } - withName: 'MARKDUPLICATES'{ + withName: 'GATK4_MARKDUPLICATES'{ memory = { check_max( 300.GB * task.attempt, 'memory' ) } } withName: 'FREEBAYES|SAMTOOLS_STATS|SAMTOOLS_INDEX|UNZIP' { From ba1096adc1d08b5471d86cc5312914dd71ac264b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 12:11:04 +0200 Subject: [PATCH 5/6] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed0dc39ca3..0e47008dda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#632](https://github.com/nf-core/sarek/pull/632) - Added params `--snpeff_version` to allow more configuration on the snpeff container definition - [#632](https://github.com/nf-core/sarek/pull/632) - Added params `--vep_include_fasta` to use the fasta file for annotation - [#639](https://github.com/nf-core/sarek/pull/639) - Adding genes-txt-file and summary-html-file to the published output 
from snpEff. +- [#647](https://github.com/nf-core/sarek/pull/647) - Update resource requests for preprocessing based on what worked for 5 ICGC matched WGS samples ### Changed From c7c4d279c40d8cbe927f20ba33451e1d00d54085 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Jul 2022 12:25:53 +0200 Subject: [PATCH 6/6] also add some docs on the resource requests --- docs/usage.md | 233 ++++++++++++++++++++++++++------------------------ 1 file changed, 119 insertions(+), 114 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 91b209e509..2318e7e66b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -581,10 +581,6 @@ This list is by no means exhaustive and it will depend on the specific analysis | [Control-FREEC](https://github.com/BoevaLab/FREEC) | x | x | x | - | x | x | | [MSIsensorPro](https://github.com/xjtu-omics/msisensor-pro) | x | x | x | - | - | x | -## How to create a panel-of-normals for Mutect2 - -For a detailed tutorial on how to create a panel-of-normals, see [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531132). - ## How to run ASCAT with WES _under construction_ @@ -610,39 +606,91 @@ Then, you can derive both loci (just chromosome and position) and allele files ( For further reading and documentation, please take a look at the Battenberg repository. --> -## Where do the used reference genomes originate from +## What are the bwa/bwa-mem2 parameters? 
-_under construction - help needed_ +For mapping, sarek follows the parameter suggestions provided in this [paper](https://www.nature.com/articles/s41467-018-06159-4): -GATK.GRCh38: +`-K 100000000` : for deterministic pipeline results, for more info see [here](https://github.com/CCDG/Pipeline-Standardization/issues/2) -| File | Tools | Origin | Docs | -| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------- | -| ascat_alleles | ASCAT | https://www.dropbox.com/s/uouszfktzgoqfy7/G1000_alleles_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | -| ascat_loci | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | -| ascat_loci_gc | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | -| ascat_loci_rt | ASCAT | https://www.dropbox.com/s/xlp99uneqh6nh6p/RT_G1000_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | -| bwa | bwa-mem | bwa index -p bwa/${fasta.baseName} $fasta | | -| bwamem2 | bwa-mem2 | bwa-mem2 index -p bwamem2/${fasta} $fasta | | -| dragmap | DragMap | dragen-os --build-hash-table true --ht-reference $fasta 
--output-directory dragmap | | -| dbsnp | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | possibly from an old ftp server dbsnp_146.hg38.vcf.gz | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | -| dbsnp_tbi | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | | | -| dict | Baserecalibrator(Spark), CNNScoreVariant, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, MarkDulpicates(Spark), MergeVCFs, Mutect2, Variantrecalibrator | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.dict | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | -| fasta | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, snpEff, Strelka, Tiddit, Variantrecalibrator | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | -| fasta_fai | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, snpEff, Strelka, Tiddit, Variantrecalibrator | 
https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta.fai | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | -| germline_resource | GetPileupsummaries,Mutect2 | ? gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz" | | -| germline_resource_tbi | GetPileupsummaries,Mutect2 | ? gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz.tbi" | | -| intervals | ApplyBQSR(Spark), ASCAT, Baserecalibraotr(Spark), BCFTools, CNNScoreVariants, ControlFREEC, Deepvariant, FilterVariantTranches, FreeBayes, GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, Strelka, mpileup, MSISensorPro, Mutect2, VCFTools | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/wgs_calling_regions.hg38.interval_list | | -| known_indels | BaseRecalibrator(Spark), FilterVariantTranches | https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz,beta/Homo_sapiens_assembly38.known_indels}.vcf. 
| | -| known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" | | -| mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html | -| pon | Mutect2 | https://console.cloud.google.com/storage/browser/_details/gatk-best-practices/somatic-hg38/1000g_pon.hg38.vcf.gz | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | -| pon_tbi | Mutect2 | https://console.cloud.google.com/storage/browser/_details/gatk-best-practices/somatic-hg38/1000g_pon.hg38.vcf.gz.tbi | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | -| snpeff_db | | 'GRCh38.99' | | -| snpeff_genome | | 'GRCh38' | | -| vep_cache_version | | 105 | | -| vep_genome | | 'GRCh38' | | -| chr_dir | | "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" | | +`-Y`: force soft-clipping rather than default hard-clipping of supplementary alignments + +In addition, currently the mismatch penalty for reads with tumor status in the sample sheet are mapped with a mismatch penalty of `-B 3`. + +## MultiQC related issues + +### Plots for SnpEff are missing + +When plots are missing, it is possible that the fasta and the custom SnpEff database are not matching https://pcingola.github.io/SnpEff/se_faq/#error_chromosome_not_found-details. +The SnpEff completes without throwing an error causing nextflow to complete successfully. 
An indication for the error are these lines in the `.command` files: + +``` +ERRORS: Some errors were detected +Error type Number of errors +ERROR_CHROMOSOME_NOT_FOUND 17522411 +``` + +## How to create a panel-of-normals for Mutect2 + +For a detailed tutorial on how to create a panel-of-normals, see [here](https://gatk.broadinstitute.org/hc/en-us/articles/360035531132). + +## Spark related issues + +If you have problems running processes that make use of Spark such as `MarkDuplicates`. +You are probably experiencing issues with the limit of open files in your system. +You can check your current limit by typing the following: + +```bash +ulimit -n +``` + +The default limit size is usually 1024 which is quite low to run Spark jobs. +In order to increase the size limit permanently you can: + +Edit the file `/etc/security/limits.conf` and add the lines: + +```bash +* soft nofile 65535 +* hard nofile 65535 +``` + +Edit the file `/etc/sysctl.conf` and add the line: + +```bash +fs.file-max = 65535 +``` + +Edit the file `/etc/sysconfig/docker` and add the new limits to OPTIONS like this: + +```bash +OPTIONS=”—default-ulimit nofile=65535:65535" +``` + +Re-start your session. + +Note that the way to increase the open file limit in your system may be slightly different or require additional steps. + +### Cannot delete work folder when using docker + Spark + +Currently, when running spark-based tools in combination with docker, it is required to set `docker.userEmulation = false`. This can unfortunately causes permission issues when `work/` is being written with root permissions. In case this happens, you might need to configure docker to run without `userEmulation` (see [here](https://github.com/Midnighter/nf-core-adr/blob/main/docs/adr/0008-refrain-from-using-docker-useremulation-in-nextflow.md)). + +## How to handle UMIs + +Sarek can process UMI-reads, using [fgbio](http://fulcrumgenomics.github.io/fgbio/tools/latest/) tools. 
+ +In order to use reads containing UMI tags as your initial input, you need to include `--umi_read_structure [structure]` in your parameters. + +This will enable pre-processing of the reads and UMI consensus reads calling, which will then be used to continue the workflow from the mapping steps. For post-UMI processing depending on the experimental setup, duplicate marking and base quality recalibration can be skipped with [`--skip_tools`]. + +### UMI Read Structure + +This parameter is a string, which follows a [convention](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) to describe the structure of the umi. +If your reads contain a UMI only on one end, the string should only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both ends, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"). + +### Limitations and future updates + +Recent updates to Samtools have been introduced, which can speed-up performance of fgbio tools used in this workflow. +The current workflow does not handle duplex UMIs (i.e. where opposite strands of a duplex molecule have been tagged with a different UMI), and best practices have been proposed to process this type of data. +Both changes will be implemented in a future release. 
## How to run sarek when no(t all) reference files are in igenomes @@ -674,6 +722,40 @@ Example for not using known indels, but all other provided reference file: nextflow run nf-core/sarek --known_indels false --genome GRCh38.GATK ``` +### Where do the used reference genomes originate from + +_under construction - help needed_ + +GATK.GRCh38: + +| File | Tools | Origin | Docs | +| :-------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------------------- | +| ascat_alleles | ASCAT | https://www.dropbox.com/s/uouszfktzgoqfy7/G1000_alleles_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci_gc | ASCAT | https://www.dropbox.com/s/80cq0qgao8l1inj/G1000_loci_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| ascat_loci_rt | ASCAT | https://www.dropbox.com/s/xlp99uneqh6nh6p/RT_G1000_hg38.zip | https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS | +| bwa | bwa-mem | bwa index -p bwa/${fasta.baseName} $fasta | | +| bwamem2 | bwa-mem2 | bwa-mem2 index -p bwamem2/${fasta} $fasta | | +| dragmap | DragMap | dragen-os --build-hash-table true --ht-reference 
$fasta --output-directory dragmap | | +| dbsnp | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | possibly from an old ftp server dbsnp_146.hg38.vcf.gz | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| dbsnp_tbi | Baserecalibrator, ControlFREEC, GenotypeGVCF, HaplotypeCaller | | | +| dict | Baserecalibrator(Spark), CNNScoreVariant, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, MarkDulpicates(Spark), MergeVCFs, Mutect2, Variantrecalibrator | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.dict | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| fasta | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, snpEff, Strelka, Tiddit, Variantrecalibrator | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| fasta_fai | ApplyBQSR(Spark), ApplyVQSR, ASCAT, Baserecalibrator(Spark), BWA, BWAMem2, CNNScoreVariant, CNVKit, ControlFREEC, DragMap, DEEPVariant, EnsemblVEP, EstimateLibraryComplexity, FilterMutectCalls, FilterVariantTranches, FreeBayes, GatherPileupSummaries,GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, interval building, Manta, MarkDuplicates(Spark),MergeVCFs,MSISensorPro, Mutect2, Samtools, snpEff, Strelka, Tiddit, Variantrecalibrator | 
https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.fasta.fai | https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle | +| germline_resource | GetPileupsummaries,Mutect2 | ? gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz" | | +| germline_resource_tbi | GetPileupsummaries,Mutect2 | ? gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz.tbi" | | +| intervals | ApplyBQSR(Spark), ASCAT, Baserecalibraotr(Spark), BCFTools, CNNScoreVariants, ControlFREEC, Deepvariant, FilterVariantTranches, FreeBayes, GenotypeGVCF, GetPileupSummaries, HaplotypeCaller, Strelka, mpileup, MSISensorPro, Mutect2, VCFTools | https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/wgs_calling_regions.hg38.interval_list | | +| known_indels | BaseRecalibrator(Spark), FilterVariantTranches | https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz,beta/Homo_sapiens_assembly38.known_indels}.vcf. 
| | +| known_indels_tbi | BaseRecalibrator(Spark), FilterVariantTranches | https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" | | +| mappability | ControlFREEC | http://xfer.curie.fr/get/vyIi4w8EONl/out100m2_hg38.zip | http://boevalab.inf.ethz.ch/FREEC/tutorial.html | +| pon | Mutect2 | https://console.cloud.google.com/storage/browser/_details/gatk-best-practices/somatic-hg38/1000g_pon.hg38.vcf.gz | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | +| pon_tbi | Mutect2 | https://console.cloud.google.com/storage/browser/_details/gatk-best-practices/somatic-hg38/1000g_pon.hg38.vcf.gz.tbi | https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON- | +| snpeff_db | | 'GRCh38.99' | | +| snpeff_genome | | 'GRCh38' | | +| vep_cache_version | | 105 | | +| vep_genome | | 'GRCh38' | | +| chr_dir | | "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" | | + ## How to customise SnpEff and VEP annotation _under construction help needed_ @@ -769,87 +851,10 @@ nextflow run download_cache.nf --cadd_cache --cadd_version #### SpliceRegions -## What are the bwa/bwa-mem2 parameters? - -For mapping, sarek follows the parameter suggestions provided in this [paper](https://www.nature.com/articles/s41467-018-06159-4): - -`-K 100000000` : for deterministic pipeline results, for more info see [here](https://github.com/CCDG/Pipeline-Standardization/issues/2) - -`-Y`: force soft-clipping rather than default hard-clipping of supplementary alignments - -In addition, currently the mismatch penalty for reads with tumor status in the sample sheet are mapped with a mismatch penalty of `-B 3`. - -## Spark related issues - -If you have problems running processes that make use of Spark such as `MarkDuplicates`. 
-You are probably experiencing issues with the limit of open files in your system. -You can check your current limit by typing the following: - -```bash -ulimit -n -``` - -The default limit size is usually 1024 which is quite low to run Spark jobs. -In order to increase the size limit permanently you can: - -Edit the file `/etc/security/limits.conf` and add the lines: - -```bash -* soft nofile 65535 -* hard nofile 65535 -``` - -Edit the file `/etc/sysctl.conf` and add the line: +## Requested resources for the tools -```bash -fs.file-max = 65535 -``` - -Edit the file `/etc/sysconfig/docker` and add the new limits to OPTIONS like this: - -```bash -OPTIONS=”—default-ulimit nofile=65535:65535" -``` - -Re-start your session. - -Note that the way to increase the open file limit in your system may be slightly different or require additional steps. - -### Cannot delete work folder when using docker + Spark - -Currently, when running spark-based tools in combination with docker, it is required to set `docker.userEmulation = false`. This can unfortunately causes permission issues when `work/` is being written with root permissions. In case this happens, you might need to configure docker to run without `userEmulation` (see [here](https://github.com/Midnighter/nf-core-adr/blob/main/docs/adr/0008-refrain-from-using-docker-useremulation-in-nextflow.md)). - -## How to handle UMIs - -Sarek can process UMI-reads, using [fgbio](http://fulcrumgenomics.github.io/fgbio/tools/latest/) tools. - -In order to use reads containing UMI tags as your initial input, you need to include `--umi_read_structure [structure]` in your parameters. - -This will enable pre-processing of the reads and UMI consensus reads calling, which will then be used to continue the workflow from the mapping steps. For post-UMI processing depending on the experimental setup, duplicate marking and base quality recalibration can be skipped with [`--skip_tools`]. 
- -### UMI Read Structure - -This parameter is a string, which follows a [convention](https://github.com/fulcrumgenomics/fgbio/wiki/Read-Structures) to describe the structure of the umi. -If your reads contain a UMI only on one end, the string should only represent one structure (i.e. "2M11S+T"); should your reads contain a UMI on both ends, the string will contain two structures separated by a blank space (i.e. "2M11S+T 2M11S+T"). - -### Limitations and future updates - -Recent updates to Samtools have been introduced, which can speed-up performance of fgbio tools used in this workflow. -The current workflow does not handle duplex UMIs (i.e. where opposite strands of a duplex molecule have been tagged with a different UMI), and best practices have been proposed to process this type of data. -Both changes will be implemented in a future release. - -## MultiQC related issues - -### Plots for SnpEff are missing - -When plots are missing, it is possible that the fasta and the custom SnpEff database are not matching https://pcingola.github.io/SnpEff/se_faq/#error_chromosome_not_found-details. -The SnpEff completes without throwing an error causing nextflow to complete successfully. An indication for the error are these lines in the `.command` files: - -``` -ERRORS: Some errors were detected -Error type Number of errors -ERROR_CHROMOSOME_NOT_FOUND 17522411 -``` +Resource requests are difficult to generalize and are often dependent on input data size. Currently, the number of cpus and memory requested by default were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth. +For targeted data analysis, this is overshooting by a lot. In this case resources for each process can be limited by either setting `--max_memory` and `-max_cpus` or tailoring the request by process name as described [here](#resource-requests). 
If you are using sarek for a certain data type regularly, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek). ## How to set sarek up to use sentieon