diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01f82529df..d4556d4ce0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,16 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1130](https://github.com/nf-core/sarek/pull/1130) - Back to dev
- [#1013](https://github.com/nf-core/sarek/pull/1013) - Mutect2 multi sample mode with `--joint_mutect2`
+- [#1153](https://github.com/nf-core/sarek/pull/1153) - Add input validation for Sentieon & FGBio UMI incompatibility
+- [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint
+- [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster
### Changed
+- [#1151](https://github.com/nf-core/sarek/pull/1151) - Refactor codebase
+- [#1157](https://github.com/nf-core/sarek/pull/1157) - Move all vep args from `ext.args` to `params.vep_custom_args` to allow easier modifications
- [#1059](https://github.com/nf-core/sarek/pull/1059) - Add `nf-validation` for samplesheet validation
+- [#1160](https://github.com/nf-core/sarek/pull/1160) - Update tiddit to v3.6.1
### Fixed
- [#1143](https://github.com/nf-core/sarek/pull/1143) - `snpeff_db` is now a string
- [#1145](https://github.com/nf-core/sarek/pull/1145) - Fixed Zenodo links in `README.md` and in `WorkflowMain.groovy`
- [#1149](https://github.com/nf-core/sarek/pull/1149) - Update `Manta` modules and fix usage of `--exome` flag
+- [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
+- [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare
diff --git a/README.md b/README.md
index c3c664f8b1..d8a3182841 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-
Depending on the options and samples provided, the pipeline can currently perform the following:
- Form consensus reads from UMI sequences (`fgbio`)
-- Sequencing quality control and trimming (`FastQC`, `fastp`)
+- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`)
- Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`)
- Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`)
- Summarise alignment statistics (`samtools stats`, `mosdepth`)
@@ -180,6 +180,8 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:
+> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen. **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _bioRxiv_ [doi: 10.1101/2023.07.19.549462](https://doi.org/10.1101/2023.07.19.549462).
+
> Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2).
You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425)
diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config
index d5a13f7c25..931f18d6af 100644
--- a/conf/modules/aligner.config
+++ b/conf/modules/aligner.config
@@ -33,13 +33,7 @@ process {
ext.when = { params.aligner == "sentieon-bwamem" }
}
-
-
- withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
- // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof
- // However if it's skipped, reads need to be coordinate-sorted
- // Only name sort if Spark for Markduplicates + duplicate marking is not skipped
- ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
+ withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN|SENTIEON_BWAMEM)" {
ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" }
publishDir = [
mode: params.publish_dir_mode,
@@ -61,29 +55,12 @@ process {
]
}
-
- withName: "SENTIEON_BWAMEM" {
- // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof.
- // However, currently SENTIEON_BWAMEM only supports coordinate sorting the reads.
- ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" }
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/preprocessing/" },
- pattern: "*bam",
- // Only save if save_output_as_bam AND
- // (save_mapped OR no_markduplicates OR sentieon_dedup) AND
- // only a single BAM file per sample
- saveAs: {
- if (params.save_output_as_bam &&
- (
- params.save_mapped ||
- (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) &&
- !(params.tools && params.tools.split(',').contains('sentieon_dedup'))
- ) && (meta.size * meta.num_lanes == 1)
- ) { "mapped/${meta.id}/${it}" }
- else { null }
- }
- ]
+ withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
+ // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof
+ // However if it's skipped, reads need to be coordinate-sorted
+ // Only name sort if Spark for Markduplicates + duplicate marking is not skipped
+ // Currently SENTIEON_BWAMEM only supports coordinate sorting the reads.
+ ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
}
withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" {
diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config
index 8ee07c22eb..fe6c693b02 100644
--- a/conf/modules/annotate.config
+++ b/conf/modules/annotate.config
@@ -36,7 +36,6 @@ process {
if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) {
withName: 'ENSEMBLVEP_VEP' {
ext.args = { [
- '--everything --filter_common --per_gene --total_length --offline --format vcf',
(params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '',
(params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '',
(params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '',
diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config
index fe256eb223..702b9b721b 100644
--- a/conf/modules/markduplicates.config
+++ b/conf/modules/markduplicates.config
@@ -33,6 +33,16 @@ process {
]
}
+ withName: 'NFCORE_SAREK:SAREK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' {
+ ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
+ ext.prefix = { "${meta.id}.md.cram" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/samtools/${meta.id}" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
withName: 'BAM_TO_CRAM_MAPPING' {
// Run only when mapping should be saved as CRAM or when no MD is done
ext.when = (params.save_mapped && !params.save_output_as_bam) ||
diff --git a/conf/modules/modules.config b/conf/modules/modules.config
index d82be13f97..356d0768ac 100644
--- a/conf/modules/modules.config
+++ b/conf/modules/modules.config
@@ -40,26 +40,6 @@ process {
]
}
- withName: 'NFCORE_SAREK:SAREK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' {
- ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
- ext.prefix = { "${meta.id}.md.cram" }
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/reports/samtools/${meta.id}" },
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
- withName: 'NFCORE_SAREK:SAREK:BAM_SENTIEON_DEDUP:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' {
- ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
- ext.prefix = { "${meta.id}.dedup.cram" }
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/reports/samtools/${meta.id}" },
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-
withName: 'NFCORE_SAREK:SAREK:CRAM_QC_NO_MD:SAMTOOLS_STATS' {
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
ext.prefix = { "${meta.id}.sorted.cram" }
diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config
index dad80037a8..210edde641 100644
--- a/conf/modules/mutect2.config
+++ b/conf/modules/mutect2.config
@@ -15,6 +15,7 @@
process {
if (params.tools && params.tools.split(',').contains('mutect2')) {
+
withName: 'GATK4_MUTECT2' {
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" }
ext.when = { params.tools && params.tools.split(',').contains('mutect2') }
diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config
index 31058ae5e0..9bc621a35d 100644
--- a/conf/modules/post_variant_calling.config
+++ b/conf/modules/post_variant_calling.config
@@ -15,7 +15,9 @@
// Like, for instance, concatenating the unannotated, germline vcf-files
process {
+
withName: 'GERMLINE_VCFS_CONCAT'{
+ ext.when = params.concatenate_vcfs
publishDir = [
//specify to avoid publishing, overwritten otherwise
enabled: false
@@ -24,6 +26,7 @@ process {
withName: 'GERMLINE_VCFS_CONCAT_SORT'{
ext.prefix = { "${meta.id}.germline" }
+ ext.when = params.concatenate_vcfs
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }
@@ -32,10 +35,12 @@ process {
withName: 'TABIX_EXT_VCF' {
ext.prefix = { "${input.baseName}" }
+ ext.when = params.concatenate_vcfs
}
withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{
ext.prefix = { "${meta.id}.germline" }
+ ext.when = params.concatenate_vcfs
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/concat/${meta.id}/" }
diff --git a/conf/modules/prepare_cache.config b/conf/modules/prepare_cache.config
index fc5486fc2a..fd13b10cfd 100644
--- a/conf/modules/prepare_cache.config
+++ b/conf/modules/prepare_cache.config
@@ -14,6 +14,7 @@
// PREPARE_CACHE
process {
+
// SNPEFF
withName: 'SNPEFF_DOWNLOAD' {
ext.when = { params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge')) }
diff --git a/conf/modules/sentieon_dedup.config b/conf/modules/sentieon_dedup.config
index 77e0500bb0..1bbf81b25a 100644
--- a/conf/modules/sentieon_dedup.config
+++ b/conf/modules/sentieon_dedup.config
@@ -34,4 +34,16 @@ process {
]
}
+ if (params.tools && params.tools.contains('sentieon_dedup')) {
+ withName: 'NFCORE_SAREK:SAREK:BAM_SENTIEON_DEDUP:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' {
+ ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
+ ext.prefix = { "${meta.id}.dedup.cram" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/samtools/${meta.id}" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+ }
+
}
diff --git a/conf/modules/sentieon_haplotyper.config b/conf/modules/sentieon_haplotyper.config
index 2265697cbc..26f1300498 100644
--- a/conf/modules/sentieon_haplotyper.config
+++ b/conf/modules/sentieon_haplotyper.config
@@ -45,14 +45,16 @@ process {
]
}
- withName: '.*BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' {
- ext.prefix = {"${meta.id}.haplotyper"}
- ext.args = { "--info-key CNN_1D" }
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/"},
- pattern: "*{vcf.gz,vcf.gz.tbi}"
- ]
+ if (params.tools && params.tools.contains('sentieon_haplotyper')) {
+ withName: '.*BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' {
+ ext.prefix = {"${meta.id}.haplotyper"}
+ ext.args = { "--info-key CNN_1D" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/"},
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
}
}
diff --git a/conf/modules/sentieon_joint_germline.config b/conf/modules/sentieon_joint_germline.config
index 503e920a46..c956ccba83 100644
--- a/conf/modules/sentieon_joint_germline.config
+++ b/conf/modules/sentieon_joint_germline.config
@@ -15,7 +15,7 @@
process {
- withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:SENTIEON_GVCFTYPER' {
+ withName: 'SENTIEON_GVCFTYPER' {
ext.args = { "--allow-old-rms-mapping-quality-annotation-data" }
ext.prefix = { meta.intervals_name }
publishDir = [
@@ -24,32 +24,32 @@ process {
}
if (params.tools && params.tools.contains('sentieon_haplotyper') && params.joint_germline) {
- withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON::BCFTOOLS_SORT' {
+ withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' {
ext.prefix = { vcf.baseName - ".vcf" + ".sort" }
publishDir = [
enabled: false
]
}
- }
- withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' {
- ext.prefix = "joint_germline"
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/" },
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- pattern: "*{vcf.gz,vcf.gz.tbi}"
- ]
- }
+ withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' {
+ ext.prefix = "joint_germline"
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
- withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_VQSR' {
- ext.prefix = "joint_germline_recalibrated"
- publishDir = [
- mode: params.publish_dir_mode,
- path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"},
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- pattern: "*{vcf.gz,vcf.gz.tbi}"
- ]
+ withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_VQSR' {
+ ext.prefix = "joint_germline_recalibrated"
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"},
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
}
withName: 'SENTIEON_VARCAL_INDEL' {
@@ -78,5 +78,4 @@ process {
ext.args = '--sensitivity 99.9 --var_type SNP'
}
-
}
diff --git a/conf/modules/umi.config b/conf/modules/umi.config
index ed4f29570b..98040ce359 100644
--- a/conf/modules/umi.config
+++ b/conf/modules/umi.config
@@ -62,6 +62,7 @@ process {
enabled: false
]
}
+
withName: 'GROUPREADSBYUMI' {
publishDir = [
[ path: { "${params.outdir}/reports/umi/" },
diff --git a/conf/test.config b/conf/test.config
index 55822c97ca..81653565c7 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -46,6 +46,7 @@ params {
}
process {
+
withName:'.*:FREEC_SOMATIC'{
ext.args = {
[
@@ -69,27 +70,26 @@ process {
}
}
- if (params.tools && params.tools.split(',').contains('mutect2')) {
- if (params.joint_mutect2) {
- withName: 'MUTECT2_PAIRED' {
- ext.args = { params.ignore_soft_clipped_bases ?
- "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" :
- "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" }
- }
+ if (params.joint_mutect2) {
+ withName: 'MUTECT2_PAIRED' {
+ ext.args = { params.ignore_soft_clipped_bases ?
+ "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" :
+ "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" }
}
- else {
- withName: '.*MUTECT2_PAIRED'{
- //sample name from when the test data was generated
- ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
- }
+ }
+ else {
+ withName: 'MUTECT2_PAIRED'{
+ //sample name from when the test data was generated
+ ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
}
}
- withName: '.*:FILTERVARIANTTRANCHES'{
+ withName: 'FILTERVARIANTTRANCHES'{
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
+
// Enable container engines/virtualisation envs for CI testing
// only works when specified with the profile ENV
// otherwise tests can be done with the regular provided profiles
diff --git a/conf/test/cache.config b/conf/test/cache.config
index 481322dfc6..4e4119d3a7 100644
--- a/conf/test/cache.config
+++ b/conf/test/cache.config
@@ -63,6 +63,7 @@ process {
ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64
}
+ // This must contain .* in order to properly overwrite the standard config in test cases
withName:'.*:FREEC_SOMATIC'{
ext.args = {
[
@@ -86,14 +87,12 @@ process {
}
}
- if (params.tools && params.tools.split(',').contains('mutect2')) {
- withName: '.*MUTECT2_PAIRED'{
- //sample name from when the test data was generated
- ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
- }
+ withName: 'MUTECT2_PAIRED'{
+ //sample name from when the test data was generated
+ ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
}
- withName: '.*:FILTERVARIANTTRANCHES'{
+ withName: 'FILTERVARIANTTRANCHES'{
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png
index 0989560c48..e2a689b1ca 100644
Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ
diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg
index 29b8183b5c..ad8352b1c7 100644
--- a/docs/images/sarek_subway.svg
+++ b/docs/images/sarek_subway.svg
@@ -4,14 +4,14 @@
+ id="tspan2660">Optionally Sentieon acceleratedSNPs & IndelsSV & CNVMSI
diff --git a/docs/output.md b/docs/output.md
index 76a6b9c6e9..c4d8cfd04d 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -394,7 +394,7 @@ If the haplotype-called VCF files are not filtered, then Sarek should be run wit
- `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi`
- VCF with tabix index
-**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`**
+**Output directory: `{outdir}/variantcalling/haplotypecaller/joint_variant_calling/`**
- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi`
- VCF with tabix index
diff --git a/docs/posters/ISMB_ECCB_2023_FHanssen.pdf b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf
new file mode 100644
index 0000000000..e99c617057
Binary files /dev/null and b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf differ
diff --git a/docs/usage.md b/docs/usage.md
index 2ed80ca550..b807523ba1 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -2,6 +2,8 @@
## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/sarek/usage](https://nf-co.re/sarek/usage)
+> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._
+
# Introduction
Sarek is a workflow designed to detect germline and somatic variants on whole genome, whole exome, or targeted sequencing data.
@@ -1023,11 +1025,6 @@ Enable with `--vep_spliceregion`.
For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)."
-## Requested resources for the tools
-
-Resource requests are difficult to generalize and are often dependent on input data size. Currently, the number of cpus and memory requested by default were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth.
-For targeted data analysis, this is overshooting by a lot. In this case resources for each process can be limited by either setting `--max_memory` and `-max_cpus` or tailoring the request by process name as described [here](#resource-requests). If you are using sarek for a certain data type regulary, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek).
-
## MultiQC related issues
### Plots for SnpEff are missing
@@ -1050,3 +1047,8 @@ Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcft
### QualCal (BQSR)
Currently, Sentieon's version of BQSR, QualCal, is not available in Sarek. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit. By default Sarek runs GATK's BQSR; that can be skipped by adding the option `--skip_tools baserecalibrator`.
+
+## Requested resources for the tools
+
+Resource requests are difficult to generalize and are often dependent on input data size. Currently, the number of cpus and memory requested by default were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth.
+For targeted data analysis, this is overshooting by a lot. In this case resources for each process can be limited by either setting `--max_memory` and `--max_cpus` or tailoring the request by process name as described [here](#resource-requests). If you are using sarek for a certain data type regularly, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek).
diff --git a/modules.json b/modules.json
index 53db751786..a90b2fea51 100644
--- a/modules.json
+++ b/modules.json
@@ -438,7 +438,7 @@
},
"tiddit/sv": {
"branch": "master",
- "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+ "git_sha": "1c90a501d102b800c27697f5ef39a6e217ab1915",
"installed_by": ["modules"]
},
"untar": {
diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf
index 1ebc8565f4..67a0670dbc 100644
--- a/modules/nf-core/tiddit/sv/main.nf
+++ b/modules/nf-core/tiddit/sv/main.nf
@@ -2,10 +2,10 @@ process TIDDIT_SV {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::tiddit=3.3.2"
+ conda "bioconda::tiddit=3.6.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' :
- 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }"
+ 'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' :
+ 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }"
input:
tuple val(meta), path(input), path(input_index)
diff --git a/nextflow.config b/nextflow.config
index 367f7cbf37..b5e8345c1b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -73,8 +73,8 @@ params {
sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper
// Annotation
- vep_out_format = 'vcf'
- vep_custom_args = null // No custom arguments for VEP
+ vep_out_format = "vcf"
+ vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP
vep_dbnsfp = null // dbnsfp plugin disabled within VEP
dbnsfp = null // No dbnsfp processed file
dbnsfp_tbi = null // No dbnsfp processed file index
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 63956f9873..8d96f33d61 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -105,7 +105,7 @@
"tools": {
"type": "string",
"fa_icon": "fas fa-toolbox",
- "description": "Tools to use for variant calling and/or for annotation.",
+ "description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
"help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
"pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? [ meta + [ num_intervals:num_intervals ], cram, intervals ] }
+ // Run, if --tools mpileup
keep_bcftools_mpileup = false
BCFTOOLS_MPILEUP(cram_intervals, fasta, keep_bcftools_mpileup)
+ //Only run, if --tools ControlFreec
SAMTOOLS_MPILEUP(cram_intervals, fasta)
// Figuring out if there is one or more vcf(s) from the same sample
diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
index 8ec2811be6..ca4e1688fa 100644
--- a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
+++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf
@@ -1,7 +1,13 @@
-include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'
-include { SENTIEON_HAPLOTYPER } from '../../../modules/nf-core/sentieon/haplotyper/main'
-include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+//
+// SENTIEON HAPLOTYPER germline variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { SENTIEON_HAPLOTYPER } from '../../../modules/nf-core/sentieon/haplotyper/main'
+include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'
workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
take:
@@ -59,8 +65,6 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
emit_vcf,
emit_mode_items.contains('gvcf'))
- versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions)
-
if (joint_germline) {
genotype_intervals = SENTIEON_HAPLOTYPER.out.gvcf
.join(SENTIEON_HAPLOTYPER.out.gvcf_tbi, failOnMismatch: true)
@@ -101,8 +105,6 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
// Only when using intervals
MERGE_SENTIEON_HAPLOTYPER_VCFS(vcfs_for_merging, dict)
- versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions)
-
haplotyper_vcf = Channel.empty().mix(
MERGE_SENTIEON_HAPLOTYPER_VCFS.out.vcf,
haplotyper_vcf_branch.no_intervals)
@@ -142,12 +144,14 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
MERGE_SENTIEON_HAPLOTYPER_GVCFS(gvcfs_for_merging, dict)
- versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions)
-
gvcf = Channel.empty().mix(
MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.vcf,
haplotyper_gvcf_branch.no_intervals)
+ versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions)
+ versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions)
+ versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions)
+
emit:
versions
vcf
diff --git a/subworkflows/local/bam_variant_calling_single_strelka/main.nf b/subworkflows/local/bam_variant_calling_single_strelka/main.nf
index 1d3e34d81b..ab6b3373c3 100644
--- a/subworkflows/local/bam_variant_calling_single_strelka/main.nf
+++ b/subworkflows/local/bam_variant_calling_single_strelka/main.nf
@@ -1,6 +1,12 @@
-include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../modules/nf-core/strelka/germline/main'
+//
+// STRELKA2 single sample variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../modules/nf-core/strelka/germline/main'
workflow BAM_VARIANT_CALLING_SINGLE_STRELKA {
take:
@@ -36,7 +42,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA {
// Only when using intervals
genome_vcf_to_merge = genome_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple()
- vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple()
+ vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple()
MERGE_STRELKA(vcf_to_merge, dict)
MERGE_STRELKA_GENOME(genome_vcf_to_merge, dict)
diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf
index 32697dde94..356ce7c2fa 100644
--- a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf
+++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf
@@ -1,3 +1,9 @@
+//
+// TIDDIT single sample variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf-core/tabix/bgziptabix/main'
include { TIDDIT_SV } from '../../../modules/nf-core/tiddit/sv/main'
@@ -15,7 +21,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT {
TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf)
ploidy = TIDDIT_SV.out.ploidy
- vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], gz ] }
+ vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], gz ] }
versions = versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions)
versions = versions.mix(TIDDIT_SV.out.versions)
diff --git a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
index 3f98f54807..64f45508ab 100644
--- a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf
@@ -1,3 +1,9 @@
+//
+// ASCAT variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { ASCAT } from '../../../modules/nf-core/ascat/main'
workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT {
diff --git a/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf
index 93b91e605a..a2e7e17cff 100644
--- a/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf
@@ -1,8 +1,14 @@
-include { CONTROLFREEC_FREEC as FREEC_SOMATIC } from '../../../modules/nf-core/controlfreec/freec/main'
+//
+// CONTROLFREEC somatic variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
+include { CONTROLFREEC_FREEC as FREEC_SOMATIC } from '../../../modules/nf-core/controlfreec/freec/main'
include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main'
-include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main'
-include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main'
-include { CONTROLFREEC_MAKEGRAPH as MAKEGRAPH } from '../../../modules/nf-core/controlfreec/makegraph/main'
+include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main'
+include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main'
+include { CONTROLFREEC_MAKEGRAPH as MAKEGRAPH } from '../../../modules/nf-core/controlfreec/makegraph/main'
workflow BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC {
take:
diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf
index c0db37ed5c..7eb5e6687d 100644
--- a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf
@@ -1,3 +1,9 @@
+//
+// MANTA somatic variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main'
workflow BAM_VARIANT_CALLING_SOMATIC_MANTA {
diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
index 153a4fdcc0..991f484d5c 100644
--- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
@@ -1,17 +1,18 @@
//
-// Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls
+//
+// MUTECT2: tumor-normal mode variant calling: getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls
//
-include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main'
-include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main'
-include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
-include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL} from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main'
-include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main'
-include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries/main'
-include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main'
-include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main'
-include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats/main'
-include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2/main'
+include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main'
+include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
+include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main'
+include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main'
+include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries/main'
+include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main'
+include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main'
+include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats/main'
+include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main'
+include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2/main'
workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
take:
@@ -105,7 +106,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
normal: [ meta, input_list[0], input_index_list[0], intervals ]
}
-
// Prepare input channel for normal pileup summaries.
// Remember, the input channel contains tumor-normal pairs, so there will be multiple copies of the normal sample for each tumor for a given patient.
// Therefore, we use unique function to generate normal pileup summaries once for each patient for better efficiency.
diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf
index 7cc2fc4bbb..02c729f93e 100644
--- a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf
@@ -1,3 +1,9 @@
+//
+// STRELKA2 tumor-normal variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { STRELKA_SOMATIC } from '../../../modules/nf-core/strelka/somatic/main'
diff --git a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf
index 411c670ac1..259520fce1 100644
--- a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf
+++ b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf
@@ -1,3 +1,9 @@
+//
+// TIDDIT somatic variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_NORMAL } from '../bam_variant_calling_single_tiddit/main.nf'
include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_TUMOR } from '../bam_variant_calling_single_tiddit/main.nf'
include { SVDB_MERGE } from '../../../modules/nf-core/svdb/merge/main.nf'
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf
index 42e0b0567c..993faf127c 100644
--- a/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf
+++ b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf
@@ -1,3 +1,9 @@
+//
+// CONTROLFREEC tumor-only variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { CONTROLFREEC_FREEC as FREEC_TUMORONLY } from '../../../modules/nf-core/controlfreec/freec/main'
include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main'
include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main'
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf
index 8bc8f8f28a..10045c7356 100644
--- a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf
+++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf
@@ -1,3 +1,9 @@
+//
+// MANTA tumor-only variant calling
+//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
+
include { MANTA_TUMORONLY } from '../../../modules/nf-core/manta/tumoronly/main'
// Seems to be the consensus on upstream modules implementation too
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf
index 9120abd25c..d776d89878 100644
--- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf
+++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf
@@ -1,6 +1,8 @@
//
-// Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls
+// GATK MUTECT2 in tumor only mode: getpileupsummaries, calculatecontamination and filtermutectcalls
//
+// For all modules here:
+// A when clause condition is defined in the conf/modules.config to determine if the module should be run
include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main'
@@ -87,10 +89,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
MERGEMUTECTSTATS(stats_to_merge)
// Mix intervals and no_intervals channels together
- vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals)
- tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals)
+ vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals)
+ tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals)
stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals)
- f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals)
+ f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals)
// Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2
LEARNREADORIENTATIONMODEL(f1r2)
diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf
deleted file mode 100644
index 1b7e757751..0000000000
--- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf
+++ /dev/null
@@ -1,46 +0,0 @@
-//
-// MAPPING
-//
-// For all modules here:
-// A when clause condition is defined in the conf/modules.config to determine if the module should be run
-
-include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main'
-include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/bwa/mem/main'
-include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main'
-
-workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP {
- take:
- reads // channel: [mandatory] meta, reads
- index // channel: [mandatory] index
- sort // boolean: [mandatory] true -> sort, false -> don't sort
-
- main:
-
- versions = Channel.empty()
- reports = Channel.empty()
-
- // Only one of the following should be run
- BWAMEM1_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem
- BWAMEM2_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem2
- DRAGMAP_ALIGN(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is dragmap
-
- // Get the bam files from the aligner
- // Only one aligner is run
- bam = Channel.empty()
- bam = bam.mix(BWAMEM1_MEM.out.bam)
- bam = bam.mix(BWAMEM2_MEM.out.bam)
- bam = bam.mix(DRAGMAP_ALIGN.out.bam)
-
- // Gather reports of all tools used
- reports = reports.mix(DRAGMAP_ALIGN.out.log)
-
- // Gather versions of all tools used
- versions = versions.mix(BWAMEM1_MEM.out.versions)
- versions = versions.mix(BWAMEM2_MEM.out.versions)
- versions = versions.mix(DRAGMAP_ALIGN.out.versions)
-
- emit:
- bam // channel: [ [meta], bam ]
- reports
- versions // channel: [ versions.yml ]
-}
diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf
index 0699eb5c19..914cf55ec2 100644
--- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf
+++ b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf
@@ -36,8 +36,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON {
bam = bam.mix(DRAGMAP_ALIGN.out.bam)
bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bam ] })
- bai = Channel.empty()
- bai = bai.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] })
+ bai = SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] }
// Gather reports of all tools used
reports = reports.mix(DRAGMAP_ALIGN.out.log)
diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf
index 51377194ae..c237e64014 100644
--- a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf
+++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf
@@ -6,17 +6,18 @@
// For all modules here:
// A when clause condition is defined in the conf/modules.config to determine if the module should be run
-include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf'
-include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main'
-include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main'
-include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap/main'
-include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main'
-include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf'
+include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf'
+include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main'
+include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main'
+include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_sentieon/main'
+include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main'
+include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf'
workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO {
take:
reads // channel: [mandatory] [ val(meta), [ reads ] ]
fasta // channel: [mandatory] /path/to/reference/fasta
+ fai // channel: [optional] /path/to/reference/fasta_fai, needed for Sentieon
map_index // channel: [mandatory] Pre-computed mapping index
groupreadsbyumi_strategy // string: [mandatory] grouping strategy - default: "Adjacency"
@@ -37,7 +38,7 @@ workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO {
// appropriately tagged interleaved FASTQ reads are mapped to the reference
// bams will not be sorted (hence, sort = false)
sort = false
- ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort)
+ ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort, fasta, fai)
// samblaster is used in order to tag mates information in the BAM file
// this is used in order to group reads by UMI
diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf
new file mode 100644
index 0000000000..bf23ff13d4
--- /dev/null
+++ b/subworkflows/local/post_variantcalling/main.nf
@@ -0,0 +1,27 @@
+//
+// POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs
+//
+
+include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline/main'
+
+workflow POST_VARIANTCALLING {
+
+ take:
+ vcfs
+ concatenate_vcfs
+
+ main:
+ versions = Channel.empty()
+
+ if(concatenate_vcfs){
+ CONCATENATE_GERMLINE_VCFS(vcfs)
+
+ vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
+ versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions)
+ }
+
+ emit:
+ vcfs // post processed vcfs
+
+ versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf
index 31e73df4ca..5b759d8818 100644
--- a/subworkflows/local/vcf_annotate_all/main.nf
+++ b/subworkflows/local/vcf_annotate_all/main.nf
@@ -47,9 +47,9 @@ workflow VCF_ANNOTATE_ALL {
vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] }
VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files)
- reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports)
- vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi)
- tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab)
+ reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports)
+ vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi)
+ tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab)
json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json)
versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions)
}
diff --git a/subworkflows/local/vcf_concatenate_germline/main.nf b/subworkflows/local/vcf_concatenate_germline/main.nf
new file mode 100644
index 0000000000..87f46b22e1
--- /dev/null
+++ b/subworkflows/local/vcf_concatenate_germline/main.nf
@@ -0,0 +1,42 @@
+//
+// CONCATENATE Germline VCFs
+//
+
+// Concatenation of germline vcf-files
+include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf/main'
+include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix/main'
+include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/bcftools/sort/main'
+include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow CONCATENATE_GERMLINE_VCFS {
+
+ take:
+ vcfs
+
+ main:
+ versions = Channel.empty()
+
+ // Concatenate vcf-files
+ ADD_INFO_TO_VCF(vcfs)
+ TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf)
+
+ // Gather vcfs and vcf-tbis for concatenating germline-vcfs
+ germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple()
+
+ GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis)
+ GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf)
+ TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf)
+
+ // Gather versions of all tools used
+ versions = versions.mix(ADD_INFO_TO_VCF.out.versions)
+ versions = versions.mix(TABIX_EXT_VCF.out.versions)
+ versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions)
+    versions = versions.mix(GERMLINE_VCFS_CONCAT_SORT.out.versions)
+ versions = versions.mix(TABIX_GERMLINE_VCFS_CONCAT_SORT.out.versions)
+
+ emit:
+ vcfs = germline_vcfs_with_tbis // post processed vcfs
+
+ versions // channel: [ versions.yml ]
+}
diff --git a/tests/config/tags.yml b/tests/config/tags.yml
index 8116fb3c23..017246e117 100644
--- a/tests/config/tags.yml
+++ b/tests/config/tags.yml
@@ -124,7 +124,7 @@ sentieon/bwamem:
- tests/test_alignment_sentieon_bwamem.yml
## markduplicates
-markduplicates:
+gatk4/markduplicates:
- conf/modules/markduplicates.config
- modules/nf-core/gatk4/markduplicates/main.nf
- modules/nf-core/mosdepth/main.nf
@@ -454,4 +454,6 @@ concatenate_vcfs:
- subworkflows/local/bam_variant_calling_mpileup/main.nf
- subworkflows/local/bam_variant_calling_single_strelka/main.nf
- subworkflows/local/bam_variant_calling_single_tiddit/main.nf
+ - subworkflows/local/post_variantcalling/main.nf
+ - subworkflows/local/vcf_concatenate_germline/main.nf
- tests/test_concat_germline_vcfs.yml
diff --git a/tests/test_markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml
index dee006127f..d6191216bd 100644
--- a/tests/test_markduplicates_from_bam.yml
+++ b/tests/test_markduplicates_from_bam.yml
@@ -2,7 +2,7 @@
command: nextflow run main.nf -profile test_cache,markduplicates_bam --outdir results
tags:
- input_bam
- - markduplicates
+ - gatk4/markduplicates
- preprocessing
files:
- path: results/csv/markduplicates.csv
@@ -48,6 +48,7 @@
# conda changes md5sums for test
- path: results/reports/samtools/test/test.recal.cram.stats
# conda changes md5sums for test
+
- name: Run skip markduplicates bam from step markduplicates
command: nextflow run main.nf -profile test_cache,markduplicates_bam,skip_markduplicates --outdir results
tags:
diff --git a/tests/test_markduplicates_from_cram.yml b/tests/test_markduplicates_from_cram.yml
index ab2e020c85..f36619f719 100644
--- a/tests/test_markduplicates_from_cram.yml
+++ b/tests/test_markduplicates_from_cram.yml
@@ -2,7 +2,7 @@
command: nextflow run main.nf -profile test_cache,markduplicates_cram --outdir results
tags:
- input_cram
- - markduplicates
+ - gatk4/markduplicates
- preprocessing
files:
- path: results/csv/markduplicates.csv
diff --git a/tests/test_sentieon_dedup_from_bam.yml b/tests/test_sentieon_dedup_from_bam.yml
index 4784a45816..cba467d07f 100644
--- a/tests/test_sentieon_dedup_from_bam.yml
+++ b/tests/test_sentieon_dedup_from_bam.yml
@@ -2,7 +2,6 @@
command: nextflow run main.nf -profile test_cache,sentieon_dedup_bam --outdir results
tags:
- input_bam
- - markduplicates
- preprocessing
- sentieon/dedup
files:
diff --git a/tests/test_sentieon_dedup_from_cram.yml b/tests/test_sentieon_dedup_from_cram.yml
index ce96acfb5c..e92fff1c60 100644
--- a/tests/test_sentieon_dedup_from_cram.yml
+++ b/tests/test_sentieon_dedup_from_cram.yml
@@ -2,7 +2,6 @@
command: nextflow run main.nf -profile test_cache,sentieon_dedup_cram --outdir results
tags:
- input_cram
- - markduplicates
- preprocessing
- sentieon/dedup
files:
diff --git a/tests/test_sentieon_haplotyper.yml b/tests/test_sentieon_haplotyper.yml
index 80b58984d0..5e06ccf604 100644
--- a/tests/test_sentieon_haplotyper.yml
+++ b/tests/test_sentieon_haplotyper.yml
@@ -85,7 +85,7 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
- md5sum: b1d10b32d106b180a773782c7f3b127b
+ should_exist: false
- path: results/multiqc
- path: results/preprocessing/converted/test/test.converted.cram
# binary changes md5sums on reruns
@@ -123,7 +123,7 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
- md5sum: eacdbbd51f3381ca33c9d0a51283c2dc
+ md5sum: 4d3dd4f6dcb34a91a949641f2b1ac202
- path: results/multiqc
- path: results/preprocessing/converted/test/test.converted.cram
# binary changes md5sums on reruns
diff --git a/tests/test_sentieon_joint_germline.yml b/tests/test_sentieon_joint_germline.yml
index f21bcd0d2f..99b5d4e826 100644
--- a/tests/test_sentieon_joint_germline.yml
+++ b/tests/test_sentieon_joint_germline.yml
@@ -7,7 +7,7 @@
- sentieon/haplotyper
files:
- path: results/csv/variantcalled.csv
- md5sum: e7b30e6034ecb5928c96a4f96b9be4da
+ md5sum: 6ec10f6455c2b5290c7f6fc687c529ca
- path: results/multiqc
- path: results/preprocessing/recalibrated/test/test.recal.cram
should_exist: false
diff --git a/tests/test_umi.yml b/tests/test_umi.yml
index 19aeac27d3..52be524ecd 100644
--- a/tests/test_umi.yml
+++ b/tests/test_umi.yml
@@ -50,6 +50,16 @@
# text-based file changes md5sums on reruns
- path: results/reports/samtools/test/test.recal.cram.stats
# text-based file changes md5sums on reruns
+
+- name: Run Sentieon-FGBio UMI combination test
+ command: nextflow run main.nf -profile test_cache,umi --outdir results --aligner "sentieon-bwamem"
+ tags:
+ - preprocessing
+ - umi
+ exit_code: 1
+ stdout:
+ contains:
+ - "Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner."
# - name: Run UMI TSO test
# command: nextflow run main.nf -profile test_cache,umi_tso --outdir results
# tags:
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index 26a1186237..669dac1bd5 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -205,6 +205,10 @@ if (params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.s
log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode")
}
+if (params.step == 'mapping' && params.aligner.contains("sentieon-bwamem") && params.umi_read_structure) {
+ error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.")
+}
+
if (params.tools && params.tools.contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) {
error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.")
}
@@ -332,95 +336,91 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p
*/
// Create samplesheets to restart from different steps
-include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main'
-include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main'
-include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main'
-include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main'
-include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main'
+include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main'
+include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main'
+include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main'
+include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main'
+include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main'
// Download annotation cache if needed
-include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main'
+include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main'
// Build indices if needed
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main'
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main'
// Build intervals if needed
-include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main'
+include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main'
// Build CNVkit reference if needed
-include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main'
+include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main'
// Convert BAM files to FASTQ files
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main'
-include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main'
+include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main'
// Run FASTQC
-include { FASTQC } from '../modules/nf-core/fastqc/main'
+include { FASTQC } from '../modules/nf-core/fastqc/main'
// TRIM/SPLIT FASTQ Files
-include { FASTP } from '../modules/nf-core/fastp/main'
+include { FASTP } from '../modules/nf-core/fastp/main'
// Create umi consensus bams from fastq
-include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main'
+include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main'
// Map input reads to reference genome
-include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'
+include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main'
// Merge and index BAM files (optional)
-include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main'
+include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main'
// Convert BAM files
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main'
// Convert CRAM files (optional)
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main'
-include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main'
+include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main'
// Mark Duplicates (+QC)
-include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main'
-include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main'
-include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main'
+include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main'
+include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main'
+include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main'
// QC on CRAM
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
-include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
+include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main'
// Create recalibration tables
-include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main'
-include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main'
+include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main'
+include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main'
// Create recalibrated cram files to use for variant calling (+QC)
-include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main'
-include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main'
+include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main'
+include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main'
// Variant calling on a single normal sample
-include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main'
+include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main'
// Variant calling on a single tumor sample
-include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main'
+include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main'
// Variant calling on tumor/normal pair
-include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main'
+include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main'
-// Concatenation of germline vcf-files
-include { ADD_INFO_TO_VCF } from '../modules/local/add_info_to_vcf/main'
-include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../modules/nf-core/tabix/bgziptabix/main'
-include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../modules/nf-core/bcftools/concat/main'
-include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main'
-include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main'
+// POST VARIANTCALLING: e.g. merging
+include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main'
// QC on VCF files
-include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'
+include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main'
// Annotation
-include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'
+include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main'
// REPORTING VERSIONS OF SOFTWARE USED
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
// MULTIQC
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { MULTIQC } from '../modules/nf-core/multiqc/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -431,20 +431,20 @@ include { MULTIQC } from '../modules/nf-c
workflow SAREK {
// MULTIQC
- ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
- ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
- ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
+ ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
// To gather all QC reports for MultiQC
- reports = Channel.empty()
+ reports = Channel.empty()
// To gather used softwares versions for MultiQC
versions = Channel.empty()
// Download cache if needed
// Assuming that if the cache is provided, the user has already downloaded it
- ensemblvep_info = params.vep_cache ? [] : Channel.of([ [ id:"${params.vep_genome}.${params.vep_cache_version}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
- snpeff_info = params.snpeff_cache ? [] : Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
+ ensemblvep_info = params.vep_cache ? [] : Channel.of([ [ id:"${params.vep_genome}.${params.vep_cache_version}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
+ snpeff_info = params.snpeff_cache ? [] : Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
if (params.download_cache) {
PREPARE_CACHE(ensemblvep_info, snpeff_info)
@@ -509,16 +509,16 @@ workflow SAREK {
known_sites_indels = dbsnp.concat(known_indels).collect()
known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect()
- known_sites_snps = dbsnp.concat(known_snps).collect()
- known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()
+ known_sites_snps = dbsnp.concat(known_snps).collect()
+ known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect()
// Build intervals if needed
PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals)
// Intervals for speed up preprocessing/variant calling by spread/gather
// [interval.bed] all intervals in one file
- intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined
- intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined
+ intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined
+ intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined
// For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS)
intervals_for_preprocessing = params.wes ?
@@ -599,6 +599,7 @@ workflow SAREK {
FASTQ_CREATE_UMI_CONSENSUS_FGBIO(
input_fastq,
fasta,
+ fasta_fai,
index_alignement,
params.group_by_umi_strategy)
@@ -1167,18 +1168,9 @@ workflow SAREK {
params.joint_mutect2
)
- if (params.concatenate_vcfs) {
- // Concatenate vcf-files
- ADD_INFO_TO_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all)
- TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf)
-
- // Gather vcfs and vcf-tbis for concatenating germline-vcfs
- germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple()
-
- GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis)
- GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf)
- TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf)
- }
+ // POST VARIANTCALLING
+ POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all,
+ params.concatenate_vcfs)
// Gather vcf files for annotation and QC
vcf_to_annotate = Channel.empty()
@@ -1193,22 +1185,22 @@ workflow SAREK {
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all)
vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all)
- // Gather used softwares versions
- versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions)
- versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions)
- versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.versions)
-
// QC
VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined)
- versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions)
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect{ meta, stats -> stats })
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect{ meta, counts -> counts })
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual })
reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary })
- vcf_to_csv = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.gvcf_sentieon_haplotyper)
- CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_csv)
+ CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate)
+
+ // Gather used variant calling softwares versions
+ versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions)
+ versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions)
+ versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.versions)
+ versions = versions.mix(POST_VARIANTCALLING.out.versions)
+ versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions)
// ANNOTATE
if (params.step == 'annotate') vcf_to_annotate = input_sample