diff --git a/CHANGELOG.md b/CHANGELOG.md index 01f82529df..d4556d4ce0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,16 +11,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1130](https://github.com/nf-core/sarek/pull/1130) - Back to dev - [#1013](https://github.com/nf-core/sarek/pull/1013) - Mutect2 multi sample mode with `--joint_mutect2` +- [#1153](https://github.com/nf-core/sarek/pull/1153) - Add input validation for Sentieon & FGBio UMI incompatibility +- [#1158](https://github.com/nf-core/sarek/pull/1158) - Add preprint +- [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster ### Changed +- [#1151](https://github.com/nf-core/sarek/pull/1151) - Refactor codebase +- [#1157](https://github.com/nf-core/sarek/pull/1157) - Move all vep args from `ext.args` to `params.vep_custom_args` to allow easier modifications - [#1059](https://github.com/nf-core/sarek/pull/1059) - Add `nf-validation` for samplesheet validation +- [#1160](https://github.com/nf-core/sarek/pull/1160) - Updating tiddit to v3.6.1 ### Fixed - [#1143](https://github.com/nf-core/sarek/pull/1143) - `snpeff_db` is now a string - [#1145](https://github.com/nf-core/sarek/pull/1145) - Fixed Zenodo links in `README.md` and in `WorkflowMain.groovy` - [#1149](https://github.com/nf-core/sarek/pull/1149) - Update `Manta` modules and fix usage of `--exome` flag +- [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md` +- [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller ## [3.2.3](https://github.com/nf-core/sarek/releases/tag/3.2.3) - Gällivare diff --git a/README.md b/README.md index c3c664f8b1..d8a3182841 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ It's listed on [Elixir - Tools and Data 
Services Registry](https://bio.tools/nf- Depending on the options and samples provided, the pipeline can currently perform the following: - Form consensus reads from UMI sequences (`fgbio`) -- Sequencing quality control and trimming (`FastQC`, `fastp`) +- Sequencing quality control and trimming (enabled by `--trim_fastq`) (`FastQC`, `fastp`) - Map Reads to Reference (`BWA-mem`, `BWA-mem2`, `dragmap` or `Sentieon BWA-mem`) - Process BAM file (`GATK MarkDuplicates`, `GATK BaseRecalibrator` and `GATK ApplyBQSR` or `Sentieon LocusCollector` and `Sentieon Dedup`) - Summarise alignment statistics (`samtools stats`, `mosdepth`) @@ -180,6 +180,8 @@ For further information or help, don't hesitate to get in touch on the [Slack `# If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows: +> Friederike Hanssen, Maxime U Garcia, Lasse Folkersen, Anders Sune Pedersen, Francesco Lescai, Susanne Jodoin, Edmund Miller, Oskar Wacker, Nicholas Smith, nf-core community, Gisela Gabernet, Sven Nahnsen **Scalable and efficient DNA sequencing analysis on different compute infrastructures aiding variant discovery** _bioRxiv_ [doi: 10.1101/2023.07.19.549462](https://doi.org/10.1101/2023.07.19.549462). + > Garcia M, Juhos S, Larsson M et al. **Sarek: A portable workflow for whole-genome sequencing analysis of germline and somatic variants [version 2; peer review: 2 approved]** _F1000Research_ 2020, 9:63 [doi: 10.12688/f1000research.16665.2](http://dx.doi.org/10.12688/f1000research.16665.2). 
You can cite the sarek zenodo record for a specific version using the following [doi: 10.5281/zenodo.3476425](https://doi.org/10.5281/zenodo.3476425) diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config index d5a13f7c25..931f18d6af 100644 --- a/conf/modules/aligner.config +++ b/conf/modules/aligner.config @@ -33,13 +33,7 @@ process { ext.when = { params.aligner == "sentieon-bwamem" } } - - - withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { - // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof - // However if it's skipped, reads need to be coordinate-sorted - // Only name sort if Spark for Markduplicates + duplicate marking is not skipped - ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN|SENTIEON_BWAMEM)" { ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } publishDir = [ mode: params.publish_dir_mode, @@ -61,29 +55,12 @@ process { ] } - - withName: "SENTIEON_BWAMEM" { - // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof. - // However, currently SENTIEON_BWAMEM only supports coordinate sorting the reads. - ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*bam", - // Only save if save_output_as_bam AND - // (save_mapped OR no_markduplicates OR sentieon_dedup) AND - // only a single BAM file per sample - saveAs: { - if (params.save_output_as_bam && - ( - params.save_mapped || - (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) && - !(params.tools && params.tools.split(',').contains('sentieon_dedup')) - ) && (meta.size * meta.num_lanes == 1) - ) { "mapped/${meta.id}/${it}" } - else { null } - } - ] + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { + // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof + // However if it's skipped, reads need to be coordinate-sorted + // Only name sort if Spark for Markduplicates + duplicate marking is not skipped + // Currently SENTIEON_BWAMEM only supports coordinate sorting the reads. + ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } } withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" { diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config index 8ee07c22eb..fe6c693b02 100644 --- a/conf/modules/annotate.config +++ b/conf/modules/annotate.config @@ -36,7 +36,6 @@ process { if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { withName: 'ENSEMBLVEP_VEP' { ext.args = { [ - '--everything --filter_common --per_gene --total_length --offline --format vcf', (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? 
"--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config index fe256eb223..702b9b721b 100644 --- a/conf/modules/markduplicates.config +++ b/conf/modules/markduplicates.config @@ -33,6 +33,16 @@ process { ] } + withName: 'NFCORE_SAREK:SAREK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'BAM_TO_CRAM_MAPPING' { // Run only when mapping should be saved as CRAM or when no MD is done ext.when = (params.save_mapped && !params.save_output_as_bam) || diff --git a/conf/modules/modules.config b/conf/modules/modules.config index d82be13f97..356d0768ac 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -40,26 +40,6 @@ process { ] } - withName: 'NFCORE_SAREK:SAREK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } - ext.prefix = { "${meta.id}.md.cram" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/samtools/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: 'NFCORE_SAREK:SAREK:BAM_SENTIEON_DEDUP:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } - ext.prefix = { "${meta.id}.dedup.cram" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/samtools/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'NFCORE_SAREK:SAREK:CRAM_QC_NO_MD:SAMTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } ext.prefix = { "${meta.id}.sorted.cram" } diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config index dad80037a8..210edde641 100644 --- a/conf/modules/mutect2.config +++ b/conf/modules/mutect2.config @@ -15,6 +15,7 @@ process { if (params.tools && params.tools.split(',').contains('mutect2')) { + withName: 'GATK4_MUTECT2' { ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } ext.when = { params.tools && params.tools.split(',').contains('mutect2') } diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config index 31058ae5e0..9bc621a35d 100644 --- a/conf/modules/post_variant_calling.config +++ b/conf/modules/post_variant_calling.config @@ -15,7 +15,9 @@ // Like, for instance, concatenating the unannotated, germline vcf-files process { + withName: 'GERMLINE_VCFS_CONCAT'{ + ext.when = params.concatenate_vcfs publishDir = [ //specify to avoid publishing, overwritten otherwise enabled: false @@ -24,6 +26,7 @@ process { withName: 'GERMLINE_VCFS_CONCAT_SORT'{ ext.prefix = { "${meta.id}.germline" } + ext.when = params.concatenate_vcfs publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } @@ -32,10 +35,12 @@ process { withName: 'TABIX_EXT_VCF' { ext.prefix = { "${input.baseName}" } + ext.when = 
params.concatenate_vcfs } withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{ ext.prefix = { "${meta.id}.germline" } + ext.when = params.concatenate_vcfs publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } diff --git a/conf/modules/prepare_cache.config b/conf/modules/prepare_cache.config index fc5486fc2a..fd13b10cfd 100644 --- a/conf/modules/prepare_cache.config +++ b/conf/modules/prepare_cache.config @@ -14,6 +14,7 @@ // PREPARE_CACHE process { + // SNPEFF withName: 'SNPEFF_DOWNLOAD' { ext.when = { params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge')) } diff --git a/conf/modules/sentieon_dedup.config b/conf/modules/sentieon_dedup.config index 77e0500bb0..1bbf81b25a 100644 --- a/conf/modules/sentieon_dedup.config +++ b/conf/modules/sentieon_dedup.config @@ -34,4 +34,16 @@ process { ] } + if (params.tools && params.tools.contains('sentieon_dedup')) { + withName: 'NFCORE_SAREK:SAREK:BAM_SENTIEON_DEDUP:CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + ext.prefix = { "${meta.id}.dedup.cram" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } diff --git a/conf/modules/sentieon_haplotyper.config b/conf/modules/sentieon_haplotyper.config index 2265697cbc..26f1300498 100644 --- a/conf/modules/sentieon_haplotyper.config +++ b/conf/modules/sentieon_haplotyper.config @@ -45,14 +45,16 @@ process { ] } - withName: '.*BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { - ext.prefix = {"${meta.id}.haplotyper"} - ext.args = { "--info-key CNN_1D" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/"}, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] + if (params.tools && params.tools.contains('sentieon_haplotyper')) { + withName: '.*BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { + ext.prefix = {"${meta.id}.haplotyper"} + ext.args = { "--info-key CNN_1D" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } } } diff --git a/conf/modules/sentieon_joint_germline.config b/conf/modules/sentieon_joint_germline.config index 503e920a46..c956ccba83 100644 --- a/conf/modules/sentieon_joint_germline.config +++ b/conf/modules/sentieon_joint_germline.config @@ -15,7 +15,7 @@ process { - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:SENTIEON_GVCFTYPER' { + withName: 'SENTIEON_GVCFTYPER' { ext.args = { "--allow-old-rms-mapping-quality-annotation-data" } ext.prefix = { meta.intervals_name } publishDir = [ @@ -24,32 +24,32 @@ process { } if (params.tools && params.tools.contains('sentieon_haplotyper') && params.joint_germline) { - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON::BCFTOOLS_SORT' { + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' { ext.prefix = { 
vcf.baseName - ".vcf" + ".sort" } publishDir = [ enabled: false ] } - } - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' { - ext.prefix = "joint_germline" - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' { + ext.prefix = "joint_germline" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_VQSR' { - ext.prefix = "joint_germline_recalibrated" - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"}, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_VQSR' { + ext.prefix = "joint_germline_recalibrated" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"}, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } } withName: 'SENTIEON_VARCAL_INDEL' { @@ -78,5 +78,4 @@ process { ext.args = '--sensitivity 99.9 --var_type SNP' } - } diff --git a/conf/modules/umi.config b/conf/modules/umi.config index ed4f29570b..98040ce359 100644 --- a/conf/modules/umi.config +++ b/conf/modules/umi.config @@ -62,6 +62,7 @@ process { enabled: false ] } + withName: 'GROUPREADSBYUMI' { publishDir = [ [ path: { "${params.outdir}/reports/umi/" }, diff --git a/conf/test.config b/conf/test.config index 55822c97ca..81653565c7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -46,6 +46,7 @@ params { } process { + withName:'.*:FREEC_SOMATIC'{ ext.args = { [ @@ -69,27 +70,26 @@ process { } } - if (params.tools && params.tools.split(',').contains('mutect2')) { - if (params.joint_mutect2) { - withName: 'MUTECT2_PAIRED' { - ext.args = { params.ignore_soft_clipped_bases ? - "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" : - "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" } - } + if (params.joint_mutect2) { + withName: 'MUTECT2_PAIRED' { + ext.args = { params.ignore_soft_clipped_bases ? 
+ "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" } } - else { - withName: '.*MUTECT2_PAIRED'{ - //sample name from when the test data was generated - ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } - } + } + else { + withName: 'MUTECT2_PAIRED'{ + //sample name from when the test data was generated + ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } } } - withName: '.*:FILTERVARIANTTRANCHES'{ + withName: 'FILTERVARIANTTRANCHES'{ ext.args = { "--info-key CNN_1D --indel-tranche 0" } } } + // Enable container engines/virtualisation envs for CI testing // only works when specified with the profile ENV // otherwise tests can be done with the regular provided profiles diff --git a/conf/test/cache.config b/conf/test/cache.config index 481322dfc6..4e4119d3a7 100644 --- a/conf/test/cache.config +++ b/conf/test/cache.config @@ -63,6 +63,7 @@ process { ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64 } + // This must contain .* in order to properly overwrite the standard config in test cases withName:'.*:FREEC_SOMATIC'{ ext.args = { [ @@ -86,14 +87,12 @@ process { } } - if (params.tools && params.tools.split(',').contains('mutect2')) { - withName: '.*MUTECT2_PAIRED'{ - //sample name from when the test data was generated - ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } - } + withName: 'MUTECT2_PAIRED'{ + //sample name from when the test data was generated + ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " } } - withName: '.*:FILTERVARIANTTRANCHES'{ + withName: 'FILTERVARIANTTRANCHES'{ ext.args = { "--info-key CNN_1D --indel-tranche 0" } } } diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index 0989560c48..e2a689b1ca 100644 Binary files 
a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index 29b8183b5c..ad8352b1c7 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -4,14 +4,14 @@ mappingensemblvepsnpeffbcftools, vcftoolsconcatenate(germline)multiqcannotationvariant calling: SNPs, Indels, SV, CNV, MSIvariant callinghaplotypecallermantastrelka2tiddittidditascatmsisensorprocontrolfreeccnvkitmantaOptionally Sentieon accelerated + id="tspan2660">Optionally Sentieon acceleratedSNPs & IndelsSV & CNVMSI diff --git a/docs/output.md b/docs/output.md index 76a6b9c6e9..c4d8cfd04d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -394,7 +394,7 @@ If the haplotype-called VCF files are not filtered, then Sarek should be run wit - `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi` - VCF with tabix index -**Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`** +**Output directory: `{outdir}/variantcalling/haplotypecaller/joint_variant_calling/`** - `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` - VCF with tabix index diff --git a/docs/posters/ISMB_ECCB_2023_FHanssen.pdf b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf new file mode 100644 index 0000000000..e99c617057 Binary files /dev/null and b/docs/posters/ISMB_ECCB_2023_FHanssen.pdf differ diff --git a/docs/usage.md b/docs/usage.md index 2ed80ca550..b807523ba1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,6 +2,8 @@ ## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/sarek/usage](https://nf-co.re/sarek/usage) +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + # Introduction Sarek is a workflow designed to detect germline and somatic variants on whole genome, whole exome, or targeted sequencing data. @@ -1023,11 +1025,6 @@ Enable with `--vep_spliceregion`. 
For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)." -## Requested resources for the tools - -Resource requests are difficult to generalize and are often dependent on input data size. Currently, the number of cpus and memory requested by default were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth. -For targeted data analysis, this is overshooting by a lot. In this case resources for each process can be limited by either setting `--max_memory` and `-max_cpus` or tailoring the request by process name as described [here](#resource-requests). If you are using sarek for a certain data type regulary, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek). - ## MultiQC related issues ### Plots for SnpEff are missing @@ -1050,3 +1047,8 @@ Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcft ### QualCal (BQSR) Currently, Sentieon's version of BQSR, QualCal, is not available in Sarek. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit. By default Sarek runs GATK's BQSR; that can be skipped by adding the option `--skip_tools baserecalibrator`. + +## Requested resources for the tools + +Resource requests are difficult to generalize and are often dependent on input data size. Currently, the number of cpus and memory requested by default were adapted from tests on 5 ICGC paired whole-genome sequencing samples with approximately 40X and 80X depth. +For targeted data analysis, this is overshooting by a lot. 
In this case resources for each process can be limited by either setting `--max_memory` and `--max_cpus` or tailoring the request by process name as described [here](#resource-requests). If you are using sarek for a certain data type regularly, and would like to make these requests available to others on your system, an institution-specific, pipeline-specific config file can be added [here](https://github.com/nf-core/configs/tree/master/conf/pipeline/sarek). diff --git a/modules.json b/modules.json index 53db751786..a90b2fea51 100644 --- a/modules.json +++ b/modules.json @@ -438,7 +438,7 @@ }, "tiddit/sv": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1c90a501d102b800c27697f5ef39a6e217ab1915", "installed_by": ["modules"] }, "untar": { diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf index 1ebc8565f4..67a0670dbc 100644 --- a/modules/nf-core/tiddit/sv/main.nf +++ b/modules/nf-core/tiddit/sv/main.nf @@ -2,10 +2,10 @@ process TIDDIT_SV { tag "$meta.id" label 'process_medium' - conda "bioconda::tiddit=3.3.2" + conda "bioconda::tiddit=3.6.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' : - 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' : + 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/nextflow.config b/nextflow.config index 367f7cbf37..b5e8345c1b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -73,8 +73,8 @@ params { sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper // Annotation - vep_out_format = 'vcf' - vep_custom_args = null // No custom arguments for VEP + vep_out_format = "vcf" + vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP vep_dbnsfp = null // dbnsfp plugin disabled within VEP dbnsfp = null // No dbnsfp processed file dbnsfp_tbi = null // No dbnsfp processed file index diff --git a/nextflow_schema.json b/nextflow_schema.json index 63956f9873..8d96f33d61 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -105,7 +105,7 @@ "tools": { "type": "string", "fa_icon": "fas fa-toolbox", - "description": "Tools to use for variant calling and/or for annotation.", + "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant 
callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", "pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? [ meta + [ num_intervals:num_intervals ], cram, intervals ] } + // Run, if --tools mpileup keep_bcftools_mpileup = false BCFTOOLS_MPILEUP(cram_intervals, fasta, keep_bcftools_mpileup) + //Only run, if --tools ControlFreec SAMTOOLS_MPILEUP(cram_intervals, fasta) // Figuring out if there is one or more vcf(s) from the same sample diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf index 8ec2811be6..ca4e1688fa 100644 --- a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf +++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf @@ -1,7 +1,13 @@ -include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' -include { SENTIEON_HAPLOTYPER } from '../../../modules/nf-core/sentieon/haplotyper/main' -include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +// +// SENTIEON HAPLOTYPER germline variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as 
MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SENTIEON_HAPLOTYPER } from '../../../modules/nf-core/sentieon/haplotyper/main' +include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { take: @@ -59,8 +65,6 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { emit_vcf, emit_mode_items.contains('gvcf')) - versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions) - if (joint_germline) { genotype_intervals = SENTIEON_HAPLOTYPER.out.gvcf .join(SENTIEON_HAPLOTYPER.out.gvcf_tbi, failOnMismatch: true) @@ -101,8 +105,6 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { // Only when using intervals MERGE_SENTIEON_HAPLOTYPER_VCFS(vcfs_for_merging, dict) - versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions) - haplotyper_vcf = Channel.empty().mix( MERGE_SENTIEON_HAPLOTYPER_VCFS.out.vcf, haplotyper_vcf_branch.no_intervals) @@ -142,12 +144,14 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { MERGE_SENTIEON_HAPLOTYPER_GVCFS(gvcfs_for_merging, dict) - versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions) - gvcf = Channel.empty().mix( MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.vcf, haplotyper_gvcf_branch.no_intervals) + versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions) + versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions) + versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions) + emit: versions vcf diff --git a/subworkflows/local/bam_variant_calling_single_strelka/main.nf b/subworkflows/local/bam_variant_calling_single_strelka/main.nf index 1d3e34d81b..ab6b3373c3 100644 --- a/subworkflows/local/bam_variant_calling_single_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_single_strelka/main.nf @@ -1,6 +1,12 @@ -include { GATK4_MERGEVCFS as MERGE_STRELKA } from 
'../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../modules/nf-core/strelka/germline/main' +// +// STRELKA2 single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_STRELKA } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_STRELKA_GENOME } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { STRELKA_GERMLINE as STRELKA_SINGLE } from '../../../modules/nf-core/strelka/germline/main' workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { take: @@ -36,7 +42,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { // Only when using intervals genome_vcf_to_merge = genome_vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() - vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() + vcf_to_merge = vcf.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}.groupTuple() MERGE_STRELKA(vcf_to_merge, dict) MERGE_STRELKA_GENOME(genome_vcf_to_merge, dict) diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf index 32697dde94..356ce7c2fa 100644 --- a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf @@ -1,3 +1,9 @@ +// +// TIDDIT single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { TABIX_BGZIPTABIX as TABIX_BGZIP_TIDDIT_SV } from '../../../modules/nf-core/tabix/bgziptabix/main' include { TIDDIT_SV } from '../../../modules/nf-core/tiddit/sv/main' @@ 
-15,7 +21,7 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { TABIX_BGZIP_TIDDIT_SV(TIDDIT_SV.out.vcf) ploidy = TIDDIT_SV.out.ploidy - vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], gz ] } + vcf = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> [ meta + [ variantcaller: 'tiddit'], gz ] } versions = versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) versions = versions.mix(TIDDIT_SV.out.versions) diff --git a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf index 3f98f54807..64f45508ab 100644 --- a/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_ascat/main.nf @@ -1,3 +1,9 @@ +// +// ASCAT variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { ASCAT } from '../../../modules/nf-core/ascat/main' workflow BAM_VARIANT_CALLING_SOMATIC_ASCAT { diff --git a/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf index 93b91e605a..a2e7e17cff 100644 --- a/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf @@ -1,8 +1,14 @@ -include { CONTROLFREEC_FREEC as FREEC_SOMATIC } from '../../../modules/nf-core/controlfreec/freec/main' +// +// CONTROLFREEC somatic variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { CONTROLFREEC_FREEC as FREEC_SOMATIC } from '../../../modules/nf-core/controlfreec/freec/main' include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main' -include { CONTROLFREEC_FREEC2BED as FREEC2BED } from 
'../../../modules/nf-core/controlfreec/freec2bed/main' -include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main' -include { CONTROLFREEC_MAKEGRAPH as MAKEGRAPH } from '../../../modules/nf-core/controlfreec/makegraph/main' +include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main' +include { CONTROLFREEC_FREEC2CIRCOS as FREEC2CIRCOS } from '../../../modules/nf-core/controlfreec/freec2circos/main' +include { CONTROLFREEC_MAKEGRAPH as MAKEGRAPH } from '../../../modules/nf-core/controlfreec/makegraph/main' workflow BAM_VARIANT_CALLING_SOMATIC_CONTROLFREEC { take: diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf index c0db37ed5c..7eb5e6687d 100644 --- a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -1,3 +1,9 @@ +// +// MANTA somatic variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main' workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 153a4fdcc0..991f484d5c 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -1,17 +1,18 @@ // -// Run GATK mutect2 in tumor normal mode, getepileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// +// MUTECT2: tumor-normal mode variantcalling: getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls // -include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from 
'../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' -include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL} from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' -include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' -include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' -include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main' -include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats/main' -include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' +include { GATK4_GATHERPILEUPSUMMARIES as GATHERPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/gatherpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { 
GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main' +include { GATK4_MERGEMUTECTSTATS as MERGEMUTECTSTATS } from '../../../modules/nf-core/gatk4/mergemutectstats/main' +include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MUTECT2 as MUTECT2_PAIRED } from '../../../modules/nf-core/gatk4/mutect2/main' workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { take: @@ -105,7 +106,6 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { normal: [ meta, input_list[0], input_index_list[0], intervals ] } - // Prepare input channel for normal pileup summaries. // Remember, the input channel contains tumor-normal pairs, so there will be multiple copies of the normal sample for each tumor for a given patient. // Therefore, we use unique function to generate normal pileup summaries once for each patient for better efficiency. diff --git a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf index 7cc2fc4bbb..02c729f93e 100644 --- a/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_strelka/main.nf @@ -1,3 +1,9 @@ +// +// STRELKA2 tumor-normal variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { GATK4_MERGEVCFS as MERGE_STRELKA_INDELS } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { GATK4_MERGEVCFS as MERGE_STRELKA_SNVS } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { STRELKA_SOMATIC } from '../../../modules/nf-core/strelka/somatic/main' diff --git a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf index 411c670ac1..259520fce1 100644 --- 
a/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf @@ -1,3 +1,9 @@ +// +// TIDDIT single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_NORMAL } from '../bam_variant_calling_single_tiddit/main.nf' include { BAM_VARIANT_CALLING_SINGLE_TIDDIT as TIDDIT_TUMOR } from '../bam_variant_calling_single_tiddit/main.nf' include { SVDB_MERGE } from '../../../modules/nf-core/svdb/merge/main.nf' diff --git a/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf index 42e0b0567c..993faf127c 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf @@ -1,3 +1,9 @@ +// +// CONTROLFREEC tumor-only variant calling +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { CONTROLFREEC_FREEC as FREEC_TUMORONLY } from '../../../modules/nf-core/controlfreec/freec/main' include { CONTROLFREEC_ASSESSSIGNIFICANCE as ASSESS_SIGNIFICANCE } from '../../../modules/nf-core/controlfreec/assesssignificance/main' include { CONTROLFREEC_FREEC2BED as FREEC2BED } from '../../../modules/nf-core/controlfreec/freec2bed/main' diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf index 8bc8f8f28a..10045c7356 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf @@ -1,3 +1,9 @@ +// +// MANTA single sample variant calling +// +// For all modules here: +// A when clause condition is defined in the 
conf/modules.config to determine if the module should be run + include { MANTA_TUMORONLY } from '../../../modules/nf-core/manta/tumoronly/main' // Seems to be the consensus on upstream modules implementation too diff --git a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf index 9120abd25c..d776d89878 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf @@ -1,6 +1,8 @@ // -// Run GATK mutect2 in tumor only mode, getepileupsummaries, calculatecontamination and filtermutectcalls +// GATK MUTECT2 in tumor only mode: getpileupsummaries, calculatecontamination and filtermutectcalls +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run include { GATK4_MERGEVCFS as MERGE_MUTECT2 } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' @@ -87,10 +89,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 { MERGEMUTECTSTATS(stats_to_merge) // Mix intervals and no_intervals channels together - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals) - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals) + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals) + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals) stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals) - f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals) + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals) // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 LEARNREADORIENTATIONMODEL(f1r2) diff --git 
a/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf b/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf deleted file mode 100644 index 1b7e757751..0000000000 --- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -// -// MAPPING -// -// For all modules here: -// A when clause condition is defined in the conf/modules.config to determine if the module should be run - -include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' -include { BWA_MEM as BWAMEM1_MEM } from '../../../modules/nf-core/bwa/mem/main' -include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main' - -workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP { - take: - reads // channel: [mandatory] meta, reads - index // channel: [mandatory] index - sort // boolean: [mandatory] true -> sort, false -> don't sort - - main: - - versions = Channel.empty() - reports = Channel.empty() - - // Only one of the following should be run - BWAMEM1_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem - BWAMEM2_MEM(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is bwa-mem2 - DRAGMAP_ALIGN(reads, index.map{ it -> [ [ id:'index' ], it ] }, sort) // If aligner is dragmap - - // Get the bam files from the aligner - // Only one aligner is run - bam = Channel.empty() - bam = bam.mix(BWAMEM1_MEM.out.bam) - bam = bam.mix(BWAMEM2_MEM.out.bam) - bam = bam.mix(DRAGMAP_ALIGN.out.bam) - - // Gather reports of all tools used - reports = reports.mix(DRAGMAP_ALIGN.out.log) - - // Gather versions of all tools used - versions = versions.mix(BWAMEM1_MEM.out.versions) - versions = versions.mix(BWAMEM2_MEM.out.versions) - versions = versions.mix(DRAGMAP_ALIGN.out.versions) - - emit: - bam // channel: [ [meta], bam ] - reports - versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf 
b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf index 0699eb5c19..914cf55ec2 100644 --- a/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf +++ b/subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main.nf @@ -36,8 +36,7 @@ workflow FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON { bam = bam.mix(DRAGMAP_ALIGN.out.bam) bam = bam.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bam ] }) - bai = Channel.empty() - bai = bai.mix(SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] }) + bai = SENTIEON_BWAMEM.out.bam_and_bai.map{ meta, bam, bai -> [ meta, bai ] } // Gather reports of all tools used reports = reports.mix(DRAGMAP_ALIGN.out.log) diff --git a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf index 51377194ae..c237e64014 100644 --- a/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf +++ b/subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf @@ -6,17 +6,18 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' -include { FGBIO_FASTQTOBAM as FASTQTOBAM } from '../../../modules/nf-core/fgbio/fastqtobam/main' -include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap/main' -include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main' -include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' +include { FGBIO_CALLMOLECULARCONSENSUSREADS as CALLUMICONSENSUS } from '../../../modules/nf-core/fgbio/callmolecularconsensusreads/main.nf' +include { FGBIO_FASTQTOBAM as FASTQTOBAM } from 
'../../../modules/nf-core/fgbio/fastqtobam/main' +include { FGBIO_GROUPREADSBYUMI as GROUPREADSBYUMI } from '../../../modules/nf-core/fgbio/groupreadsbyumi/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON as ALIGN_UMI } from '../fastq_align_bwamem_mem2_dragmap_sentieon/main' +include { SAMBLASTER } from '../../../modules/nf-core/samblaster/main' +include { SAMTOOLS_BAM2FQ as BAM2FASTQ } from '../../../modules/nf-core/samtools/bam2fq/main.nf' workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO { take: reads // channel: [mandatory] [ val(meta), [ reads ] ] fasta // channel: [mandatory] /path/to/reference/fasta + fai // channel: [optional] /path/to/reference/fasta_fai, needed for Sentieon map_index // channel: [mandatory] Pre-computed mapping index groupreadsbyumi_strategy // string: [mandatory] grouping strategy - default: "Adjacency" @@ -37,7 +38,7 @@ workflow FASTQ_CREATE_UMI_CONSENSUS_FGBIO { // appropriately tagged interleaved FASTQ reads are mapped to the reference // bams will not be sorted (hence, sort = false) sort = false - ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort) + ALIGN_UMI(BAM2FASTQ.out.reads, map_index, sort, fasta, fai) // samblaster is used in order to tag mates information in the BAM file // this is used in order to group reads by UMI diff --git a/subworkflows/local/post_variantcalling/main.nf b/subworkflows/local/post_variantcalling/main.nf new file mode 100644 index 0000000000..bf23ff13d4 --- /dev/null +++ b/subworkflows/local/post_variantcalling/main.nf @@ -0,0 +1,27 @@ +// +// POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs +// + +include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline/main' + +workflow POST_VARIANTCALLING { + + take: + vcfs + concatenate_vcfs + + main: + versions = Channel.empty() + + if(concatenate_vcfs){ + CONCATENATE_GERMLINE_VCFS(vcfs) + + vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs) + versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions) + } + + emit: + vcfs 
// post processed vcfs + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf index 31e73df4ca..5b759d8818 100644 --- a/subworkflows/local/vcf_annotate_all/main.nf +++ b/subworkflows/local/vcf_annotate_all/main.nf @@ -47,9 +47,9 @@ workflow VCF_ANNOTATE_ALL { vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] } VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) - reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) - vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) - tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) + reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) + tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json) versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions) } diff --git a/subworkflows/local/vcf_concatenate_germline/main.nf b/subworkflows/local/vcf_concatenate_germline/main.nf new file mode 100644 index 0000000000..87f46b22e1 --- /dev/null +++ b/subworkflows/local/vcf_concatenate_germline/main.nf @@ -0,0 +1,42 @@ +// +// CONCATENATE Germline VCFs +// + +// Concatenation of germline vcf-files +include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix/main' +include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../../../modules/nf-core/tabix/tabix/main' + +workflow CONCATENATE_GERMLINE_VCFS { + + take: + vcfs + + main: + versions = Channel.empty() + + // Concatenate vcf-files + ADD_INFO_TO_VCF(vcfs) + 
TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf) + + // Gather vcfs and vcf-tbis for concatenating germline-vcfs + germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple() + + GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis) + GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf) + TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) + + // Gather versions of all tools used + versions = versions.mix(ADD_INFO_TO_VCF.out.versions) + versions = versions.mix(TABIX_EXT_VCF.out.versions) + versions = versions.mix(GERMLINE_VCFS_CONCAT.out.versions) + versions = versions.mix(GERMLINE_VCFS_CONCAT_SORT.out.versions) + versions = versions.mix(TABIX_GERMLINE_VCFS_CONCAT_SORT.out.versions) + + emit: + vcfs = germline_vcfs_with_tbis // post processed vcfs + + versions // channel: [ versions.yml ] +} diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 8116fb3c23..017246e117 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -124,7 +124,7 @@ sentieon/bwamem: - tests/test_alignment_sentieon_bwamem.yml ## markduplicates -markduplicates: +gatk4/markduplicates: - conf/modules/markduplicates.config - modules/nf-core/gatk4/markduplicates/main.nf - modules/nf-core/mosdepth/main.nf @@ -454,4 +454,6 @@ concatenate_vcfs: - subworkflows/local/bam_variant_calling_mpileup/main.nf - subworkflows/local/bam_variant_calling_single_strelka/main.nf - subworkflows/local/bam_variant_calling_single_tiddit/main.nf + - subworkflows/local/post_variantcalling/main.nf + - subworkflows/local/vcf_concatenate_germline/main.nf - tests/test_concat_germline_vcfs.yml diff --git a/tests/test_markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml index dee006127f..d6191216bd 100644 --- a/tests/test_markduplicates_from_bam.yml +++ b/tests/test_markduplicates_from_bam.yml @@ -2,7 +2,7 @@ command: nextflow run main.nf -profile test_cache,markduplicates_bam --outdir results tags: - input_bam - - markduplicates + - 
gatk4/markduplicates - preprocessing files: - path: results/csv/markduplicates.csv @@ -48,6 +48,7 @@ # conda changes md5sums for test - path: results/reports/samtools/test/test.recal.cram.stats # conda changes md5sums for test + - name: Run skip markduplicates bam from step markduplicates command: nextflow run main.nf -profile test_cache,markduplicates_bam,skip_markduplicates --outdir results tags: diff --git a/tests/test_markduplicates_from_cram.yml b/tests/test_markduplicates_from_cram.yml index ab2e020c85..f36619f719 100644 --- a/tests/test_markduplicates_from_cram.yml +++ b/tests/test_markduplicates_from_cram.yml @@ -2,7 +2,7 @@ command: nextflow run main.nf -profile test_cache,markduplicates_cram --outdir results tags: - input_cram - - markduplicates + - gatk4/markduplicates - preprocessing files: - path: results/csv/markduplicates.csv diff --git a/tests/test_sentieon_dedup_from_bam.yml b/tests/test_sentieon_dedup_from_bam.yml index 4784a45816..cba467d07f 100644 --- a/tests/test_sentieon_dedup_from_bam.yml +++ b/tests/test_sentieon_dedup_from_bam.yml @@ -2,7 +2,6 @@ command: nextflow run main.nf -profile test_cache,sentieon_dedup_bam --outdir results tags: - input_bam - - markduplicates - preprocessing - sentieon/dedup files: diff --git a/tests/test_sentieon_dedup_from_cram.yml b/tests/test_sentieon_dedup_from_cram.yml index ce96acfb5c..e92fff1c60 100644 --- a/tests/test_sentieon_dedup_from_cram.yml +++ b/tests/test_sentieon_dedup_from_cram.yml @@ -2,7 +2,6 @@ command: nextflow run main.nf -profile test_cache,sentieon_dedup_cram --outdir results tags: - input_cram - - markduplicates - preprocessing - sentieon/dedup files: diff --git a/tests/test_sentieon_haplotyper.yml b/tests/test_sentieon_haplotyper.yml index 80b58984d0..5e06ccf604 100644 --- a/tests/test_sentieon_haplotyper.yml +++ b/tests/test_sentieon_haplotyper.yml @@ -85,7 +85,7 @@ - variant_calling files: - path: results/csv/variantcalled.csv - md5sum: b1d10b32d106b180a773782c7f3b127b + should_exist: 
false - path: results/multiqc - path: results/preprocessing/converted/test/test.converted.cram # binary changes md5sums on reruns @@ -123,7 +123,7 @@ - variant_calling files: - path: results/csv/variantcalled.csv - md5sum: eacdbbd51f3381ca33c9d0a51283c2dc + md5sum: 4d3dd4f6dcb34a91a949641f2b1ac202 - path: results/multiqc - path: results/preprocessing/converted/test/test.converted.cram # binary changes md5sums on reruns diff --git a/tests/test_sentieon_joint_germline.yml b/tests/test_sentieon_joint_germline.yml index f21bcd0d2f..99b5d4e826 100644 --- a/tests/test_sentieon_joint_germline.yml +++ b/tests/test_sentieon_joint_germline.yml @@ -7,7 +7,7 @@ - sentieon/haplotyper files: - path: results/csv/variantcalled.csv - md5sum: e7b30e6034ecb5928c96a4f96b9be4da + md5sum: 6ec10f6455c2b5290c7f6fc687c529ca - path: results/multiqc - path: results/preprocessing/recalibrated/test/test.recal.cram should_exist: false diff --git a/tests/test_umi.yml b/tests/test_umi.yml index 19aeac27d3..52be524ecd 100644 --- a/tests/test_umi.yml +++ b/tests/test_umi.yml @@ -50,6 +50,16 @@ # text-based file changes md5sums on reruns - path: results/reports/samtools/test/test.recal.cram.stats # text-based file changes md5sums on reruns + +- name: Run Sentieon-FGBio UMI combination test + command: nextflow run main.nf -profile test_cache,umi --outdir results --aligner "sentieon-bwamem" + tags: + - preprocessing + - umi + exit_code: 1 + stdout: + contains: + - "Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner." # - name: Run UMI TSO test # command: nextflow run main.nf -profile test_cache,umi_tso --outdir results # tags: diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 26a1186237..669dac1bd5 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -205,6 +205,10 @@ if (params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.s log.warn("DragMap was specified as aligner. 
Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") } +if (params.step == 'mapping' && params.aligner.contains("sentieon-bwamem") && params.umi_read_structure) { + error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") +} + if (params.tools && params.tools.contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) { error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") } @@ -332,95 +336,91 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p */ // Create samplesheets to restart from different steps -include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main' -include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main' -include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main' -include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main' -include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main' +include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main' +include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main' +include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main' +include { CHANNEL_APPLYBQSR_CREATE_CSV } from 
'../subworkflows/local/channel_applybqsr_create_csv/main' +include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main' // Download annotation cache if needed -include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main' +include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main' // Build indices if needed -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' // Build intervals if needed -include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main' +include { PREPARE_INTERVALS } from '../subworkflows/local/prepare_intervals/main' // Build CNVkit reference if needed -include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main' +include { PREPARE_REFERENCE_CNVKIT } from '../subworkflows/local/prepare_reference_cnvkit/main' // Convert BAM files to FASTQ files -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main' -include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_INPUT } from '../subworkflows/local/bam_convert_samtools/main' +include { BAM_CONVERT_SAMTOOLS as CONVERT_FASTQ_UMI } from '../subworkflows/local/bam_convert_samtools/main' // Run FASTQC -include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' // TRIM/SPLIT FASTQ Files -include { FASTP } from '../modules/nf-core/fastp/main' +include { FASTP } from '../modules/nf-core/fastp/main' // Create umi consensus bams from fastq -include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main' +include { FASTQ_CREATE_UMI_CONSENSUS_FGBIO } from '../subworkflows/local/fastq_create_umi_consensus_fgbio/main' // Map input 
reads to reference genome -include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main' +include { FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP_SENTIEON } from '../subworkflows/local/fastq_align_bwamem_mem2_dragmap_sentieon/main' // Merge and index BAM files (optional) -include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main' +include { BAM_MERGE_INDEX_SAMTOOLS } from '../subworkflows/local/bam_merge_index_samtools/main' // Convert BAM files -include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as BAM_TO_CRAM_MAPPING } from '../modules/nf-core/samtools/convert/main' // Convert CRAM files (optional) -include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main' -include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM } from '../modules/nf-core/samtools/convert/main' +include { SAMTOOLS_CONVERT as CRAM_TO_BAM_RECAL } from '../modules/nf-core/samtools/convert/main' // Mark Duplicates (+QC) -include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' -include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main' -include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main' +include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' +include { BAM_MARKDUPLICATES_SPARK } from '../subworkflows/local/bam_markduplicates_spark/main' +include { BAM_SENTIEON_DEDUP } from '../subworkflows/local/bam_sentieon_dedup/main' // QC on CRAM -include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from 
'../subworkflows/local/cram_qc_mosdepth_samtools/main' -include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_NO_MD } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS as CRAM_QC_RECAL } from '../subworkflows/local/cram_qc_mosdepth_samtools/main' // Create recalibration tables -include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main' -include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main' +include { BAM_BASERECALIBRATOR } from '../subworkflows/local/bam_baserecalibrator/main' +include { BAM_BASERECALIBRATOR_SPARK } from '../subworkflows/local/bam_baserecalibrator_spark/main' // Create recalibrated cram files to use for variant calling (+QC) -include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main' -include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main' +include { BAM_APPLYBQSR } from '../subworkflows/local/bam_applybqsr/main' +include { BAM_APPLYBQSR_SPARK } from '../subworkflows/local/bam_applybqsr_spark/main' // Variant calling on a single normal sample -include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main' +include { BAM_VARIANT_CALLING_GERMLINE_ALL } from '../subworkflows/local/bam_variant_calling_germline_all/main' // Variant calling on a single tumor sample -include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main' +include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows/local/bam_variant_calling_tumor_only_all/main' // Variant calling on tumor/normal pair -include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main' +include { BAM_VARIANT_CALLING_SOMATIC_ALL } from 
'../subworkflows/local/bam_variant_calling_somatic_all/main' -// Concatenation of germline vcf-files -include { ADD_INFO_TO_VCF } from '../modules/local/add_info_to_vcf/main' -include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../modules/nf-core/tabix/bgziptabix/main' -include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../modules/nf-core/bcftools/concat/main' -include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main' -include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main' +// POST VARIANTCALLING: e.g. merging +include { POST_VARIANTCALLING } from '../subworkflows/local/post_variantcalling/main' // QC on VCF files -include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' +include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' // Annotation -include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' +include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' // REPORTING VERSIONS OF SOFTWARE USED -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' // MULTIQC -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -431,20 +431,20 @@ include { MULTIQC } from '../modules/nf-c workflow SAREK { // MULTIQC - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) // To gather all QC reports for MultiQC - reports = Channel.empty() + reports = Channel.empty() // To gather used softwares versions for MultiQC versions = Channel.empty() // Download cache if needed // Assuming that if the cache is provided, the user has already downloaded it - ensemblvep_info = params.vep_cache ? [] : Channel.of([ [ id:"${params.vep_genome}.${params.vep_cache_version}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) - snpeff_info = params.snpeff_cache ? [] : Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) + ensemblvep_info = params.vep_cache ? [] : Channel.of([ [ id:"${params.vep_genome}.${params.vep_cache_version}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + snpeff_info = params.snpeff_cache ? 
[] : Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) if (params.download_cache) { PREPARE_CACHE(ensemblvep_info, snpeff_info) @@ -509,16 +509,16 @@ workflow SAREK { known_sites_indels = dbsnp.concat(known_indels).collect() known_sites_indels_tbi = dbsnp_tbi.concat(known_indels_tbi).collect() - known_sites_snps = dbsnp.concat(known_snps).collect() - known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() + known_sites_snps = dbsnp.concat(known_snps).collect() + known_sites_snps_tbi = dbsnp_tbi.concat(known_snps_tbi).collect() // Build intervals if needed PREPARE_INTERVALS(fasta_fai, params.intervals, params.no_intervals) // Intervals for speed up preprocessing/variant calling by spread/gather // [interval.bed] all intervals in one file - intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined + intervals_bed_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi_combined = params.no_intervals ? Channel.value([]) : PREPARE_INTERVALS.out.intervals_bed_gz_tbi_combined // For QC during preprocessing, we don't need any intervals (MOSDEPTH doesn't take them for WGS) intervals_for_preprocessing = params.wes ? 
@@ -599,6 +599,7 @@ workflow SAREK { FASTQ_CREATE_UMI_CONSENSUS_FGBIO( input_fastq, fasta, + fasta_fai, index_alignement, params.group_by_umi_strategy) @@ -1167,18 +1168,9 @@ workflow SAREK { params.joint_mutect2 ) - if (params.concatenate_vcfs) { - // Concatenate vcf-files - ADD_INFO_TO_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all) - TABIX_EXT_VCF(ADD_INFO_TO_VCF.out.vcf) - - // Gather vcfs and vcf-tbis for concatenating germline-vcfs - germline_vcfs_with_tbis = TABIX_EXT_VCF.out.gz_tbi.map{ meta, vcf, tbi -> [ meta.subMap('id'), vcf, tbi ] }.groupTuple() - - GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis) - GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf) - TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) - } + // POST VARIANTCALLING + POST_VARIANTCALLING(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_all, + params.concatenate_vcfs) // Gather vcf files for annotation and QC vcf_to_annotate = Channel.empty() @@ -1193,22 +1185,22 @@ workflow SAREK { vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.vcf_all) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.vcf_all) - // Gather used softwares versions - versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions) - versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions) - versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.versions) - // QC VCF_QC_BCFTOOLS_VCFTOOLS(vcf_to_annotate, intervals_bed_combined) - versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.bcftools_stats.collect{ meta, stats -> stats }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_counts.collect{ meta, counts -> counts }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_tstv_qual.collect{ meta, qual -> qual }) reports = reports.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.vcftools_filter_summary.collect{ meta, summary -> summary }) - vcf_to_csv = 
vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.gvcf_sentieon_haplotyper) - CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_csv) + CHANNEL_VARIANT_CALLING_CREATE_CSV(vcf_to_annotate) + + // Gather used variant calling softwares versions + versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_SOMATIC_ALL.out.versions) + versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.versions) + versions = versions.mix(POST_VARIANTCALLING.out.versions) + versions = versions.mix(VCF_QC_BCFTOOLS_VCFTOOLS.out.versions) // ANNOTATE if (params.step == 'annotate') vcf_to_annotate = input_sample