From 4c78bd0b69262cb97b391dd211de4d8fbdae548b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 13:40:46 +0200 Subject: [PATCH 01/12] add vcf vc workflow --- conf/modules.config | 20 ++++++++++++++++++++ subworkflows/nf-core/vcf_qc.nf | 27 +++++++++++++++++++++++++++ workflows/sarek.nf | 5 +++++ 3 files changed, 52 insertions(+) create mode 100644 subworkflows/nf-core/vcf_qc.nf diff --git a/conf/modules.config b/conf/modules.config index 206b622b23..f8c69ef949 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -903,6 +903,26 @@ process{ ext.prefix = {"${meta.id}.somatic_snvs"} } + // VCF QC + withName: 'BCFTOOLS_STATS'{ + ext.when = { !(params.skip_tools && params.skip_tools.contains('bcftools')) } + } + + withName: 'VCFTOOLS_.*'{ + ext.when = { !(params.skip_tools && params.skip_tools.contains('vcftools')) } + } + + withName: 'VCFTOOLS_TSTV_COUNT'{ + ext.args = "--TsTv-by-count" + } + + withName: 'VCFTOOLS_TSTV_QUAL'{ + ext.args = "--TsTv-by-qual" + } + + withName: 'VCFTOOLS_SUMMARY'{ + ext.args = "--FILTER-summary" + } } //withName: 'GENOMICSDBIMPORT' { diff --git a/subworkflows/nf-core/vcf_qc.nf b/subworkflows/nf-core/vcf_qc.nf new file mode 100644 index 0000000000..82c06c9614 --- /dev/null +++ b/subworkflows/nf-core/vcf_qc.nf @@ -0,0 +1,27 @@ +include { BCFTOOLS_STATS } from '../../modules/nf-core/modules/bcftools/stats/main' +include { VCFTOOLS as VCFTOOLS_TSTV_COUNT } from '../../modules/nf-core/modules/vcftools/main' +include { VCFTOOLS as VCFTOOLS_TSTV_QUAL } from '../../modules/nf-core/modules/vcftools/main' +include { VCFTOOLS as VCFTOOLS_SUMMARY } from '../../modules/nf-core/modules/vcftools/main' + +workflow VCF_QC { + + take: + vcf + target_bed + + main: + + ch_versions = Channel.empty() + + BCFTOOLS_STATS(vcf) + VCFTOOLS_TSTV_COUNT(vcf, target_bed, []) + VCFTOOLS_TSTV_QUAL(vcf, target_bed,[]) + VCFTOOLS_SUMMARY(vcf, target_bed,[]) + + emit: + versions = ch_versions + bcftools_stats = BCFTOOLS_STATS.out.stats + vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count + vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual + vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary +} diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 0563f45693..4f08214f58 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -183,6 +183,8 @@ include { TUMOR_ONLY_VARIANT_CALLING } from '../subworkflows // Variant calling on tumor/normal pair include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' +include { VCF_QC } from '../subworkflows/nf-core/vcf_qc' + // Annotation include { ANNOTATE } from '../subworkflows/local/annotate' @@ -693,6 +695,9 @@ workflow SAREK { ch_versions = ch_versions.mix(PAIR_VARIANT_CALLING.out.versions) ch_versions = ch_versions.mix(TUMOR_ONLY_VARIANT_CALLING.out.versions) + //QC + VCF_QC(vcf_to_annotate, intervals_bed_combined) + // ANNOTATE if (params.step == 'annotate') vcf_to_annotate = ch_input_sample From e5995cf3a3557eeb23b36164d248b3196c82e1ae Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 16:56:36 +0200 Subject: [PATCH 02/12] Suggestion for how to deal with multi tools for annotation and qc --- conf/modules.config | 11 +++++++++++ .../nf-core/variantcalling/strelka/single/main.nf | 7 ++++++- subworkflows/nf-core/vcf_qc.nf | 8 ++++---- workflows/sarek.nf | 2 +- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f8c69ef949..a1451a351f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -906,10 +906,21 @@ process{ // VCF QC withName: 'BCFTOOLS_STATS'{ ext.when = { !(params.skip_tools && params.skip_tools.contains('bcftools')) } + ext.prefix = { "${meta.variantcaller}_${vcf.baseName.minus(".vcf")}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/bcftools" }, + ] } + //"${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) withName: 'VCFTOOLS_.*'{ ext.when = { !(params.skip_tools && params.skip_tools.contains('vcftools')) } + ext.prefix = { "${vcf.baseName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vcftools" }, + ] } withName: 'VCFTOOLS_TSTV_COUNT'{ diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index e90ec8118a..16d4b041ee 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -63,13 +63,18 @@ workflow RUN_STRELKA_SINGLE { strelka_genome_vcf.no_intervals, strelka_vcf.no_intervals) + strelka_vcf_out = strelka_vcf.map{ meta, vcf -> + meta.variantcaller = "Strelka" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) + strelka_vcf_out.view() emit: - strelka_vcf + strelka_vcf = strelka_vcf_out versions = ch_versions } diff --git a/subworkflows/nf-core/vcf_qc.nf b/subworkflows/nf-core/vcf_qc.nf index 82c06c9614..d463712f53 100644 --- a/subworkflows/nf-core/vcf_qc.nf +++ b/subworkflows/nf-core/vcf_qc.nf @@ -19,9 +19,9 @@ workflow VCF_QC { VCFTOOLS_SUMMARY(vcf, target_bed,[]) emit: - versions = ch_versions - bcftools_stats = BCFTOOLS_STATS.out.stats - vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count - vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual + versions = ch_versions + bcftools_stats = BCFTOOLS_STATS.out.stats + vcftools_tstv_counts = VCFTOOLS_TSTV_COUNT.out.tstv_count + vcftools_tstv_qual = VCFTOOLS_TSTV_QUAL.out.tstv_qual vcftools_filter_summary = VCFTOOLS_SUMMARY.out.filter_summary } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 4f08214f58..48bd588f64 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -675,7 +675,7 @@ workflow SAREK { chr_files, mappability) - // Gather vcf files for annotation + // Gather vcf files for annotation and QC vcf_to_annotate = Channel.empty() vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.deepvariant_vcf) vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.freebayes_vcf) From 9413cff7152cc510dc86d520142a3369fcf5154b Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 18:01:38 +0200 Subject: [PATCH 03/12] add tool naming to meta-map for all callers --- .../variantcalling/deepvariant/main.nf | 12 ++++++++---- .../nf-core/variantcalling/freebayes/main.nf | 8 ++++++-- .../variantcalling/manta/germline/main.nf | 16 ++++++++++------ .../variantcalling/manta/somatic/main.nf | 15 ++++++++++++--- .../variantcalling/manta/tumoronly/main.nf | 5 ++++- .../variantcalling/strelka/single/main.nf | 19 +++++++++---------- .../variantcalling/strelka/somatic/main.nf | 4 ++++ 7 files changed, 53 insertions(+), 26 deletions(-) diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index 0e0520aca3..f5568c83b6 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -54,10 +54,14 @@ workflow RUN_DEEPVARIANT { // Mix output channels for "no intervals" and "with intervals" results deepvariant_vcf = Channel.empty().mix( - CONCAT_DEEPVARIANT_GVCF.out.vcf, - CONCAT_DEEPVARIANT_VCF.out.vcf, - DEEPVARIANT.out.gvcf, - DEEPVARIANT.out.vcf) + CONCAT_DEEPVARIANT_GVCF.out.vcf, + CONCAT_DEEPVARIANT_VCF.out.vcf, + DEEPVARIANT.out.gvcf, + DEEPVARIANT.out.vcf) + .map{ meta, vcf -> + meta.variantcaller = "Deepvariant" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_VCF.out.versions) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index 3e909a5e4e..fec674ff4b 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -39,8 +39,12 @@ workflow RUN_FREEBAYES { // Mix output channels for "no intervals" and "with intervals" results freebayes_vcf = Channel.empty().mix( - CONCAT_FREEBAYES.out.vcf, - FREEBAYES.out.vcf) + CONCAT_FREEBAYES.out.vcf, + FREEBAYES.out.vcf) + .map{ meta, vcf -> + meta.variantcaller = "FreeBayes" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index dec574f959..71c405b898 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -77,12 +77,16 @@ workflow RUN_MANTA_GERMLINE { // Mix output channels for "no intervals" and "with intervals" results manta_vcf = Channel.empty().mix( - CONCAT_MANTA_DIPLOID.out.vcf, - CONCAT_MANTA_SMALL_INDELS.out.vcf, - CONCAT_MANTA_SV.out.vcf, - manta_diploid_sv_vcf.no_intervals, - manta_small_indels_vcf.no_intervals, - manta_sv_vcf.no_intervals) + CONCAT_MANTA_DIPLOID.out.vcf, + CONCAT_MANTA_SMALL_INDELS.out.vcf, + CONCAT_MANTA_SV.out.vcf, + manta_diploid_sv_vcf.no_intervals, + manta_small_indels_vcf.no_intervals, + manta_sv_vcf.no_intervals) + .map{ meta, vcf -> + meta.variantcaller = "Manta" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index 249b3ec091..3c72cc7a68 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -103,17 +103,26 @@ workflow RUN_MANTA_SOMATIC { manta_candidate_small_indels_vcf.no_intervals, manta_diploid_sv_vcf.no_intervals, manta_somatic_sv_vcf.no_intervals - ) + ).map{ meta, vcf -> + meta.variantcaller = "Manta" + [meta, vcf] + } manta_candidate_small_indels_vcf = Channel.empty().mix( CONCAT_MANTA_SMALL_INDELS.out.vcf, manta_candidate_small_indels_vcf.no_intervals - ) + ).map{ meta, vcf -> + meta.variantcaller = "Manta" + [meta, vcf] + } manta_candidate_small_indels_vcf_tbi = Channel.empty().mix( CONCAT_MANTA_SMALL_INDELS.out.tbi, manta_candidate_small_indels_vcf_tbi.no_intervals - ) + ).map{ meta, vcf -> + meta.variantcaller = "Manta" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index 13696c0db7..ba699bcb27 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -80,7 +80,10 @@ workflow RUN_MANTA_TUMORONLY { manta_small_indels_vcf.no_intervals, manta_candidate_sv_vcf.no_intervals, manta_tumor_sv_vcf.no_intervals - ) + ).map{ meta, vcf -> + meta.variantcaller = "Manta" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SMALL_INDELS.out.versions) diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index 16d4b041ee..bdaf5dbce6 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -58,23 +58,22 @@ workflow RUN_STRELKA_SINGLE { // Mix output channels for "no intervals" and "with intervals" results strelka_vcf = Channel.empty().mix( - CONCAT_STRELKA.out.vcf, - CONCAT_STRELKA_GENOME.out.vcf, - strelka_genome_vcf.no_intervals, - strelka_vcf.no_intervals) + CONCAT_STRELKA.out.vcf, + CONCAT_STRELKA_GENOME.out.vcf, + strelka_genome_vcf.no_intervals, + strelka_vcf.no_intervals) + .map{ meta, vcf -> + meta.variantcaller = "Strelka" + [meta, vcf] + } - strelka_vcf_out = strelka_vcf.map{ meta, vcf -> - meta.variantcaller = "Strelka" - [meta, vcf] - } ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA.out.versions) ch_versions = ch_versions.mix(CONCAT_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_GERMLINE.out.versions) - strelka_vcf_out.view() emit: - strelka_vcf = strelka_vcf_out + strelka_vcf versions = ch_versions } diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index b0bc7509d8..913b24b3a5 100644 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -58,6 +58,10 @@ workflow RUN_STRELKA_SOMATIC { CONCAT_STRELKA_INDELS.out.vcf, strelka_vcf_snvs.no_intervals, strelka_vcf_indels.no_intervals) + .map{ meta, vcf -> + meta.variantcaller = "Strelka" + [meta, vcf] + } ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_INDELS.out.versions) From 2ceb02cc690e993c100005e8465e88da20df281d Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 18:56:59 +0200 Subject: [PATCH 04/12] add tool for annotation to annotate multiple tools for the same sample --- conf/modules.config | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a1451a351f..2c969ade83 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -912,11 +912,10 @@ process{ path: { "${params.outdir}/reports/bcftools" }, ] } - //"${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) withName: 'VCFTOOLS_.*'{ ext.when = { !(params.skip_tools && params.skip_tools.contains('vcftools')) } - ext.prefix = { "${vcf.baseName}" } + ext.prefix = { "${meta.variantcaller}_${variant_file.baseName.minus(".vcf")}" } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/reports/vcftools" }, @@ -936,36 +935,6 @@ process{ } } -//withName: 'GENOMICSDBIMPORT' { -// -//} - -// withName: manta_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta'] -// } -// withName: msisensorpro_msi { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['list':'msisensorpro'] -// } -// withName: strelka_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] -// } -// withName: strelka_somatic_bp { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] -// } -// withName: mutect2_somatic { -// publishDir = [ -// path: { "${params.outdir}/variant_calling' -// publish_files = ['vcf.gz':'mutect2', 'vcf.gz.tbi':'mutect2'] -// } - // ANNOTATE process { @@ -983,7 +952,7 @@ process { publishDir = [ enabled: true, mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.id}" }, + path: { "${params.outdir}/annotation/${meta.id}/${meta.variantcaller}" }, pattern: "*{gz,gz.tbi}" ] } From 88a25d8b109b2bcfa24234021fde89275d836d85 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 22:40:13 +0200 Subject: [PATCH 05/12] Add multiqc --- conf/modules.config | 11 +++++----- subworkflows/nf-core/vcf_qc.nf | 3 +++ workflows/sarek.nf | 37 +++++++++++++++++++--------------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2c969ade83..d37cc38a49 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -982,11 +982,12 @@ if (params.tools && (params.tools.contains('merge'))) { } } -// process { -// withName:'MULTIQC' { -// errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} -// } -// } +process { + withName:'MULTIQC' { + errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} + ext.args = { params.multiqc_config ? "--config $multiqc_custom_config" : "" } + } +} // process { // withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/subworkflows/nf-core/vcf_qc.nf b/subworkflows/nf-core/vcf_qc.nf index d463712f53..19e385ab67 100644 --- a/subworkflows/nf-core/vcf_qc.nf +++ b/subworkflows/nf-core/vcf_qc.nf @@ -18,6 +18,9 @@ workflow VCF_QC { VCFTOOLS_TSTV_QUAL(vcf, target_bed,[]) VCFTOOLS_SUMMARY(vcf, target_bed,[]) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) + ch_versions = ch_versions.mix(VCFTOOLS_TSTV_COUNT.out.versions) + emit: versions = ch_versions bcftools_stats = BCFTOOLS_STATS.out.stats diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 48bd588f64..7214f372d4 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -407,7 +407,7 @@ workflow SAREK { ch_cram_no_markduplicates = BAM_TO_CRAM.out.cram // Gather QC reports - ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc) + ch_reports = ch_reports.mix(BAM_TO_CRAM.out.qc.collect{it[1]}.ifEmpty([])) // Gather used softwares versions ch_versions = ch_versions.mix(BAM_TO_CRAM.out.versions) @@ -698,6 +698,12 @@ workflow SAREK { //QC VCF_QC(vcf_to_annotate, intervals_bed_combined) + ch_versions = ch_versions.mix(VCF_QC.out.versions) + ch_reports = ch_reports.mix(VCF_QC.out.bcftools_stats.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_counts.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(VCF_QC.out.vcftools_tstv_qual.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(VCF_QC.out.vcftools_filter_summary.collect{it[1]}.ifEmpty([])) + // ANNOTATE if (params.step == 'annotate') vcf_to_annotate = ch_input_sample @@ -718,27 +724,26 @@ workflow SAREK { } ch_version_yaml = Channel.empty() - if (!(params.skip_tools && params.skip_tools.contains('versions'))) { + if (!(params.skip_tools && params.skip_tools.contains('versions'))) { CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) ch_version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() } - // workflow_summary = WorkflowSarek.paramsSummaryMultiqc(workflow, summary_params) - // ch_workflow_summary = Channel.value(workflow_summary) - - // ch_multiqc_files = Channel.empty() - // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) - // ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - // ch_multiqc_files = ch_multiqc_files.mix(ch_version_yaml) - // ch_multiqc_files = ch_multiqc_files.mix(ch_reports) + if (!(params.skip_tools && params.skip_tools.contains('multiqc'))) { + workflow_summary = WorkflowSarek.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) - // multiqc_report = Channel.empty() - // if (!(params.skip_tools && params.skip_tools.contains('multiqc'))) { - // MULTIQC(ch_multiqc_files.collect()) - // multiqc_report = MULTIQC.out.report.toList() + ch_multiqc_files = Channel.empty().mix(ch_version_yaml, + ch_multiqc_custom_config.collect().ifEmpty([]), + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), + ch_reports.collect(), + Channel.from(ch_multiqc_config)) + //println ch_multiqc_config + //ch_multiqc_files.collect().view() - // } + MULTIQC(ch_multiqc_files.collect()) + //multiqc_report = MULTIQC.out.report.toList() + } } /* From 8f69345bebb928c0b725ef2e5fb2abed4eb66bf3 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 23:38:44 +0200 Subject: [PATCH 06/12] tampering with the config --- assets/multiqc_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e3db6a9321..da018dffe9 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -16,7 +16,7 @@ export_plots: true top_modules: - "fastqc": - name: "FastQC" + name: "FastQC (raw)" path_filters_exclude: - "*trimmed_fastqc*" - "cutadapt" From c183f1eea640082daf1ad7035ee760983298b32d Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 19 Apr 2022 23:39:02 +0200 Subject: [PATCH 07/12] remove comments --- workflows/sarek.nf | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 7214f372d4..1a06dcb633 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -738,11 +738,9 @@ workflow SAREK { ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), ch_reports.collect(), Channel.from(ch_multiqc_config)) - //println ch_multiqc_config - //ch_multiqc_files.collect().view() - MULTIQC(ch_multiqc_files.collect()) - //multiqc_report = MULTIQC.out.report.toList() + MULTIQC(ch_multiqc_files.collect()) + multiqc_report = MULTIQC.out.report.toList() } } @@ -851,7 +849,8 @@ def extract_csv(csv_file) { } else if (row.vcf) { meta.id = meta.sample def vcf = file(row.vcf, checkIfExists: true) - meta.data_type = "vcf" + meta.data_type = "vcf" + meta.variantcaller = "" return [meta, vcf] } else { log.warn "Missing or unknown field in csv file header" From dad599e480c4491cd0072cd2f0db698fecc0c7cf Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 20 Apr 2022 14:35:44 +0200 Subject: [PATCH 08/12] Fix trimgalore used output --- workflows/sarek.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 1a06dcb633..2ca8d2eee3 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -314,6 +314,8 @@ workflow SAREK { ch_reads = ch_input_fastq } + ch_reads.view() + // UMI consensus calling if (params.umi_read_structure) { CREATE_UMI_CONSENSUS(ch_reads, @@ -331,7 +333,7 @@ workflow SAREK { ch_versions = ch_versions.mix(ALIGNMENT_TO_FASTQ_UMI.out.versions) ch_versions = ch_versions.mix(CREATE_UMI_CONSENSUS.out.versions) } else { - ch_input_sample_to_split = ch_input_fastq + ch_input_sample_to_split = ch_reads } // SPLIT OF FASTQ FILES WITH SEQKIT_SPLIT2 From d6eaa4483a4d735c043871affab9d140d666118b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 20 Apr 2022 17:55:06 +0200 Subject: [PATCH 09/12] Fix BQSR output for reporting --- assets/multiqc_config.yml | 73 ++++++++++++++----- conf/modules.config | 30 ++++++-- .../controlfreec/somatic/main.nf | 2 - workflows/sarek.nf | 18 +++-- 4 files changed, 90 insertions(+), 33 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index da018dffe9..2304f860ca 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,4 +1,4 @@ -custom_logo: ../../../docs/images/nf-core_sarek_logo.png +custom_logo: "nf-core-sarek_logo_light.png" custom_logo_url: https://github.com/nf-core/sarek/ custom_logo_title: "nf-core/sarek" @@ -14,21 +14,56 @@ report_section_order: export_plots: true -top_modules: - - "fastqc": - name: "FastQC (raw)" - path_filters_exclude: - - "*trimmed_fastqc*" - - "cutadapt" - - "fastqc": - name: "FastQC after trimming" - info: "FastQC after applying TrimGalore." - path_filters: - - "*trimmed_fastqc*" - - "picard" - - "gatk" - - "samtools" - - "qualimap" - - "bcftools" - - "vcftools" - - "snpeff" +# Run only these modules +run_modules: + - custom_content + - fastqc + - cutadapt + - picard + - samtools + - qualimap + - gatk + - bcftools + - vcftools + - snpeff + - vep + +module_order: + - fastqc: + name: "FastQC (raw)" + path_filters_exclude: + - "*_val_*.zip" + - cutadapt: + name: "Cutadapt" + - fastqc: + name: "FastQC (trimmed)" + path_filters: + - "*_val_*.zip" + - picard: + name: "GATK4 MarkDuplicates" + info: " metrics generated either by GATK4 MarkDuplicates or EstimateLibraryComplexity (with --use_gatk_spark)." + - samtools: + name: "Samtools Flagstat" + - qualimap: + name: "Qualimap" + - gatk: + name: "GATK4 BQSR" + - bcftools: + name: "Bcftools" + - vcftools: + name: "Vcftools" + - snpeff: + name: "SNPeff" + - vep: + name: "VEP" + +extra_fn_clean_exts: + - "_val" +# Customise the module search patterns to speed up execution time +# - Skip module sub-tools that we are not interested in +# - Replace file-content searching with filename pattern searching +# - Don't add anything that is the same as the MultiQC default +# See https://multiqc.info/docs/#optimise-file-search-patterns for details +# sp: +# cutadapt: +# fn: "*trimming_report.txt" diff --git a/conf/modules.config b/conf/modules.config index d37cc38a49..aea86b8379 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -196,9 +196,24 @@ process { withName: 'TRIMGALORE' { ext.args = '--fastqc' publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/trimgalore/${meta.id}" } + [ + path: { "${params.outdir}/trimgalore/${meta.id}/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html,zip}", + enabled: true + ], + [ + path: { "${params.outdir}/trimgalore/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + enabled: true + ], + [ + path: { "${params.outdir}/trimgalore/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.txt", + enabled: true + ] ] } @@ -386,9 +401,12 @@ process { errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} ext.args = '--quiet' publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/fastqc/${meta.id}" } + [ + path: { "${params.outdir}/fastqc/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,zip}", + enabled: true + ] ] } diff --git a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf index 32b85445e1..ea7b2b10b6 100644 --- a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf @@ -54,8 +54,6 @@ workflow RUN_CONTROLFREEC_SOMATIC { } .groupTuple(size: num_intervals, sort:true)) - mpileup_normal.no_intervals.view() - controlfreec_input_normal = Channel.empty().mix( CAT_MPILEUP_NORMAL.out.file_out, mpileup_normal.no_intervals diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 2ca8d2eee3..165edd7c72 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -200,9 +200,9 @@ include { MULTIQC } from '../modules/nf-c ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_config = Channel.fromPath(file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() - +ch_sarek_logo = Channel.fromPath(file("$projectDir/assets/nf-core-sarek_logo_light.png", checkIfExists: true)) def multiqc_report = [] /* @@ -308,14 +308,14 @@ workflow SAREK { ch_reads = RUN_TRIMGALORE.out.reads - ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(RUN_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]), + RUN_TRIMGALORE.out.trim_html.collect{it[1]}.ifEmpty([]), + RUN_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(RUN_TRIMGALORE.out.versions) } else { ch_reads = ch_input_fastq } - ch_reads.view() - // UMI consensus calling if (params.umi_read_structure) { CREATE_UMI_CONSENSUS(ch_reads, @@ -506,6 +506,8 @@ workflow SAREK { // Create CSV to restart from this step PREPARE_RECALIBRATION_CSV(ch_table_bqsr) + + ch_reports = ch_reports.mix(ch_table_bqsr.map{ meta, table -> table}) } } @@ -722,6 +724,9 @@ workflow SAREK { // Gather used softwares versions ch_versions = ch_versions.mix(ANNOTATE.out.versions) + ch_reports = ch_reports.mix(ANNOTATE.out.reports) + + ch_reports.view() } } @@ -739,7 +744,8 @@ workflow SAREK { ch_multiqc_custom_config.collect().ifEmpty([]), ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), ch_reports.collect(), - Channel.from(ch_multiqc_config)) + ch_multiqc_config, + ch_sarek_logo) MULTIQC(ch_multiqc_files.collect()) multiqc_report = MULTIQC.out.report.toList() From daffd96ef3439c895e954dc06ddf49d7655c0fc0 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 21 Apr 2022 11:13:06 +0200 Subject: [PATCH 10/12] Add snpeff file size as recommended in multiQC issue 229 --- assets/multiqc_config.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2304f860ca..145a817eae 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -67,3 +67,8 @@ extra_fn_clean_exts: # sp: # cutadapt: # fn: "*trimming_report.txt" + +sp: + snpeff: + contents: "SnpEff_version" + max_filesize: 5000000 From c034f5fa9d1c29abfc1a2390d7f57654f325a65c Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 21 Apr 2022 13:26:52 +0200 Subject: [PATCH 11/12] Finish configuration for MQC --- assets/multiqc_config.yml | 8 -------- .../nf-core/gatk4/prepare_recalibration/main.nf | 11 ++++++----- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 145a817eae..9c69efef1f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -59,14 +59,6 @@ module_order: extra_fn_clean_exts: - "_val" -# Customise the module search patterns to speed up execution time -# - Skip module sub-tools that we are not interested in -# - Replace file-content searching with filename pattern searching -# - Don't add anything that is the same as the MultiQC default -# See https://multiqc.info/docs/#optimise-file-search-patterns for details -# sp: -# cutadapt: -# fn: "*trimming_report.txt" sp: snpeff: diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf index e05e873b97..20c4b115ae 100644 --- a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf +++ b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf @@ -25,22 +25,23 @@ workflow PREPARE_RECALIBRATION { .map{ meta, cram, crai, intervals -> new_meta = meta.clone() new_meta.id = num_intervals == 1 ? meta.sample : meta.sample + "_" + intervals.baseName - [new_meta, cram, crai, intervals] + intervals_new = params.no_intervals ? [] : intervals + [new_meta, cram, crai, intervals_new] } // Run Baserecalibrator BASERECALIBRATOR(cram_intervals, fasta, fasta_fai, dict, known_sites, known_sites_tbi) // Figuring out if there is one or more table(s) from the same sample - ch_table = BASERECALIBRATOR.out.table + table_to_merge = BASERECALIBRATOR.out.table .map{ meta, table -> meta.id = meta.sample [meta, table] }.groupTuple(size: num_intervals) .branch{ - single: it[1].size() == 1 - multiple: it[1].size() > 1 - }.set{table_to_merge} + single: num_intervals == 1 + multiple: num_intervals > 1 + } // STEP 3.5: MERGING RECALIBRATION TABLES From 25ded7d585eb5762db4add760fc511096f981234 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 21 Apr 2022 13:35:55 +0200 Subject: [PATCH 12/12] Fix fastqc output path --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index aea86b8379..e42d20381b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -197,7 +197,7 @@ process { ext.args = '--fastqc' publishDir = [ [ - path: { "${params.outdir}/trimgalore/${meta.id}/fastqc" }, + path: { "${params.outdir}/reports/trimgalore/${meta.id}/fastqc" }, mode: params.publish_dir_mode, pattern: "*.{html,zip}", enabled: true @@ -209,7 +209,7 @@ process { enabled: true ], [ - path: { "${params.outdir}/trimgalore/${meta.id}" }, + path: { "${params.outdir}/reports/trimgalore/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt", enabled: true @@ -402,7 +402,7 @@ process { ext.args = '--quiet' publishDir = [ [ - path: { "${params.outdir}/fastqc/${meta.id}" }, + path: { "${params.outdir}/reports/fastqc/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{html,zip}", enabled: true