diff --git a/CHANGELOG.md b/CHANGELOG.md index d2c0d4c869..4643bab21a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,6 +73,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back - [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files - [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals +- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes (issue [#311](https://github.com/nf-core/sarek/issues/311)); replace `meta.clone()` with an explicit copy of the map to avoid the issue described in [this Slack thread](https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339) ### Deprecated diff --git a/conf/igenomes.config b/conf/igenomes.config index fbd9881bba..b960797e5e 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -38,7 +38,7 @@ params { ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc" bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - hashmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" + dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/" chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" @@ -51,6 +51,8 @@ params { known_indels = 
"${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" + pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" + pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" snpeff_db = 'GRCh38.99' snpeff_genome = 'GRCh38' vep_cache_version = 104 diff --git a/conf/modules.config b/conf/modules.config index baf058e8cd..5e447ce785 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -475,13 +475,9 @@ process{ // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE // (exit code 141). Rerunning the process will usually work. errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} - ext.args = { params.no_intervals ? "-n" : "" } - } - withName : 'TABIX_VC_.*' { - ext.when = { params.no_intervals } + ext.args = { params.no_intervals ? "-n" : "" } //Why ConcatVCF is never run when no_intervals is set.. 
} withName : 'BGZIP_VC_.*' { - ext.when = { !params.no_intervals } publishDir = [ enabled: false ] @@ -525,8 +521,15 @@ process{ ] } withName: 'FREEBAYES' { + ext.prefix = {"${meta.id}.freebayes"} //To make sure no naming conflicts ensue with module BCFTOOLS_SORT & the naming being correct in the output folder ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' ext.when = { params.tools && params.tools.contains('freebayes') } + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_SORT' { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" }, @@ -534,6 +537,7 @@ process{ saveAs: { meta.num_intervals > 1 ? null : it } ] } + withName : 'TABIX_VC_FREEBAYES' { publishDir = [ mode: params.publish_dir_mode, @@ -900,6 +904,18 @@ process{ } } + //FREEBAYES + withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { + ext.args = "--pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 " + } + //MANTA withName: 'CONCAT_MANTA_SOMATIC' { ext.prefix = {"${meta.id}.somatic_sv"} diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index b1ce4b48bf..542ee03578 100644 Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index f1b41165c5..16c0535ffa 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -26,15 +26,15 @@ inkscape:pagecheckerboard="false" inkscape:document-units="mm" showgrid="false" - inkscape:zoom="0.3993225" - inkscape:cx="227.88598" - inkscape:cy="289.2399" - inkscape:window-width="1600" - inkscape:window-height="847" - inkscape:window-x="1" + inkscape:zoom="1.1294546" + inkscape:cx="220.0177" + inkscape:cy="331.57597" + 
inkscape:window-width="2560" + inkscape:window-height="1027" + inkscape:window-x="1440" inkscape:window-y="25" - inkscape:window-maximized="0" - inkscape:current-layer="layer1" + inkscape:window-maximized="1" + inkscape:current-layer="layer4" width="211mm" fit-margin-top="0" fit-margin-left="0" @@ -1775,24 +1775,6 @@ id="circle1664-96" style="display:inline;fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:3.54334;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" transform="scale(1,-1)" /> - - - - @@ -1829,28 +1811,28 @@ inkscape:export-xdpi="90" inkscape:export-filename="./polygon4618.png" id="text7023" - y="78.274117" - x="547.4516" + y="76.948265" + x="548.77747" style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508" xml:space="preserve">deepvariant freebayes haplotypecaller manta strelka2 tiddit mutect2 ascat msisensorpro controlfreec + cnvkit - deepvariant freebayes manta strelka2 - @@ -2320,49 +2300,14 @@ d="m 408.81856,-257.59438 v -3.75 -3.75 h 47.8125 v 3.75 3.75 z" id="path7187" inkscape:connector-curvature="0" /> - - - - - - - - - + transform="scale(1,-1)" + rx="9.3755903" + ry="9.375" /> @@ -2526,11 +2471,11 @@ transform="scale(1,-1)" style="display:inline;fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:3.54334;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" id="circle5352" - cx="558.50732" + cx="549.13232" cy="228.10608" r="9.375" /> + + + + + + + transform="translate(-45.332838,34.253667)"> - + id="g5553" + transform="translate(-9.375,0.84066785)" + style="stroke-width:1.00002"> + transform="translate(26.582934,0.84066785)" + style="display:inline;stroke-width:1.00002" + id="g5553-4"> + + + + id="g5553-42-5"> - + id="g5553-4-6"> diff --git a/modules.json b/modules.json index 2f80d79252..02497f14d8 100644 --- 
a/modules.json +++ b/modules.json @@ -6,6 +6,9 @@ "ascat": { "git_sha": "f0800157544a82ae222931764483331a81812012" }, + "bcftools/sort": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "bcftools/stats": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -130,7 +133,7 @@ "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" }, "gatk4/markduplicates": { - "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7" + "git_sha": "df2620cfc7e4c21b14ed03c1c928f09fbabf83c4" }, "gatk4/markduplicatesspark": { "git_sha": "e04970b7d249365cafa5a52912f9a28840481c05" diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf new file mode 100644 index 0000000000..82204d07f2 --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/main.nf @@ -0,0 +1,35 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0': + 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.gz") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools \\ + sort \\ + --output ${prefix}.vcf.gz \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/modules/bcftools/sort/meta.yml new file mode 100644 index 0000000000..0c244a486e --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/meta.yml @@ -0,0 +1,43 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + +authors: + - "@Gwennid" diff --git a/modules/nf-core/modules/gatk4/markduplicates/main.nf b/modules/nf-core/modules/gatk4/markduplicates/main.nf index 97a8c3e1e4..68e4a21ae3 100644 --- a/modules/nf-core/modules/gatk4/markduplicates/main.nf +++ b/modules/nf-core/modules/gatk4/markduplicates/main.nf @@ -1,6 +1,6 @@ process GATK4_MARKDUPLICATES { tag "$meta.id" - label 'process_low' + label 'process_medium' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 5050490505..9b1859ae45 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -38,32 +38,28 @@ workflow GERMLINE_VARIANT_CALLING { // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? 
[] : intervals - [new_meta, cram, crai, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, intervals_new] } // Remap channel with gzipped intervals + indexes cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) .map{ meta, cram, crai, bed_tbi, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName //If no interval file provided (0) then add empty list bed_new = num_intervals == 0 ? [] : bed_tbi[0] tbi_new = num_intervals == 0 ? [] : bed_tbi[1] - [new_meta, cram, crai, bed_new, tbi_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, bed_new, tbi_new] } // DEEPVARIANT diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf index 72441bb89e..6d0f1b59b0 100644 --- a/subworkflows/local/pair_variant_calling.nf +++ b/subworkflows/local/pair_variant_calling.nf @@ -4,6 +4,7 @@ include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main' include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main' include { RUN_CONTROLFREEC_SOMATIC } from '../nf-core/variantcalling/controlfreec/somatic/main.nf' +include { RUN_FREEBAYES as RUN_FREEBAYES_SOMATIC } from '../nf-core/variantcalling/freebayes/main.nf' include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf' include { RUN_STRELKA_SOMATIC } from 
'../nf-core/variantcalling/strelka/somatic/main.nf' @@ -34,6 +35,7 @@ workflow PAIR_VARIANT_CALLING { ch_versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + freebayes_vcf = Channel.empty() manta_vcf = Channel.empty() strelka_vcf = Channel.empty() msisensorpro_output = Channel.empty() @@ -42,32 +44,28 @@ workflow PAIR_VARIANT_CALLING { // Remap channel with intervals cram_pair_intervals = cram_pair.combine(intervals) .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? [] : intervals - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals], + normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new] } // Remap channel with gzipped intervals + indexes cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi) .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_tbi, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? 
meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName //If no interval file provided (0) then add empty list bed_new = num_intervals == 0 ? [] : bed_tbi[0] tbi_new = num_intervals == 0 ? [] : bed_tbi[1] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals], + normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new] } if (tools.contains('controlfreec')){ @@ -93,6 +91,13 @@ workflow PAIR_VARIANT_CALLING { ch_versions = ch_versions.mix(RUN_CONTROLFREEC_SOMATIC.out.versions) } + if (tools.contains('freebayes')){ + RUN_FREEBAYES_SOMATIC(cram_pair_intervals, fasta, fasta_fai, intervals_bed_combine_gz) + + freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf + ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions) + } + if (tools.contains('manta')) { RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi, fasta, @@ -114,14 +119,14 @@ workflow PAIR_VARIANT_CALLING { .map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, bed_tbi, num_intervals -> // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName //If no interval file provided (0) then add empty list bed_new = num_intervals == 0 ? [] : bed_tbi[0] tbi_new = num_intervals == 0 ? 
[] : bed_tbi[1] - [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals], + normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new] } } else { cram_pair_strelka = cram_pair_intervals_gz_tbi.map{ @@ -172,6 +177,7 @@ workflow PAIR_VARIANT_CALLING { // } emit: + freebayes_vcf manta_vcf msisensorpro_output mutect2_vcf diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf index f382cb79d0..99aa57430d 100644 --- a/subworkflows/local/tumor_variant_calling.nf +++ b/subworkflows/local/tumor_variant_calling.nf @@ -3,7 +3,6 @@ // Should be only run on patients without normal sample // -//include { RUN_CONTROLFREEC } from '../nf-core/variantcalling/controlfreec/main.nf' include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf' include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main' include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf' @@ -44,32 +43,28 @@ workflow TUMOR_ONLY_VARIANT_CALLING { // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? 
[] : intervals - [new_meta, cram, crai, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, intervals_new] } // Remap channel with gzipped intervals + indexes cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi) .map{ meta, cram, crai, bed_tbi, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName //If no interval file provided (0) then add empty list bed_new = num_intervals == 0 ? [] : bed_tbi[0] tbi_new = num_intervals == 0 ? [] : bed_tbi[1] - [new_meta, cram, crai, bed_new, tbi_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, bed_new, tbi_new] } if(tools.contains('controlfreec')){ diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf index abdbf674ae..8383c1fa74 100644 --- a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf +++ b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf @@ -22,16 +22,15 @@ workflow PREPARE_RECALIBRATION { cram_intervals = cram.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> - new_meta = meta.clone() // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? 
meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? [] : intervals - [new_meta, cram, crai, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, intervals_new] } // Run Baserecalibrator @@ -40,11 +39,10 @@ workflow PREPARE_RECALIBRATION { // Figuring out if there is one or more table(s) from the same sample table_to_merge = BASERECALIBRATOR.out.table .map{ meta, table -> - new_meta = meta.clone() - new_meta.id = meta.sample - def groupKey = groupKey(new_meta, meta.num_intervals) - [new_meta, table] + def new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), table] }.groupTuple() .branch{ //Warning: size() calculates file size not list length here, so use num_intervals instead @@ -58,12 +56,9 @@ workflow PREPARE_RECALIBRATION { GATHERBQSRREPORTS(table_to_merge.multiple) table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table) .map{ meta, table -> - new_meta = meta.clone() - - // remove no longer necessary fields to make sure joining can be done correctly - new_meta.remove('num_intervals') - - [new_meta, table] + // remove no longer necessary fields to make sure joining can be done correctly: num_intervals + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type], + table] } // Gather versions of all tools used diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf index 0ccc0efadf..a0e8671601 100644 --- a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf +++ 
b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf @@ -22,16 +22,15 @@ workflow PREPARE_RECALIBRATION_SPARK { cram_intervals = cram.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> - new_meta = meta.clone() // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? [] : intervals - [new_meta, cram, crai, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, intervals_new] } // Run Baserecalibrator spark @@ -40,11 +39,10 @@ workflow PREPARE_RECALIBRATION_SPARK { // Figuring out if there is one or more table(s) from the same sample table_to_merge = BASERECALIBRATOR_SPARK.out.table .map{ meta, table -> - new_meta = meta.clone() - new_meta.id = meta.sample - def groupKey = groupKey(new_meta, meta.num_intervals) - [new_meta, table] + def new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), table] }.groupTuple() .branch{ //Warning: size() calculates file size not list length here, so use num_intervals instead @@ -58,12 +56,9 @@ workflow PREPARE_RECALIBRATION_SPARK { GATHERBQSRREPORTS(table_to_merge.multiple) table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table) .map{ meta, table -> - new_meta = meta.clone() - - // remove no longer necessary fields to make sure joining can be done correctly - new_meta.remove('num_intervals') - - [new_meta, table] + // remove no longer necessary fields to make 
sure joining can be done correctly: num_intervals + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type], + table] } // Gather versions of all tools used diff --git a/subworkflows/nf-core/gatk4/recalibrate/main.nf b/subworkflows/nf-core/gatk4/recalibrate/main.nf index ec83bbbfdb..450cb26056 100644 --- a/subworkflows/nf-core/gatk4/recalibrate/main.nf +++ b/subworkflows/nf-core/gatk4/recalibrate/main.nf @@ -20,16 +20,14 @@ workflow RECALIBRATE { cram_intervals = cram.combine(intervals) .map{ meta, cram, crai, recal, intervals, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? 
[] : intervals - [new_meta, cram, crai, recal, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, recal, intervals_new] } // Run Applybqsr @@ -39,12 +37,9 @@ workflow RECALIBRATE { MERGE_INDEX_CRAM(APPLYBQSR.out.cram, fasta) ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai -> - new_meta = meta.clone() - - // remove no longer necessary fields to make sure joining can be done correctly - new_meta.remove('num_intervals') - - [new_meta, cram, crai] + // remove no longer necessary fields to make sure joining can be done correctly: num_intervals + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:meta.data_type], + cram, crai] } // Gather versions of all tools used diff --git a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf index ecb70e06bd..86b54ff521 100644 --- a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf +++ b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf @@ -20,16 +20,14 @@ workflow RECALIBRATE_SPARK { cram_intervals = cram.combine(intervals) .map{ meta, cram, crai, recal, intervals, num_intervals -> - new_meta = meta.clone() - // If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples - new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName - new_meta.num_intervals = num_intervals + def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName //If no interval file provided (0) then add empty list intervals_new = num_intervals == 0 ? 
[] : intervals - [new_meta, cram, crai, recal, intervals_new] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals], + cram, crai, recal, intervals_new] } // Run Applybqsr spark @@ -39,12 +37,9 @@ workflow RECALIBRATE_SPARK { MERGE_INDEX_CRAM(APPLYBQSR_SPARK.out.cram, fasta) ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai -> - new_meta = meta.clone() - - // remove no longer necessary fields to make sure joining can be done correctly - new_meta.remove('num_intervals') - - [new_meta, cram, crai] + // remove no longer necessary fields to make sure joining can be done correctly: num_intervals + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:meta.data_type], + cram, crai] } // Gather versions of all tools used diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf index 19e4a37350..d95488dfc4 100644 --- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf @@ -69,11 +69,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { CONCAT_MUTECT2( BGZIP_VC_MUTECT2.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fai, intervals_bed_combine_gz) @@ -90,11 +89,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { MERGEMUTECTSTATS( mutect2_stats_branch.intervals .map{ meta, stats -> - new_meta = meta.clone() - 
new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, stats] + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), stats] }.groupTuple()) mutect2_stats = Channel.empty().mix( @@ -107,11 +105,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { LEARNREADORIENTATIONMODEL( mutect2_f1r2_branch.intervals .map{ meta, f1r2 -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, f1r2] + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), f1r2] }.groupTuple()) // @@ -124,16 +121,20 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { GETPILEUPSUMMARIES_TUMOR ( pileup.tumor.map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = new_meta.num_intervals <= 1 ? new_meta.tumor_id : new_meta.tumor_id + "_" + intervals.baseName + + def new_id = meta.num_intervals <= 1 ? meta.tumor_id : meta.tumor_id + "_" + intervals.baseName + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] + [new_meta, cram, crai, intervals] }, fasta, fai, dict, germline_resource, germline_resource_tbi ) GETPILEUPSUMMARIES_NORMAL ( pileup.normal.map{ meta, cram, crai, intervals -> - new_meta = meta.clone() - new_meta.id = new_meta.num_intervals <= 1 ? new_meta.normal_id : new_meta.normal_id + "_" + intervals.baseName + + def new_id = meta.num_intervals <= 1 ? 
meta.normal_id : meta.normal_id + "_" + intervals.baseName + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] + [new_meta, cram, crai, intervals] }, fasta, fai, dict, germline_resource, germline_resource_tbi ) @@ -152,38 +153,35 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { GATHERPILEUPSUMMARIES_NORMAL( GETPILEUPSUMMARIES_NORMAL.out.table .map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, table] + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.normal_id, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), table] }.groupTuple(), dict) gather_table_normal = Channel.empty().mix( GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal.no_intervals).map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] [new_meta, table] } GATHERPILEUPSUMMARIES_TUMOR( GETPILEUPSUMMARIES_TUMOR.out.table .map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, table] + [groupKey(new_meta, meta.num_intervals), table] }.groupTuple(), dict) gather_table_tumor = Channel.empty().mix( GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor.no_intervals).map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + def new_meta = [patient:meta.patient, 
normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + [new_meta, table] } @@ -195,11 +193,6 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { // //Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. // - mutect2_vcf.view() - mutect2_tbi.view() - LEARNREADORIENTATIONMODEL.out.artifactprior.view() - CALCULATECONTAMINATION.out.segmentation.view() - CALCULATECONTAMINATION.out.contamination.view() ch_filtermutect = mutect2_vcf.join(mutect2_tbi) .join(mutect2_stats) .join(LEARNREADORIENTATIONMODEL.out.artifactprior) @@ -233,9 +226,8 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING { contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> - meta.variantcaller = "Mutect2" - [meta, vcf]} // channel: [ val(meta), [ vcf ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Mutect2"], + vcf]} // channel: [ val(meta), [ vcf ] ] filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf index 6b8ccf1da1..5f07d2e147 100644 --- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf @@ -67,11 +67,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { CONCAT_MUTECT2( 
BGZIP_VC_MUTECT2.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fai, intervals_bed_combine_gz) @@ -88,11 +86,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { MERGEMUTECTSTATS( mutect2_stats_branch.intervals .map{ meta, stats -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, stats] + [groupKey(new_meta, meta.num_intervals), stats] }.groupTuple()) mutect2_stats = Channel.empty().mix( @@ -106,11 +102,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { Channel.empty().mix( mutect2_f1r2_branch.intervals .map{ meta, f1r2 -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, f1r2] + [groupKey(new_meta, meta.num_intervals), f1r2] }.groupTuple(), mutect2_f1r2_branch.no_intervals)) @@ -128,11 +122,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { GATHERPILEUPSUMMARIES( GETPILEUPSUMMARIES.out.table .map{ meta, table -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, table] + [groupKey(new_meta, meta.num_intervals), table] }.groupTuple(), dict) @@ -179,9 +171,8 @@ workflow 
GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING { contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ] segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ] - filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> - meta.variantcaller = "Mutect2" - [meta, vcf] } // channel: [ val(meta), [ vcf ] ] + filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Mutect2"] + , vcf] } // channel: [ val(meta), [ vcf ] ] filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ] filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ] diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf index 7c8cfa37bc..49cc34720a 100644 --- a/subworkflows/nf-core/merge_index_cram.nf +++ b/subworkflows/nf-core/merge_index_cram.nf @@ -17,11 +17,10 @@ workflow MERGE_INDEX_CRAM { // Figuring out if there is one or more cram(s) from the same sample ch_cram_to_merge = ch_cram.map{ meta, cram -> - new_meta = meta.clone() - new_meta.id = meta.sample - def groupKey = groupKey(new_meta, meta.num_intervals) - [new_meta, cram] + new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), cram] }.groupTuple() .branch{ //Warning: size() calculates file size not list length here, so use num_intervals instead diff --git a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf index f48b12a97a..c5e0b07678 100644 --- a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf +++ 
b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf @@ -40,20 +40,17 @@ workflow RUN_CONTROLFREEC_SOMATIC { //Merge mpileup only when intervals and natural order sort them CAT_MPILEUP_NORMAL( mpileup_normal.intervals.map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, pileup] + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), pileup] }.groupTuple(sort:true)) CAT_MPILEUP_TUMOR(mpileup_tumor.intervals .map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, pileup] + [groupKey(new_meta, meta.num_intervals), pileup] } .groupTuple(sort:true)) @@ -61,8 +58,8 @@ workflow RUN_CONTROLFREEC_SOMATIC { CAT_MPILEUP_NORMAL.out.file_out, mpileup_normal.no_intervals ).map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + [new_meta, pileup] } @@ -70,8 +67,7 @@ workflow RUN_CONTROLFREEC_SOMATIC { CAT_MPILEUP_TUMOR.out.file_out, mpileup_tumor.no_intervals ).map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, 
num_intervals:meta.num_intervals] [new_meta, pileup] } diff --git a/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf index 71cefaf1e6..30c03ea743 100644 --- a/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf @@ -31,11 +31,9 @@ workflow RUN_CONTROLFREEC_TUMORONLY { //Merge mpileup only when intervals and natural order sort them CAT_MPILEUP_TUMOR(mpileup_tumor.intervals .map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, pileup] + [groupKey(new_meta, meta.num_intervals), pileup] } .groupTuple(sort:true)) @@ -43,8 +41,8 @@ workflow RUN_CONTROLFREEC_TUMORONLY { CAT_MPILEUP_TUMOR.out.file_out, mpileup_tumor.no_intervals ).map{ meta, pileup -> - new_meta = meta.clone() - new_meta.id = new_meta.sample + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + [new_meta, pileup] } diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf index 666822420e..0929ea7eca 100644 --- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf +++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf @@ -42,11 +42,10 @@ workflow RUN_DEEPVARIANT { CONCAT_DEEPVARIANT_VCF( BGZIP_VC_DEEPVARIANT_VCF.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + 
[groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -54,11 +53,10 @@ workflow RUN_DEEPVARIANT { CONCAT_DEEPVARIANT_GVCF( BGZIP_VC_DEEPVARIANT_GVCF.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -70,8 +68,7 @@ workflow RUN_DEEPVARIANT { deepvariant_gvcf_out.no_intervals, deepvariant_vcf_out.no_intervals) .map{ meta, vcf -> - meta.variantcaller = "Deepvariant" - [meta, vcf] + [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Deepvariant"], vcf] } ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions) diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf index bcc06087e1..2e2eb1f96c 100644 --- a/subworkflows/nf-core/variantcalling/freebayes/main.nf +++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf @@ -1,3 +1,4 @@ +include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main' include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main' include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main' include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main' @@ -26,7 +27,8 @@ workflow RUN_FREEBAYES { }.set{freebayes_vcf_out} // Only when no intervals - TABIX_VC_FREEBAYES(freebayes_vcf_out.no_intervals) + BCFTOOLS_SORT(freebayes_vcf_out.no_intervals) + TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf) // Only when using intervals BGZIP_VC_FREEBAYES(freebayes_vcf_out.intervals) @@ 
-34,11 +36,12 @@ workflow RUN_FREEBAYES { CONCAT_FREEBAYES( BGZIP_VC_FREEBAYES.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample + + new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] + : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals] + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -48,10 +51,15 @@ workflow RUN_FREEBAYES { CONCAT_FREEBAYES.out.vcf, freebayes_vcf_out.no_intervals) .map{ meta, vcf -> - meta.variantcaller = "FreeBayes" - [meta, vcf] + + new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample + + new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals, variantcaller:"Freebayes"] + : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals, variantcaller:"Freebayes"] + [new_meta, vcf] } + ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions) ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index 1ff59e542d..fe7e24fbbf 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -44,10 +44,11 @@ workflow RUN_HAPLOTYPECALLER { CONCAT_HAPLOTYPECALLER( BGZIP_VC_HAPLOTYPECALLER.out.output .map{ 
meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - [new_meta, vcf] - }.groupTuple(size: num_intervals), + + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, new_meta.num_intervals), vcf] + }.groupTuple(), fasta_fai, intervals_bed_gz) diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 864fbc075e..4d71dc1250 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -43,11 +43,10 @@ workflow RUN_MANTA_GERMLINE { CONCAT_MANTA_SMALL_INDELS( BGZIP_VC_MANTA_SMALL_INDELS.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -57,11 +56,10 @@ workflow RUN_MANTA_GERMLINE { CONCAT_MANTA_SV( BGZIP_VC_MANTA_SV.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -71,11 +69,10 @@ workflow RUN_MANTA_GERMLINE { CONCAT_MANTA_DIPLOID( BGZIP_VC_MANTA_DIPLOID.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, 
gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -89,8 +86,7 @@ workflow RUN_MANTA_GERMLINE { //manta_small_indels_vcf.no_intervals, manta_sv_vcf.no_intervals) .map{ meta, vcf -> - meta.variantcaller = "Manta" - [meta, vcf] + [ [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf] } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf index e6005000c2..c3d108fc7a 100644 --- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf @@ -52,11 +52,10 @@ workflow RUN_MANTA_SOMATIC { CONCAT_MANTA_SV( BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] + + [ groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -65,11 +64,9 @@ workflow RUN_MANTA_SOMATIC { CONCAT_MANTA_SMALL_INDELS( BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ 
-78,11 +75,9 @@ workflow RUN_MANTA_SOMATIC { CONCAT_MANTA_DIPLOID( BGZIP_VC_MANTA_DIPLOID.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -91,11 +86,9 @@ workflow RUN_MANTA_SOMATIC { CONCAT_MANTA_SOMATIC( BGZIP_VC_MANTA_SOMATIC.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -111,24 +104,24 @@ workflow RUN_MANTA_SOMATIC { manta_diploid_sv_vcf.no_intervals, manta_somatic_sv_vcf.no_intervals ).map{ meta, vcf -> - meta.variantcaller = "Manta" - [meta, vcf] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], + vcf] } manta_candidate_small_indels_vcf = Channel.empty().mix( CONCAT_MANTA_SMALL_INDELS.out.vcf, manta_candidate_small_indels_vcf.no_intervals ).map{ meta, vcf -> - meta.variantcaller = "Manta" - [meta, vcf] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], + vcf] } manta_candidate_small_indels_vcf_tbi = Channel.empty().mix( 
CONCAT_MANTA_SMALL_INDELS.out.tbi, manta_candidate_small_indels_vcf_tbi.no_intervals ).map{ meta, vcf -> - meta.variantcaller = "Manta" - [meta, vcf] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"], + vcf] } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf index a09168cd1d..b0c1bfc807 100644 --- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf @@ -42,11 +42,10 @@ workflow RUN_MANTA_TUMORONLY { CONCAT_MANTA_SMALL_INDELS( BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -55,11 +54,10 @@ workflow RUN_MANTA_TUMORONLY { CONCAT_MANTA_SV( BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -68,11 +66,10 @@ workflow RUN_MANTA_TUMORONLY { CONCAT_MANTA_TUMOR( BGZIP_VC_MANTA_TUMOR.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, 
sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -86,8 +83,8 @@ workflow RUN_MANTA_TUMORONLY { manta_candidate_sv_vcf.no_intervals, manta_tumor_sv_vcf.no_intervals ).map{ meta, vcf -> - meta.variantcaller = "Manta" - [meta, vcf] + [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Manta"], + vcf] } ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions) diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index a9b0e5d1d9..4d06d8a3a8 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -34,11 +34,10 @@ workflow RUN_STRELKA_SINGLE { CONCAT_STRELKA( BGZIP_VC_STRELKA.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -48,11 +47,10 @@ workflow RUN_STRELKA_SINGLE { CONCAT_STRELKA_GENOME( BGZIP_VC_STRELKA_GENOME.out.output .map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.sample - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals] + + [groupKey(new_meta, meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -64,8 +62,7 @@ workflow RUN_STRELKA_SINGLE { //strelka_genome_vcf.no_intervals, 
strelka_vcf.no_intervals) .map{ meta, vcf -> - meta.variantcaller = "Strelka" - [meta, vcf] + [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Strelka"], vcf] } ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions) diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf index 1c7353c568..e34117bb16 100644 --- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf @@ -32,11 +32,9 @@ workflow RUN_STRELKA_SOMATIC { BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals) CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -44,11 +42,9 @@ workflow RUN_STRELKA_SOMATIC { BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals) CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.output.map{ meta, vcf -> - new_meta = meta.clone() - new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id + new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals] - def groupKey = groupKey(meta, meta.num_intervals) - [new_meta, vcf] + [groupKey(new_meta, new_meta.num_intervals), vcf] }.groupTuple(), fasta_fai, intervals_bed_gz) @@ -60,8 +56,8 @@ workflow RUN_STRELKA_SOMATIC { strelka_vcf_snvs.no_intervals, strelka_vcf_indels.no_intervals) .map{ meta, vcf -> - 
meta.variantcaller = "Strelka" - [meta, vcf] + [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Strelka"] + , vcf] } ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions) diff --git a/tests/csv/3.0/fastq_pair.csv b/tests/csv/3.0/fastq_pair.csv index ce9e72fd97..0958c9aff9 100644 --- a/tests/csv/3.0/fastq_pair.csv +++ b/tests/csv/3.0/fastq_pair.csv @@ -1,3 +1,3 @@ patient,gender,status,sample,lane,fastq_1,fastq_2 test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz -test,XX,1,test2,test2_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz +test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz diff --git a/tests/test_pair.yml b/tests/test_pair.yml index eedd7a6490..ee7ecd547b 100644 --- a/tests/test_pair.yml +++ b/tests/test_pair.yml @@ -25,7 +25,7 @@ - path: results/preprocessing/csv/recalibrated_test.csv - path: results/preprocessing/csv/recalibrated_test2.csv - path: results/reports/fastqc/test-test_L1 - - path: results/reports/fastqc/test2-test2_L1 + - path: results/reports/fastqc/test2-test_L1 - path: results/reports/markduplicates/test/test.md.metrics - path: results/reports/markduplicates/test2/test2.md.metrics - path: results/reports/qualimap/test/test.mapped diff --git a/tests/test_tools.yml 
b/tests/test_tools.yml index b7f5e451ab..e72fb284a2 100644 --- a/tests/test_tools.yml +++ b/tests/test_tools.yml @@ -98,46 +98,67 @@ - path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi - name: Run variant calling on germline sample with freebayes - command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes + command: nextflow run main.nf -profile test,targeted,docker --tools freebayes tags: - freebayes - germline - variant_calling files: - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + - path: results/variant_calling/test/freebayes/test.vcf.gz + - path: results/variant_calling/test/freebayes/test.vcf.gz.tbi - name: Run variant calling on germline sample with freebayes without intervals - command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes --no_intervals + command: nextflow run main.nf -profile test,docker --tools freebayes --no_intervals tags: - freebayes - germline - no_intervals - variant_calling files: - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz - - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi + - path: results/variant_calling/test/freebayes/test.vcf.gz + - path: results/variant_calling/test/freebayes/test.vcf.gz.tbi -- name: Run variant calling on tumor_only sample with freebayes - command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes +- name: Run variant calling on somatic sample with freebayes + command: nextflow run main.nf -profile test,pair,targeted,docker --tools freebayes tags: - freebayes - - tumor_only + - somatic - variant_calling files: - - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz - - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi + - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz + - path: 
results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz.tbi -- name: Run variant calling on tumor_only sample with freebayes without intervals - command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals +- name: Run variant calling on somatic sample with freebayes without intervals + command: nextflow run main.nf -profile test,pair,docker --tools freebayes --no_intervals tags: - freebayes + - somatic - no_intervals - - tumor_only - variant_calling files: - - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz - - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi + - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz + - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz.tbi + +# - name: Run variant calling on tumor_only sample with freebayes +# command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes +# tags: +# - freebayes +# - tumor_only +# - variant_calling +# files: +# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz +# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi + +# - name: Run variant calling on tumor_only sample with freebayes without intervals +# command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals +# tags: +# - freebayes +# - no_intervals +# - tumor_only +# - variant_calling +# files: +# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz +# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi - name: Run variant calling on germline sample with haplotypecaller command: nextflow run main.nf -profile test,tools_germline,docker --tools haplotypecaller diff --git a/workflows/sarek.nf b/workflows/sarek.nf index b763d45926..50b7c130e6 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -375,44 +375,32 @@ workflow SAREK { // STEP 1: MAPPING READS TO REFERENCE GENOME // reads will be 
sorted - ch_reads_to_map = ch_reads_to_map.map{ meta, reads -> - new_meta = meta.clone() - // update ID when no multiple lanes or splitted fastqs - new_meta.id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id + new_id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id - [new_meta, reads] + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, numLanes:meta.numLanes, read_group:meta.read_group, data_type:meta.data_type, size:meta.size], + reads] } GATK4_MAPPING(ch_reads_to_map, ch_map_index, true) // Grouping the bams from the same samples not to stall the workflow ch_bam_mapped = GATK4_MAPPING.out.bam.map{ meta, bam -> - new_meta = meta.clone() - numLanes = meta.numLanes ?: 1 size = meta.size ?: 1 - // remove no longer necessary fields - new_meta.remove('read_group') // Now in the BAM header - new_meta.remove('numLanes') // Was only needed for mapping - new_meta.remove('size') // Was only needed for mapping - // update ID to be based on the sample name - new_meta.id = meta.sample - // update data_type - new_meta.data_type = 'bam' - + // remove no longer necessary fields: + // read_group: Now in the BAM header + // numLanes: Was only needed for mapping + // size: Was only needed for mapping + new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:"bam"] // Use groupKey to make sure that the correct group can advance as soon as it is complete // and not stall the workflow until all reads from all channels are mapped - def groupKey = groupKey(new_meta, numLanes * size) - - //Returns the values we need - [groupKey, new_meta, bam] - }.groupTuple(by:[0,1]) - .map{ groupKey, new_meta, bam -> [new_meta, bam] } + [ groupKey(new_meta, numLanes * size), bam] + }.groupTuple() // gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here // Except if and only if skipping markduplicates or saving mapped bams @@ -531,9 +519,8 @@ 
workflow SAREK { ch_cram_markduplicates_no_spark, ch_cram_markduplicates_spark, ch_cram_no_markduplicates_restart).map{ meta, cram, crai -> - meta_new = meta.clone() - meta_new.data_type = "cram" //Make sure correct data types are carried through - [meta_new, cram, crai] + //Make sure correct data types are carried through + [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:"cram"], cram, crai] } // Create CSV to restart from this step @@ -970,11 +957,15 @@ def extract_csv(csv_file) { def fastq_1 = file(row.fastq_1, checkIfExists: true) def fastq_2 = file(row.fastq_2, checkIfExists: true) def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + def flowcell = flowcellLaneFromFastq(fastq_1) + //Don't use a random element for ID, it breaks resuming + def read_group = "\"@RG\\tID:${flowcell}.${row.sample}.${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "fastq" - meta.test = "test" + meta.size = 1 // default number of splitted fastq return [meta, [fastq_1, fastq_2]] // start from BAM @@ -982,7 +973,7 @@ def extract_csv(csv_file) { meta.id = "${row.sample}-${row.lane}".toString() def bam = file(row.bam, checkIfExists: true) def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" + def read_group = "\"@RG\\tID:${row_sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\"" meta.numLanes = numLanes.toInteger() meta.read_group = read_group.toString() meta.data_type = "bam" @@ -1031,6 +1022,34 @@ def extract_csv(csv_file) { } } +// Parse first line of a FASTQ file, return the flowcell id and lane number. +def flowcellLaneFromFastq(path) { + // expected format: + // xx:yy:FLOWCELLID:LANE:... (seven fields) + // or + // FLOWCELLID:LANE:xx:... (five fields) + def line + path.withInputStream { + InputStream gzipStream = new java.util.zip.GZIPInputStream(it) + Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + BufferedReader buffered = new BufferedReader(decoder) + line = buffered.readLine() + } + assert line.startsWith('@') + line = line.substring(1) + def fields = line.split(':') + String fcid + + if (fields.size() >= 7) { + // CASAVA 1.8+ format, from https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm + // "@::::::: :::" + fcid = fields[2] + } else if (fields.size() == 5) { + fcid = fields[0] + } + return fcid +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END