diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2c0d4c869..4643bab21a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -73,6 +73,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#513](https://github.com/nf-core/sarek/pull/513), [#527](https://github.com/nf-core/sarek/pull/527) - CNV is back
- [#529](https://github.com/nf-core/sarek/pull/529) - Do not save `versions.yml` files
- [#524](https://github.com/nf-core/sarek/pull/524) - Fix intervals usage by counting the actual list of scatter/gather files produced and not overall number of intervals
+- [#549](https://github.com/nf-core/sarek/pull/549) - Fix unique lanes required for Freebayes (issue [#311](https://github.com/nf-core/sarek/issues/311)); replace `meta.clone()` with an explicit copy of the meta map to avoid the issues discussed in [this Slack thread](https://nfcore.slack.com/archives/C027CM7P08M/p1644241819942339)
### Deprecated
diff --git a/conf/igenomes.config b/conf/igenomes.config
index fbd9881bba..b960797e5e 100644
--- a/conf/igenomes.config
+++ b/conf/igenomes.config
@@ -38,7 +38,7 @@ params {
ac_loci_gc = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/ASCAT/1000G_phase3_GRCh38_maf0.3.loci.gc"
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/"
bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/"
- hashmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/"
+ dragmap = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/dragmap/"
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes"
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz"
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi"
@@ -51,6 +51,8 @@ params {
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz"
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi"
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem"
+ pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz"
+ pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi"
snpeff_db = 'GRCh38.99'
snpeff_genome = 'GRCh38'
vep_cache_version = 104
diff --git a/conf/modules.config b/conf/modules.config
index baf058e8cd..5e447ce785 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -475,13 +475,9 @@ process{
// For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE
// (exit code 141). Rerunning the process will usually work.
errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'}
- ext.args = { params.no_intervals ? "-n" : "" }
- }
- withName : 'TABIX_VC_.*' {
- ext.when = { params.no_intervals }
+ ext.args = { params.no_intervals ? "-n" : "" } // NOTE(review): unclear why ConcatVCF is never run when no_intervals is set - confirm
}
withName : 'BGZIP_VC_.*' {
- ext.when = { !params.no_intervals }
publishDir = [
enabled: false
]
@@ -525,8 +521,15 @@ process{
]
}
withName: 'FREEBAYES' {
+ ext.prefix = {"${meta.id}.freebayes"} // Distinct prefix: avoids file-name conflicts with module BCFTOOLS_SORT and keeps the naming correct in the output folder
ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1'
ext.when = { params.tools && params.tools.contains('freebayes') }
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: 'BCFTOOLS_SORT' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/${meta.id}/freebayes" },
@@ -534,6 +537,7 @@ process{
saveAs: { meta.num_intervals > 1 ? null : it }
]
}
+
withName : 'TABIX_VC_FREEBAYES' {
publishDir = [
mode: params.publish_dir_mode,
@@ -900,6 +904,18 @@ process{
}
}
+ //FREEBAYES
+ withName: 'NFCORE_SAREK:SAREK:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' {
+ ext.args = "--pooled-continuous \
+ --pooled-discrete \
+ --genotype-qualities \
+ --report-genotype-likelihood-max \
+ --allele-balance-priors-off \
+ --min-alternate-fraction 0.03 \
+ --min-repeat-entropy 1 \
+ --min-alternate-count 2 "
+ }
+
//MANTA
withName: 'CONCAT_MANTA_SOMATIC' {
ext.prefix = {"${meta.id}.somatic_sv"}
diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png
index b1ce4b48bf..542ee03578 100644
Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ
diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg
index f1b41165c5..16c0535ffa 100644
--- a/docs/images/sarek_subway.svg
+++ b/docs/images/sarek_subway.svg
@@ -26,15 +26,15 @@
inkscape:pagecheckerboard="false"
inkscape:document-units="mm"
showgrid="false"
- inkscape:zoom="0.3993225"
- inkscape:cx="227.88598"
- inkscape:cy="289.2399"
- inkscape:window-width="1600"
- inkscape:window-height="847"
- inkscape:window-x="1"
+ inkscape:zoom="1.1294546"
+ inkscape:cx="220.0177"
+ inkscape:cy="331.57597"
+ inkscape:window-width="2560"
+ inkscape:window-height="1027"
+ inkscape:window-x="1440"
inkscape:window-y="25"
- inkscape:window-maximized="0"
- inkscape:current-layer="layer1"
+ inkscape:window-maximized="1"
+ inkscape:current-layer="layer4"
width="211mm"
fit-margin-top="0"
fit-margin-left="0"
@@ -1775,24 +1775,6 @@
id="circle1664-96"
style="display:inline;fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:3.54334;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
transform="scale(1,-1)" />
-
-
-
-
@@ -1829,28 +1811,28 @@
inkscape:export-xdpi="90"
inkscape:export-filename="./polygon4618.png"
id="text7023"
- y="78.274117"
- x="547.4516"
+ y="76.948265"
+ x="548.77747"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.5px;line-height:1.25;font-family:'Maven Pro';-inkscape-font-specification:'Maven Pro';display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.937508"
xml:space="preserve">deepvariant
freebayes
haplotypecaller
manta
strelka2
tiddit
mutect2
ascat
msisensorpro
controlfreec
+ cnvkit
- deepvariant
freebayes
manta
strelka2
-
@@ -2320,49 +2300,14 @@
d="m 408.81856,-257.59438 v -3.75 -3.75 h 47.8125 v 3.75 3.75 z"
id="path7187"
inkscape:connector-curvature="0" />
-
-
-
-
-
-
-
-
-
+ transform="scale(1,-1)"
+ rx="9.3755903"
+ ry="9.375" />
@@ -2526,11 +2471,11 @@
transform="scale(1,-1)"
style="display:inline;fill:#ffffff;fill-rule:evenodd;stroke:#000000;stroke-width:3.54334;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="circle5352"
- cx="558.50732"
+ cx="549.13232"
cy="228.10608"
r="9.375" />
+
+
+
+
+
+
+ transform="translate(-45.332838,34.253667)">
-
+ id="g5553"
+ transform="translate(-9.375,0.84066785)"
+ style="stroke-width:1.00002">
+ transform="translate(26.582934,0.84066785)"
+ style="display:inline;stroke-width:1.00002"
+ id="g5553-4">
+
+
+
+ id="g5553-42-5">
-
+ id="g5553-4-6">
diff --git a/modules.json b/modules.json
index 2f80d79252..02497f14d8 100644
--- a/modules.json
+++ b/modules.json
@@ -6,6 +6,9 @@
"ascat": {
"git_sha": "f0800157544a82ae222931764483331a81812012"
},
+ "bcftools/sort": {
+ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+ },
"bcftools/stats": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
@@ -130,7 +133,7 @@
"git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7"
},
"gatk4/markduplicates": {
- "git_sha": "169b2b96c1167f89ab07127b7057c1d90a6996c7"
+ "git_sha": "df2620cfc7e4c21b14ed03c1c928f09fbabf83c4"
},
"gatk4/markduplicatesspark": {
"git_sha": "e04970b7d249365cafa5a52912f9a28840481c05"
diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf
new file mode 100644
index 0000000000..82204d07f2
--- /dev/null
+++ b/modules/nf-core/modules/bcftools/sort/main.nf
@@ -0,0 +1,35 @@
+process BCFTOOLS_SORT { // Sorts the input VCF/BCF with `bcftools sort` and writes <prefix>.vcf.gz
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda (params.enable_conda ? "bioconda::bcftools=1.14" : null)
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0':
+ 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }"
+
+ input:
+ tuple val(meta), path(vcf) // meta: sample map (meta.id is used for tag and default prefix); vcf: file to sort
+
+ output:
+ tuple val(meta), path("*.gz") , emit: vcf // the sorted file, matched by its .gz suffix
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '--output-type z' // default to bgzip-compressed VCF so the content matches the .vcf.gz file name; a configured ext.args still takes precedence
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ bcftools \\
+ sort \\
+ --output ${prefix}.vcf.gz \\
+ $args \\
+ $vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/modules/bcftools/sort/meta.yml
new file mode 100644
index 0000000000..0c244a486e
--- /dev/null
+++ b/modules/nf-core/modules/bcftools/sort/meta.yml
@@ -0,0 +1,43 @@
+name: bcftools_sort
+description: Sorts VCF files
+keywords:
+ - sorting
+ - VCF
+ - variant calling
+tools:
+ - sort:
+ description: Sort VCF files by coordinates.
+ homepage: http://samtools.github.io/bcftools/bcftools.html
+ documentation: http://www.htslib.org/doc/bcftools.html
+ tool_dev_url: https://github.com/samtools/bcftools
+ doi: "10.1093/bioinformatics/btp352"
+ licence: ["MIT"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: The VCF/BCF file to be sorted
+ pattern: "*.{vcf.gz,vcf,bcf}"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Sorted VCF file
+ pattern: "*.{vcf.gz}"
+
+authors:
+ - "@Gwennid"
diff --git a/modules/nf-core/modules/gatk4/markduplicates/main.nf b/modules/nf-core/modules/gatk4/markduplicates/main.nf
index 97a8c3e1e4..68e4a21ae3 100644
--- a/modules/nf-core/modules/gatk4/markduplicates/main.nf
+++ b/modules/nf-core/modules/gatk4/markduplicates/main.nf
@@ -1,6 +1,6 @@
process GATK4_MARKDUPLICATES {
tag "$meta.id"
- label 'process_low'
+ label 'process_medium'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf
index 5050490505..9b1859ae45 100644
--- a/subworkflows/local/germline_variant_calling.nf
+++ b/subworkflows/local/germline_variant_calling.nf
@@ -38,32 +38,28 @@ workflow GERMLINE_VARIANT_CALLING {
// Remap channel with intervals
cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, intervals_new]
}
// Remap channel with gzipped intervals + indexes
cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi)
.map{ meta, cram, crai, bed_tbi, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [new_meta, cram, crai, bed_new, tbi_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, bed_new, tbi_new]
}
// DEEPVARIANT
diff --git a/subworkflows/local/pair_variant_calling.nf b/subworkflows/local/pair_variant_calling.nf
index 72441bb89e..6d0f1b59b0 100644
--- a/subworkflows/local/pair_variant_calling.nf
+++ b/subworkflows/local/pair_variant_calling.nf
@@ -4,6 +4,7 @@
include { GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main'
include { MSISENSORPRO_MSI_SOMATIC } from '../../modules/nf-core/modules/msisensorpro/msi_somatic/main'
include { RUN_CONTROLFREEC_SOMATIC } from '../nf-core/variantcalling/controlfreec/somatic/main.nf'
+include { RUN_FREEBAYES as RUN_FREEBAYES_SOMATIC } from '../nf-core/variantcalling/freebayes/main.nf'
include { RUN_MANTA_SOMATIC } from '../nf-core/variantcalling/manta/somatic/main.nf'
include { RUN_STRELKA_SOMATIC } from '../nf-core/variantcalling/strelka/somatic/main.nf'
@@ -34,6 +35,7 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = Channel.empty()
//TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config
+ freebayes_vcf = Channel.empty()
manta_vcf = Channel.empty()
strelka_vcf = Channel.empty()
msisensorpro_output = Channel.empty()
@@ -42,32 +44,28 @@ workflow PAIR_VARIANT_CALLING {
// Remap channel with intervals
cram_pair_intervals = cram_pair.combine(intervals)
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
+ normal_cram, normal_crai, tumor_cram, tumor_crai, intervals_new]
}
// Remap channel with gzipped intervals + indexes
cram_pair_intervals_gz_tbi = cram_pair.combine(intervals_bed_gz_tbi)
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_tbi, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
+ normal_cram, normal_crai, tumor_cram, tumor_crai, bed_new, tbi_new]
}
if (tools.contains('controlfreec')){
@@ -93,6 +91,13 @@ workflow PAIR_VARIANT_CALLING {
ch_versions = ch_versions.mix(RUN_CONTROLFREEC_SOMATIC.out.versions)
}
+ if (tools.contains('freebayes')){
+ RUN_FREEBAYES_SOMATIC(cram_pair_intervals, fasta, fasta_fai, intervals_bed_combine_gz)
+
+ freebayes_vcf = RUN_FREEBAYES_SOMATIC.out.freebayes_vcf
+ ch_versions = ch_versions.mix(RUN_FREEBAYES_SOMATIC.out.versions)
+ }
+
if (tools.contains('manta')) {
RUN_MANTA_SOMATIC( cram_pair_intervals_gz_tbi,
fasta,
@@ -114,14 +119,14 @@ workflow PAIR_VARIANT_CALLING {
.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, bed_tbi, num_intervals ->
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.tumor_id + "_vs_" + meta.normal_id : meta.tumor_id + "_vs_" + meta.normal_id + "_" + bed_tbi[0].simpleName
//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [new_meta, normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:num_intervals],
+ normal_cram, normal_crai, tumor_cram, tumor_crai, vcf, vcf_tbi, bed_new, tbi_new]
}
} else {
cram_pair_strelka = cram_pair_intervals_gz_tbi.map{
@@ -172,6 +177,7 @@ workflow PAIR_VARIANT_CALLING {
// }
emit:
+ freebayes_vcf
manta_vcf
msisensorpro_output
mutect2_vcf
diff --git a/subworkflows/local/tumor_variant_calling.nf b/subworkflows/local/tumor_variant_calling.nf
index f382cb79d0..99aa57430d 100644
--- a/subworkflows/local/tumor_variant_calling.nf
+++ b/subworkflows/local/tumor_variant_calling.nf
@@ -3,7 +3,6 @@
// Should be only run on patients without normal sample
//
-//include { RUN_CONTROLFREEC } from '../nf-core/variantcalling/controlfreec/main.nf'
include { RUN_FREEBAYES } from '../nf-core/variantcalling/freebayes/main.nf'
include { GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING } from '../../subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main'
include { RUN_MANTA_TUMORONLY } from '../nf-core/variantcalling/manta/tumoronly/main.nf'
@@ -44,32 +43,28 @@ workflow TUMOR_ONLY_VARIANT_CALLING {
// Remap channel with intervals
cram_recalibrated_intervals = cram_recalibrated.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, intervals_new]
}
// Remap channel with gzipped intervals + indexes
cram_recalibrated_intervals_gz_tbi = cram_recalibrated.combine(intervals_bed_gz_tbi)
.map{ meta, cram, crai, bed_tbi, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + bed_tbi[0].simpleName
//If no interval file provided (0) then add empty list
bed_new = num_intervals == 0 ? [] : bed_tbi[0]
tbi_new = num_intervals == 0 ? [] : bed_tbi[1]
- [new_meta, cram, crai, bed_new, tbi_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, bed_new, tbi_new]
}
if(tools.contains('controlfreec')){
diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
index abdbf674ae..8383c1fa74 100644
--- a/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
+++ b/subworkflows/nf-core/gatk4/prepare_recalibration/main.nf
@@ -22,16 +22,15 @@ workflow PREPARE_RECALIBRATION {
cram_intervals = cram.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
- new_meta = meta.clone()
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, intervals_new]
}
// Run Baserecalibrator
@@ -40,11 +39,10 @@ workflow PREPARE_RECALIBRATION {
// Figuring out if there is one or more table(s) from the same sample
table_to_merge = BASERECALIBRATOR.out.table
.map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = meta.sample
- def groupKey = groupKey(new_meta, meta.num_intervals)
- [new_meta, table]
+ def new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), table]
}.groupTuple()
.branch{
//Warning: size() calculates file size not list length here, so use num_intervals instead
@@ -58,12 +56,9 @@ workflow PREPARE_RECALIBRATION {
GATHERBQSRREPORTS(table_to_merge.multiple)
table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table)
.map{ meta, table ->
- new_meta = meta.clone()
-
- // remove no longer necessary fields to make sure joining can be done correctly
- new_meta.remove('num_intervals')
-
- [new_meta, table]
+ // remove no longer necessary fields to make sure joining can be done correctly: num_intervals
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type],
+ table]
}
// Gather versions of all tools used
diff --git a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
index 0ccc0efadf..a0e8671601 100644
--- a/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
+++ b/subworkflows/nf-core/gatk4/prepare_recalibration_spark/main.nf
@@ -22,16 +22,15 @@ workflow PREPARE_RECALIBRATION_SPARK {
cram_intervals = cram.combine(intervals)
.map{ meta, cram, crai, intervals, num_intervals ->
- new_meta = meta.clone()
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, intervals_new]
}
// Run Baserecalibrator spark
@@ -40,11 +39,10 @@ workflow PREPARE_RECALIBRATION_SPARK {
// Figuring out if there is one or more table(s) from the same sample
table_to_merge = BASERECALIBRATOR_SPARK.out.table
.map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = meta.sample
- def groupKey = groupKey(new_meta, meta.num_intervals)
- [new_meta, table]
+ def new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), table]
}.groupTuple()
.branch{
//Warning: size() calculates file size not list length here, so use num_intervals instead
@@ -58,12 +56,9 @@ workflow PREPARE_RECALIBRATION_SPARK {
GATHERBQSRREPORTS(table_to_merge.multiple)
table_bqsr = table_to_merge.single.mix(GATHERBQSRREPORTS.out.table)
.map{ meta, table ->
- new_meta = meta.clone()
-
- // remove no longer necessary fields to make sure joining can be done correctly
- new_meta.remove('num_intervals')
-
- [new_meta, table]
+ // remove no longer necessary fields to make sure joining can be done correctly: num_intervals
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type],
+ table]
}
// Gather versions of all tools used
diff --git a/subworkflows/nf-core/gatk4/recalibrate/main.nf b/subworkflows/nf-core/gatk4/recalibrate/main.nf
index ec83bbbfdb..450cb26056 100644
--- a/subworkflows/nf-core/gatk4/recalibrate/main.nf
+++ b/subworkflows/nf-core/gatk4/recalibrate/main.nf
@@ -20,16 +20,14 @@ workflow RECALIBRATE {
cram_intervals = cram.combine(intervals)
.map{ meta, cram, crai, recal, intervals, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, recal, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, recal, intervals_new]
}
// Run Applybqsr
@@ -39,12 +37,9 @@ workflow RECALIBRATE {
MERGE_INDEX_CRAM(APPLYBQSR.out.cram, fasta)
ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai ->
- new_meta = meta.clone()
-
- // remove no longer necessary fields to make sure joining can be done correctly
- new_meta.remove('num_intervals')
-
- [new_meta, cram, crai]
+ // remove no longer necessary fields to make sure joining can be done correctly: num_intervals
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:meta.data_type],
+ cram, crai]
}
// Gather versions of all tools used
diff --git a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
index ecb70e06bd..86b54ff521 100644
--- a/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
+++ b/subworkflows/nf-core/gatk4/recalibrate_spark/main.nf
@@ -20,16 +20,14 @@ workflow RECALIBRATE_SPARK {
cram_intervals = cram.combine(intervals)
.map{ meta, cram, crai, recal, intervals, num_intervals ->
- new_meta = meta.clone()
-
// If either no scatter/gather is done, i.e. no interval (0) or one interval (1), then don't rename samples
- new_meta.id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
- new_meta.num_intervals = num_intervals
+ def new_id = num_intervals <= 1 ? meta.sample : meta.sample + "_" + intervals.baseName
//If no interval file provided (0) then add empty list
intervals_new = num_intervals == 0 ? [] : intervals
- [new_meta, cram, crai, recal, intervals_new]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, data_type:meta.data_type, num_intervals:num_intervals],
+ cram, crai, recal, intervals_new]
}
// Run Applybqsr spark
@@ -39,12 +37,9 @@ workflow RECALIBRATE_SPARK {
MERGE_INDEX_CRAM(APPLYBQSR_SPARK.out.cram, fasta)
ch_cram_recal_out = MERGE_INDEX_CRAM.out.cram_crai.map{ meta, cram, crai ->
- new_meta = meta.clone()
-
- // remove no longer necessary fields to make sure joining can be done correctly
- new_meta.remove('num_intervals')
-
- [new_meta, cram, crai]
+ // remove no longer necessary fields to make sure joining can be done correctly: num_intervals
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:meta.data_type],
+ cram, crai]
}
// Gather versions of all tools used
diff --git a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
index 19e4a37350..d95488dfc4 100644
--- a/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
+++ b/subworkflows/nf-core/gatk4/tumor_normal_somatic_variant_calling/main.nf
@@ -69,11 +69,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
CONCAT_MUTECT2(
BGZIP_VC_MUTECT2.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fai,
intervals_bed_combine_gz)
@@ -90,11 +89,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
MERGEMUTECTSTATS(
mutect2_stats_branch.intervals
.map{ meta, stats ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, stats]
+ def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), stats]
}.groupTuple())
mutect2_stats = Channel.empty().mix(
@@ -107,11 +105,10 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
LEARNREADORIENTATIONMODEL(
mutect2_f1r2_branch.intervals
.map{ meta, f1r2 ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, f1r2]
+ def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), f1r2]
}.groupTuple())
//
@@ -124,16 +121,20 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GETPILEUPSUMMARIES_TUMOR ( pileup.tumor.map{
meta, cram, crai, intervals ->
- new_meta = meta.clone()
- new_meta.id = new_meta.num_intervals <= 1 ? new_meta.tumor_id : new_meta.tumor_id + "_" + intervals.baseName
+
+ def new_id = meta.num_intervals <= 1 ? meta.tumor_id : meta.tumor_id + "_" + intervals.baseName
+ def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals]
+
[new_meta, cram, crai, intervals]
},
fasta, fai, dict, germline_resource, germline_resource_tbi )
GETPILEUPSUMMARIES_NORMAL ( pileup.normal.map{
meta, cram, crai, intervals ->
- new_meta = meta.clone()
- new_meta.id = new_meta.num_intervals <= 1 ? new_meta.normal_id : new_meta.normal_id + "_" + intervals.baseName
+
+ def new_id = meta.num_intervals <= 1 ? meta.normal_id : meta.normal_id + "_" + intervals.baseName // normal branch: id must be built from normal_id, not tumor_id
+ def new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals]
+
[new_meta, cram, crai, intervals]
},
fasta, fai, dict, germline_resource, germline_resource_tbi )
@@ -152,38 +153,35 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
GATHERPILEUPSUMMARIES_NORMAL(
GETPILEUPSUMMARIES_NORMAL.out.table
.map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, table]
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.normal_id, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
dict)
gather_table_normal = Channel.empty().mix(
GATHERPILEUPSUMMARIES_NORMAL.out.table,
pileup_table_normal.no_intervals).map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
[new_meta, table]
}
GATHERPILEUPSUMMARIES_TUMOR(
GETPILEUPSUMMARIES_TUMOR.out.table
.map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, table]
+ [groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
dict)
gather_table_tumor = Channel.empty().mix(
GATHERPILEUPSUMMARIES_TUMOR.out.table,
pileup_table_tumor.no_intervals).map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
[new_meta, table]
}
@@ -195,11 +193,6 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
//
//Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables.
//
- mutect2_vcf.view()
- mutect2_tbi.view()
- LEARNREADORIENTATIONMODEL.out.artifactprior.view()
- CALCULATECONTAMINATION.out.segmentation.view()
- CALCULATECONTAMINATION.out.contamination.view()
ch_filtermutect = mutect2_vcf.join(mutect2_tbi)
.join(mutect2_stats)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior)
@@ -233,9 +226,8 @@ workflow GATK_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING {
contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ]
segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ]
- filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf ->
- meta.variantcaller = "Mutect2"
- [meta, vcf]} // channel: [ val(meta), [ vcf ] ]
+ filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Mutect2"],
+ vcf]} // channel: [ val(meta), [ vcf ] ]
filtered_tbi = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ]
filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ]
diff --git a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
index 6b8ccf1da1..5f07d2e147 100644
--- a/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
+++ b/subworkflows/nf-core/gatk4/tumor_only_somatic_variant_calling/main.nf
@@ -67,11 +67,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
CONCAT_MUTECT2(
BGZIP_VC_MUTECT2.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fai,
intervals_bed_combine_gz)
@@ -88,11 +86,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
MERGEMUTECTSTATS(
mutect2_stats_branch.intervals
.map{ meta, stats ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, stats]
+ [groupKey(new_meta, meta.num_intervals), stats]
}.groupTuple())
mutect2_stats = Channel.empty().mix(
@@ -106,11 +102,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
Channel.empty().mix(
mutect2_f1r2_branch.intervals
.map{ meta, f1r2 ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, f1r2]
+ [groupKey(new_meta, meta.num_intervals), f1r2]
}.groupTuple(),
mutect2_f1r2_branch.no_intervals))
@@ -128,11 +122,9 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
GATHERPILEUPSUMMARIES(
GETPILEUPSUMMARIES.out.table
.map{ meta, table ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, table]
+ [groupKey(new_meta, meta.num_intervals), table]
}.groupTuple(),
dict)
@@ -179,9 +171,8 @@ workflow GATK_TUMOR_ONLY_SOMATIC_VARIANT_CALLING {
contamination_table = CALCULATECONTAMINATION.out.contamination // channel: [ val(meta), [ contamination ] ]
segmentation_table = CALCULATECONTAMINATION.out.segmentation // channel: [ val(meta), [ segmentation ] ]
- filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf ->
- meta.variantcaller = "Mutect2"
- [meta, vcf] } // channel: [ val(meta), [ vcf ] ]
+ filtered_vcf = FILTERMUTECTCALLS.out.vcf.map{ meta, vcf -> [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Mutect2"]
+ , vcf] } // channel: [ val(meta), [ vcf ] ]
filtered_index = FILTERMUTECTCALLS.out.tbi // channel: [ val(meta), [ tbi ] ]
filtered_stats = FILTERMUTECTCALLS.out.stats // channel: [ val(meta), [ stats ] ]
diff --git a/subworkflows/nf-core/merge_index_cram.nf b/subworkflows/nf-core/merge_index_cram.nf
index 7c8cfa37bc..49cc34720a 100644
--- a/subworkflows/nf-core/merge_index_cram.nf
+++ b/subworkflows/nf-core/merge_index_cram.nf
@@ -17,11 +17,10 @@ workflow MERGE_INDEX_CRAM {
// Figuring out if there is one or more cram(s) from the same sample
ch_cram_to_merge = ch_cram.map{ meta, cram ->
- new_meta = meta.clone()
- new_meta.id = meta.sample
- def groupKey = groupKey(new_meta, meta.num_intervals)
- [new_meta, cram]
+ new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:meta.data_type, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), cram]
}.groupTuple()
.branch{
//Warning: size() calculates file size not list length here, so use num_intervals instead
diff --git a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf
index f48b12a97a..c5e0b07678 100644
--- a/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/controlfreec/somatic/main.nf
@@ -40,20 +40,17 @@ workflow RUN_CONTROLFREEC_SOMATIC {
//Merge mpileup only when intervals and natural order sort them
CAT_MPILEUP_NORMAL( mpileup_normal.intervals.map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, pileup]
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), pileup]
}.groupTuple(sort:true))
CAT_MPILEUP_TUMOR(mpileup_tumor.intervals
.map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, pileup]
+ [groupKey(new_meta, meta.num_intervals), pileup]
}
.groupTuple(sort:true))
@@ -61,8 +58,8 @@ workflow RUN_CONTROLFREEC_SOMATIC {
CAT_MPILEUP_NORMAL.out.file_out,
mpileup_normal.no_intervals
).map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
[new_meta, pileup]
}
@@ -70,8 +67,7 @@ workflow RUN_CONTROLFREEC_SOMATIC {
CAT_MPILEUP_TUMOR.out.file_out,
mpileup_tumor.no_intervals
).map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
[new_meta, pileup]
}
diff --git a/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf
index 71cefaf1e6..30c03ea743 100644
--- a/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf
+++ b/subworkflows/nf-core/variantcalling/controlfreec/tumoronly/main.nf
@@ -31,11 +31,9 @@ workflow RUN_CONTROLFREEC_TUMORONLY {
//Merge mpileup only when intervals and natural order sort them
CAT_MPILEUP_TUMOR(mpileup_tumor.intervals
.map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, pileup]
+ [groupKey(new_meta, meta.num_intervals), pileup]
}
.groupTuple(sort:true))
@@ -43,8 +41,8 @@ workflow RUN_CONTROLFREEC_TUMORONLY {
CAT_MPILEUP_TUMOR.out.file_out,
mpileup_tumor.no_intervals
).map{ meta, pileup ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
[new_meta, pileup]
}
diff --git a/subworkflows/nf-core/variantcalling/deepvariant/main.nf b/subworkflows/nf-core/variantcalling/deepvariant/main.nf
index 666822420e..0929ea7eca 100644
--- a/subworkflows/nf-core/variantcalling/deepvariant/main.nf
+++ b/subworkflows/nf-core/variantcalling/deepvariant/main.nf
@@ -42,11 +42,10 @@ workflow RUN_DEEPVARIANT {
CONCAT_DEEPVARIANT_VCF(
BGZIP_VC_DEEPVARIANT_VCF.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -54,11 +53,10 @@ workflow RUN_DEEPVARIANT {
CONCAT_DEEPVARIANT_GVCF(
BGZIP_VC_DEEPVARIANT_GVCF.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -70,8 +68,7 @@ workflow RUN_DEEPVARIANT {
deepvariant_gvcf_out.no_intervals,
deepvariant_vcf_out.no_intervals)
.map{ meta, vcf ->
- meta.variantcaller = "Deepvariant"
- [meta, vcf]
+ [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Deepvariant"], vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_DEEPVARIANT_GVCF.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/freebayes/main.nf b/subworkflows/nf-core/variantcalling/freebayes/main.nf
index bcc06087e1..2e2eb1f96c 100644
--- a/subworkflows/nf-core/variantcalling/freebayes/main.nf
+++ b/subworkflows/nf-core/variantcalling/freebayes/main.nf
@@ -1,3 +1,4 @@
+include { BCFTOOLS_SORT } from '../../../../modules/nf-core/modules/bcftools/sort/main'
include { TABIX_BGZIP as BGZIP_VC_FREEBAYES } from '../../../../modules/nf-core/modules/tabix/bgzip/main'
include { CONCAT_VCF as CONCAT_FREEBAYES } from '../../../../modules/local/concat_vcf/main'
include { FREEBAYES } from '../../../../modules/nf-core/modules/freebayes/main'
@@ -26,7 +27,8 @@ workflow RUN_FREEBAYES {
}.set{freebayes_vcf_out}
// Only when no intervals
- TABIX_VC_FREEBAYES(freebayes_vcf_out.no_intervals)
+ BCFTOOLS_SORT(freebayes_vcf_out.no_intervals)
+ TABIX_VC_FREEBAYES(BCFTOOLS_SORT.out.vcf)
// Only when using intervals
BGZIP_VC_FREEBAYES(freebayes_vcf_out.intervals)
@@ -34,11 +36,12 @@ workflow RUN_FREEBAYES {
CONCAT_FREEBAYES(
BGZIP_VC_FREEBAYES.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample
+
+ new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals]
+ : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals]
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -48,10 +51,15 @@ workflow RUN_FREEBAYES {
CONCAT_FREEBAYES.out.vcf,
freebayes_vcf_out.no_intervals)
.map{ meta, vcf ->
- meta.variantcaller = "FreeBayes"
- [meta, vcf]
+
+ new_id = meta.tumor_id ? meta.tumor_id + "_vs_" + meta.normal_id : meta.sample
+
+ new_meta = meta.tumor_id ? [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals, variantcaller:"Freebayes"]
+ : [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:new_id, num_intervals:meta.num_intervals, variantcaller:"Freebayes"]
+ [new_meta, vcf]
}
+ ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions)
ch_versions = ch_versions.mix(BGZIP_VC_FREEBAYES.out.versions)
ch_versions = ch_versions.mix(CONCAT_FREEBAYES.out.versions)
ch_versions = ch_versions.mix(FREEBAYES.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
index 1ff59e542d..fe7e24fbbf 100644
--- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
+++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf
@@ -44,10 +44,11 @@ workflow RUN_HAPLOTYPECALLER {
CONCAT_HAPLOTYPECALLER(
BGZIP_VC_HAPLOTYPECALLER.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- [new_meta, vcf]
- }.groupTuple(size: num_intervals),
+
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, new_meta.num_intervals), vcf]
+ }.groupTuple(),
fasta_fai,
intervals_bed_gz)
diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf
index 864fbc075e..4d71dc1250 100644
--- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf
@@ -43,11 +43,10 @@ workflow RUN_MANTA_GERMLINE {
CONCAT_MANTA_SMALL_INDELS(
BGZIP_VC_MANTA_SMALL_INDELS.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -57,11 +56,10 @@ workflow RUN_MANTA_GERMLINE {
CONCAT_MANTA_SV(
BGZIP_VC_MANTA_SV.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [ groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -71,11 +69,10 @@ workflow RUN_MANTA_GERMLINE {
CONCAT_MANTA_DIPLOID(
BGZIP_VC_MANTA_DIPLOID.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -89,8 +86,7 @@ workflow RUN_MANTA_GERMLINE {
//manta_small_indels_vcf.no_intervals,
manta_sv_vcf.no_intervals)
.map{ meta, vcf ->
- meta.variantcaller = "Manta"
- [meta, vcf]
+ [ [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Manta"], vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_MANTA_DIPLOID.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
index e6005000c2..c3d108fc7a 100644
--- a/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/somatic/main.nf
@@ -52,11 +52,10 @@ workflow RUN_MANTA_SOMATIC {
CONCAT_MANTA_SV(
BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
+
+ [ groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -65,11 +64,9 @@ workflow RUN_MANTA_SOMATIC {
CONCAT_MANTA_SMALL_INDELS(
BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -78,11 +75,9 @@ workflow RUN_MANTA_SOMATIC {
CONCAT_MANTA_DIPLOID(
BGZIP_VC_MANTA_DIPLOID.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -91,11 +86,9 @@ workflow RUN_MANTA_SOMATIC {
CONCAT_MANTA_SOMATIC(
BGZIP_VC_MANTA_SOMATIC.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -111,24 +104,24 @@ workflow RUN_MANTA_SOMATIC {
manta_diploid_sv_vcf.no_intervals,
manta_somatic_sv_vcf.no_intervals
).map{ meta, vcf ->
- meta.variantcaller = "Manta"
- [meta, vcf]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"],
+ vcf]
}
manta_candidate_small_indels_vcf = Channel.empty().mix(
CONCAT_MANTA_SMALL_INDELS.out.vcf,
manta_candidate_small_indels_vcf.no_intervals
).map{ meta, vcf ->
- meta.variantcaller = "Manta"
- [meta, vcf]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"],
+ vcf]
}
manta_candidate_small_indels_vcf_tbi = Channel.empty().mix(
CONCAT_MANTA_SMALL_INDELS.out.tbi,
manta_candidate_small_indels_vcf_tbi.no_intervals
).map{ meta, vcf ->
- meta.variantcaller = "Manta"
- [meta, vcf]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Manta"],
+ vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
index a09168cd1d..b0c1bfc807 100644
--- a/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
+++ b/subworkflows/nf-core/variantcalling/manta/tumoronly/main.nf
@@ -42,11 +42,10 @@ workflow RUN_MANTA_TUMORONLY {
CONCAT_MANTA_SMALL_INDELS(
BGZIP_VC_MANTA_SMALL_INDELS.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -55,11 +54,10 @@ workflow RUN_MANTA_TUMORONLY {
CONCAT_MANTA_SV(
BGZIP_VC_MANTA_SV.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -68,11 +66,10 @@ workflow RUN_MANTA_TUMORONLY {
CONCAT_MANTA_TUMOR(
BGZIP_VC_MANTA_TUMOR.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -86,8 +83,8 @@ workflow RUN_MANTA_TUMORONLY {
manta_candidate_sv_vcf.no_intervals,
manta_tumor_sv_vcf.no_intervals
).map{ meta, vcf ->
- meta.variantcaller = "Manta"
- [meta, vcf]
+ [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Manta"],
+ vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_MANTA_SV.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf
index a9b0e5d1d9..4d06d8a3a8 100644
--- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf
+++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf
@@ -34,11 +34,10 @@ workflow RUN_STRELKA_SINGLE {
CONCAT_STRELKA(
BGZIP_VC_STRELKA.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -48,11 +47,10 @@ workflow RUN_STRELKA_SINGLE {
CONCAT_STRELKA_GENOME(
BGZIP_VC_STRELKA_GENOME.out.output
.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.sample
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ new_meta = [patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals]
+
+ [groupKey(new_meta, meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -64,8 +62,7 @@ workflow RUN_STRELKA_SINGLE {
//strelka_genome_vcf.no_intervals,
strelka_vcf.no_intervals)
.map{ meta, vcf ->
- meta.variantcaller = "Strelka"
- [meta, vcf]
+ [[patient:meta.patient, sample:meta.sample, status:meta.status, gender:meta.gender, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"Strelka"], vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_STRELKA.out.versions)
diff --git a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
index 1c7353c568..e34117bb16 100644
--- a/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
+++ b/subworkflows/nf-core/variantcalling/strelka/somatic/main.nf
@@ -32,11 +32,9 @@ workflow RUN_STRELKA_SOMATIC {
BGZIP_VC_STRELKA_SNVS(strelka_vcf_snvs.intervals)
CONCAT_STRELKA_SNVS(BGZIP_VC_STRELKA_SNVS.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, new_meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -44,11 +42,9 @@ workflow RUN_STRELKA_SOMATIC {
BGZIP_VC_STRELKA_INDELS(strelka_vcf_indels.intervals)
CONCAT_STRELKA_INDELS(BGZIP_VC_STRELKA_INDELS.out.output.map{ meta, vcf ->
- new_meta = meta.clone()
- new_meta.id = new_meta.tumor_id + "_vs_" + new_meta.normal_id
+ new_meta = [patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals]
- def groupKey = groupKey(meta, meta.num_intervals)
- [new_meta, vcf]
+ [groupKey(new_meta, new_meta.num_intervals), vcf]
}.groupTuple(),
fasta_fai,
intervals_bed_gz)
@@ -60,8 +56,8 @@ workflow RUN_STRELKA_SOMATIC {
strelka_vcf_snvs.no_intervals,
strelka_vcf_indels.no_intervals)
.map{ meta, vcf ->
- meta.variantcaller = "Strelka"
- [meta, vcf]
+ [[patient:meta.patient, normal_id:meta.normal_id, tumor_id:meta.tumor_id, gender:meta.gender, id:meta.tumor_id + "_vs_" + meta.normal_id, num_intervals:meta.num_intervals, variantcaller:"Strelka"]
+ , vcf]
}
ch_versions = ch_versions.mix(BGZIP_VC_STRELKA_SNVS.out.versions)
diff --git a/tests/csv/3.0/fastq_pair.csv b/tests/csv/3.0/fastq_pair.csv
index ce9e72fd97..0958c9aff9 100644
--- a/tests/csv/3.0/fastq_pair.csv
+++ b/tests/csv/3.0/fastq_pair.csv
@@ -1,3 +1,3 @@
patient,gender,status,sample,lane,fastq_1,fastq_2
test,XX,0,test,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz
-test,XX,1,test2,test2_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz
+test,XX,1,test2,test_L1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz
diff --git a/tests/test_pair.yml b/tests/test_pair.yml
index eedd7a6490..ee7ecd547b 100644
--- a/tests/test_pair.yml
+++ b/tests/test_pair.yml
@@ -25,7 +25,7 @@
- path: results/preprocessing/csv/recalibrated_test.csv
- path: results/preprocessing/csv/recalibrated_test2.csv
- path: results/reports/fastqc/test-test_L1
- - path: results/reports/fastqc/test2-test2_L1
+ - path: results/reports/fastqc/test2-test_L1
- path: results/reports/markduplicates/test/test.md.metrics
- path: results/reports/markduplicates/test2/test2.md.metrics
- path: results/reports/qualimap/test/test.mapped
diff --git a/tests/test_tools.yml b/tests/test_tools.yml
index b7f5e451ab..e72fb284a2 100644
--- a/tests/test_tools.yml
+++ b/tests/test_tools.yml
@@ -98,46 +98,67 @@
- path: results/variant_calling/sample1/deepvariant/sample1.vcf.gz.tbi
- name: Run variant calling on germline sample with freebayes
- command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes
+ command: nextflow run main.nf -profile test,targeted,docker --tools freebayes
tags:
- freebayes
- germline
- variant_calling
files:
- - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz
- - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi
+ - path: results/variant_calling/test/freebayes/test.vcf.gz
+ - path: results/variant_calling/test/freebayes/test.vcf.gz.tbi
- name: Run variant calling on germline sample with freebayes without intervals
- command: nextflow run main.nf -profile test,tools_germline,docker --tools freebayes --no_intervals
+ command: nextflow run main.nf -profile test,docker --tools freebayes --no_intervals
tags:
- freebayes
- germline
- no_intervals
- variant_calling
files:
- - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz
- - path: results/variant_calling/sample1/freebayes/sample1.vcf.gz.tbi
+ - path: results/variant_calling/test/freebayes/test.vcf.gz
+ - path: results/variant_calling/test/freebayes/test.vcf.gz.tbi
-- name: Run variant calling on tumor_only sample with freebayes
- command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes
+- name: Run variant calling on somatic sample with freebayes
+ command: nextflow run main.nf -profile test,pair,targeted,docker --tools freebayes
tags:
- freebayes
- - tumor_only
+ - somatic
- variant_calling
files:
- - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz
- - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi
+ - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz
+ - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz.tbi
-- name: Run variant calling on tumor_only sample with freebayes without intervals
- command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals
+- name: Run variant calling on somatic sample with freebayes without intervals
+ command: nextflow run main.nf -profile test,pair,docker --tools freebayes --no_intervals
tags:
- freebayes
+ - somatic
- no_intervals
- - tumor_only
- variant_calling
files:
- - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz
- - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi
+ - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz
+ - path: results/variant_calling/test2_vs_test/freebayes/test2_vs_test.vcf.gz.tbi
+
+# - name: Run variant calling on tumor_only sample with freebayes
+# command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes
+# tags:
+# - freebayes
+# - tumor_only
+# - variant_calling
+# files:
+# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz
+# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi
+
+# - name: Run variant calling on tumor_only sample with freebayes without intervals
+# command: nextflow run main.nf -profile test,tools_tumoronly,docker --tools freebayes --no_intervals
+# tags:
+# - freebayes
+# - no_intervals
+# - tumor_only
+# - variant_calling
+# files:
+# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz
+# - path: results/variant_calling/sample2/freebayes/sample2.vcf.gz.tbi
- name: Run variant calling on germline sample with haplotypecaller
command: nextflow run main.nf -profile test,tools_germline,docker --tools haplotypecaller
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
index b763d45926..50b7c130e6 100644
--- a/workflows/sarek.nf
+++ b/workflows/sarek.nf
@@ -375,44 +375,32 @@ workflow SAREK {
// STEP 1: MAPPING READS TO REFERENCE GENOME
// reads will be sorted
-
ch_reads_to_map = ch_reads_to_map.map{ meta, reads ->
- new_meta = meta.clone()
-
// update ID when no multiple lanes or splitted fastqs
- new_meta.id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id
+ def new_id = meta.size * meta.numLanes == 1 ? meta.sample : meta.id // 'def' keeps new_id local to this closure instead of a shared script global
- [new_meta, reads]
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:new_id, numLanes:meta.numLanes, read_group:meta.read_group, data_type:meta.data_type, size:meta.size],
+ reads]
}
GATK4_MAPPING(ch_reads_to_map, ch_map_index, true)
// Grouping the bams from the same samples not to stall the workflow
ch_bam_mapped = GATK4_MAPPING.out.bam.map{ meta, bam ->
- new_meta = meta.clone()
-
- numLanes = meta.numLanes ?: 1
- size = meta.size ?: 1
+ def numLanes = meta.numLanes ?: 1 // 'def': keep closure locals out of the shared script binding
+ def size = meta.size ?: 1
- // remove no longer necessary fields
- new_meta.remove('read_group') // Now in the BAM header
- new_meta.remove('numLanes') // Was only needed for mapping
- new_meta.remove('size') // Was only needed for mapping
-
// update ID to be based on the sample name
- new_meta.id = meta.sample
-
// update data_type
- new_meta.data_type = 'bam'
-
+ // remove no longer necessary fields:
+ // read_group: Now in the BAM header
+ // numLanes: Was only needed for mapping
+ // size: Was only needed for mapping
+ def new_meta = [patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.sample, data_type:"bam"]
// Use groupKey to make sure that the correct group can advance as soon as it is complete
// and not stall the workflow until all reads from all channels are mapped
- def groupKey = groupKey(new_meta, numLanes * size)
-
- //Returns the values we need
- [groupKey, new_meta, bam]
- }.groupTuple(by:[0,1])
- .map{ groupKey, new_meta, bam -> [new_meta, bam] }
+ [ groupKey(new_meta, numLanes * size), bam]
+ }.groupTuple()
// gatk4 markduplicates can handle multiple bams as input, so no need to merge/index here
// Except if and only if skipping markduplicates or saving mapped bams
@@ -531,9 +519,8 @@ workflow SAREK {
ch_cram_markduplicates_no_spark,
ch_cram_markduplicates_spark,
ch_cram_no_markduplicates_restart).map{ meta, cram, crai ->
- meta_new = meta.clone()
- meta_new.data_type = "cram" //Make sure correct data types are carried through
- [meta_new, cram, crai]
+ //Make sure correct data types are carried through
+ [[patient:meta.patient, sample:meta.sample, gender:meta.gender, status:meta.status, id:meta.id, data_type:"cram"], cram, crai]
}
// Create CSV to restart from this step
@@ -970,11 +957,15 @@ def extract_csv(csv_file) {
def fastq_1 = file(row.fastq_1, checkIfExists: true)
def fastq_2 = file(row.fastq_2, checkIfExists: true)
def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
- def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
+
+ def flowcell = flowcellLaneFromFastq(fastq_1)
+ //Don't use a random element for ID, it breaks resuming
+ def read_group = "\"@RG\\tID:${flowcell}.${row.sample}.${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
+
meta.numLanes = numLanes.toInteger()
meta.read_group = read_group.toString()
meta.data_type = "fastq"
- meta.test = "test"
+
meta.size = 1 // default number of splitted fastq
return [meta, [fastq_1, fastq_2]]
// start from BAM
@@ -982,7 +973,7 @@ def extract_csv(csv_file) {
meta.id = "${row.sample}-${row.lane}".toString()
def bam = file(row.bam, checkIfExists: true)
def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
- def read_group = "\"@RG\\tID:${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""
+ def read_group = "\"@RG\\tID:${row.sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""
meta.numLanes = numLanes.toInteger()
meta.read_group = read_group.toString()
meta.data_type = "bam"
@@ -1031,6 +1022,34 @@ def extract_csv(csv_file) {
}
}
+// Parse first line of a gzipped FASTQ file, return the flowcell id (null if the header layout is not recognised).
+def flowcellLaneFromFastq(path) {
+    // expected format:
+    // xx:yy:FLOWCELLID:LANE:... (seven fields or more)
+    // or
+    // FLOWCELLID:LANE:xx:... (five fields)
+    def line
+    path.withInputStream { stream ->
+        def buffered = new BufferedReader(new InputStreamReader(new java.util.zip.GZIPInputStream(stream), 'ASCII'))
+        // withReader closes the reader and the wrapped gzip stream even if readLine() throws
+        line = buffered.withReader { reader -> reader.readLine() }
+    }
+    // readLine() yields null when there is no content; fail with a readable message instead of a NullPointerException
+    assert line?.startsWith('@') : "FASTQ file ${path}: empty, or first line does not start with '@'"
+    def fields = line.substring(1).split(':')
+    String fcid
+
+    if (fields.size() >= 7) {
+        // CASAVA 1.8+ format, from https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm
+        // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:<UMI> <read>:<is filtered>:<control number>:<index>"
+        fcid = fields[2]
+    } else if (fields.size() == 5) {
+        fcid = fields[0]
+    }
+    // any other field count leaves fcid null; the lane itself is never returned by this helper
+    return fcid
+}
+
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END