Skip to content

Commit

Permalink
Merge pull request #1171 from FriederikeHanssen/germline_resource
Browse files Browse the repository at this point in the history
fix getpileupsummary should not run if no germline resource is provided
  • Loading branch information
FriederikeHanssen authored Aug 17, 2023
2 parents 721c8d1 + aaa1f63 commit 66af584
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 70 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
- [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
- [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remains unchanged.)
- [#1171](https://github.com/nf-core/sarek/pull/1171) - Fix channel logic for germline resource to skip GetPileupSummary if not provided
- [#1172](https://github.com/nf-core/sarek/pull/1172) - Publish gvcf files when all intervals are processed at once ([#764](https://github.com/nf-core/sarek/issues/764))
- [#1173](https://github.com/nf-core/sarek/pull/1173) - Fixed duplicated entries in joint germline recalibrated VCF ([#966](https://github.com/nf-core/sarek/pull/966), [#1102](https://github.com/nf-core/sarek/pull/1102)),
fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137))
Expand Down
17 changes: 4 additions & 13 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,12 @@ process {
}
}

if (params.joint_mutect2) {
withName: 'MUTECT2_PAIRED' {
ext.args = { params.ignore_soft_clipped_bases ?
"--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" :
"--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" }
}
}
else {
withName: 'MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
}
withName: '.*:MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
}

withName: 'FILTERVARIANTTRANCHES'{
withName: '.*:FILTERVARIANTTRANCHES'{
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
Expand Down
6 changes: 3 additions & 3 deletions conf/test/cache.config
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ process {
}
}

withName: 'MUTECT2_PAIRED'{
withName: '.*:MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
}

withName: 'FILTERVARIANTTRANCHES'{
withName: '.*:FILTERVARIANTTRANCHES'{
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
Expand Down
16 changes: 11 additions & 5 deletions subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
main:
versions = Channel.empty()

// If no germline resource is provided, create empty channels so that GetPileupSummaries is not run
germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty()
germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()

Expand Down Expand Up @@ -143,13 +144,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
// This is necessary because we generated one normal pileup summary for each patient, but we need to run CalculateContamination for each tumor-normal pair.
pileup_table_tumor = Channel.empty().mix(GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }
pileup_table_normal= Channel.empty().mix(GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }

ch_calculatecontamination_in_tables = pileup_table_tumor.combine(
pileup_table_normal, by:0).map{
meta, tumor_id, tumor_table, normal_id, normal_table -> [ meta + [ id: tumor_id + "_vs_" + normal_id ], tumor_table, normal_table]
}

CALCULATECONTAMINATION(ch_calculatecontamination_in_tables)

// Initialize empty channels: contamination calculation is run on the pileup table, and pileup is not run if no germline resource is provided
ch_seg_to_filtermutectcalls = Channel.empty()
ch_cont_to_filtermutectcalls = Channel.empty()

if (joint_mutect2) {
// Reduce the meta to only patient name
ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple()
Expand All @@ -163,11 +169,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {

// Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables
vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(ch_seg_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
.join(ch_cont_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(ch_seg_to_filtermutectcalls)
.join(ch_cont_to_filtermutectcalls)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
main:
versions = Channel.empty()

// If no germline resource is provided, create empty channels so that GetPileupSummaries is not run
germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty()
germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()

Expand Down Expand Up @@ -118,6 +119,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
// Contamination and segmentation tables created using calculatecontamination on the pileup summary table
CALCULATECONTAMINATION(pileup_table.map{ meta, table -> [ meta, table, [] ] })

// Initialize empty channels: contamination calculation is run on the pileup table, and pileup is not run if no germline resource is provided
calculatecontamination_out_seg = Channel.empty()
calculatecontamination_out_cont = Channel.empty()

if (joint_mutect2) {
// Remove sample names and retain patient name as the main identifier
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample') + [id:meta.patient], seg ] }.groupTuple()
Expand All @@ -133,8 +138,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_seg)
.join(calculatecontamination_out_cont)
.map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)
Expand Down
85 changes: 42 additions & 43 deletions tests/test_tools_manually.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
md5sum: d3c9f0559d48696c54f3c463b1606586
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/no_intervals.bed
md5sum: f3dac01ea66b95fe477446fde2d31489
Expand All @@ -53,7 +53,7 @@
- path: results/no_intervals.bed.gz.tbi
md5sum: f3dac01ea66b95fe477446fde2d31489
- path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
md5sum: d83942d74fbbf1c3770b3c04bf622c63
md5sum: 9876607145d11c6b8492264936d7a82c
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
Expand Down Expand Up @@ -91,10 +91,10 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
md5sum: d3c9f0559d48696c54f3c463b1606586
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
md5sum: d83942d74fbbf1c3770b3c04bf622c63
md5sum: 9876607145d11c6b8492264936d7a82c
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
Expand Down Expand Up @@ -123,100 +123,99 @@
md5sum: c09dff3f145d77d4848992e244811c08
- path: results/variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run joint calling on somatic samples with mutect2
command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
- name: Run joint calling on tumor only samples with mutect2
command: nextflow run main.nf -profile test_cache,tools_tumoronly --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --joint_mutect2 --outdir results
tags:
- mutect2_manual
- manual
- somatic
- tumor_only
- variant_calling
- multi_sample
- joint_somatic
- joint_tumoronly
files:
- path: results/csv/variantcalled.csv
md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
md5sum: d75da410d57960944f54d02b2b5cdcac
md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
md5sum: e1e42b6f65cbdba116cff72a56e40c4b
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
md5sum: aa51bde6080c015c6aa6c8254977dd11
md5sum: c00e1639a41deb107099487676a6cf37
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
md5sum: 262f843f68d072c457ca28b56da3ede8
- path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
md5sum: 16077fdb885a8afe64c7669477471354
md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
- path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
md5sum: 9afe42339f590937166edcf4746c22ec
- path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
- path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
md5sum: 38f83e2f98b206640644dd93d5e96f4e
- path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
md5sum: f237666ae325fde0c06b8bc62d2846fc
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
md5sum: 094cb75b0bda28e92b6718ff33d136e2
md5sum: 22e58aef3b14b335fa487d40b590ffeb
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run joint calling on tumor only samples with mutect2
command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
- name: Run joint calling on somatic samples with mutect2
command: nextflow run main.nf -profile test_cache,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --joint_mutect2 --outdir results
tags:
- mutect2_manual
- manual
- tumoronly
- somatic
- variant_calling
- multi_sample
- joint_tumoronly
- joint_somatic
files:
- path: results/csv/variantcalled.csv
md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
md5sum: d75da410d57960944f54d02b2b5cdcac
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
md5sum: e1e42b6f65cbdba116cff72a56e40c4b
md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
md5sum: c00e1639a41deb107099487676a6cf37
md5sum: aa51bde6080c015c6aa6c8254977dd11
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
- path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
md5sum: 262f843f68d072c457ca28b56da3ede8
- path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
md5sum: 16077fdb885a8afe64c7669477471354
- path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
md5sum: 9afe42339f590937166edcf4746c22ec
- path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
- path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
md5sum: 38f83e2f98b206640644dd93d5e96f4e
- path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
md5sum: f237666ae325fde0c06b8bc62d2846fc
md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
md5sum: 22e58aef3b14b335fa487d40b590ffeb
md5sum: 094cb75b0bda28e92b6718ff33d136e2
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run full pipeline on tumoronly with most tools
Expand Down
Loading

0 comments on commit 66af584

Please sign in to comment.