From 35175114428cbdbe9100cfd467a3b0526b7c9b24 Mon Sep 17 00:00:00 2001
From: MaxUlysse
Date: Thu, 22 Oct 2020 16:49:33 +0200
Subject: [PATCH] add: MSIsensor to Pair Somatic Variant Calling

---
 .github/workflows/ci.yml                      |  6 +-
 conf/modules.config                           |  8 +++
 main.nf                                       | 59 ++-----------------
 modules/local/subworkflow/build_indices.nf    |  7 +++
 .../local/subworkflow/pair_variant_calling.nf | 31 ++++++----
 modules/nf-core/software/manta/somatic.nf     |  2 +-
 modules/nf-core/software/msisensor/msi.nf     | 51 ++++++++++++++++
 modules/nf-core/software/msisensor/scan.nf    | 39 ++++++++++++
 modules/nf-core/software/strelka/germline.nf  |  2 +-
 modules/nf-core/software/strelka/somatic.nf   |  2 +-
 10 files changed, 135 insertions(+), 72 deletions(-)
 create mode 100644 modules/nf-core/software/msisensor/msi.nf
 create mode 100644 modules/nf-core/software/msisensor/scan.nf

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 092c833766..130e9afc07 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -150,13 +150,13 @@ jobs:
     strategy:
       matrix:
         # tool: [Haplotypecaller, Freebayes, Manta, mpileup, MSIsensor, Strelka, TIDDIT]
-        tool: [Haplotypecaller, Manta, Strelka]
+        tool: [Haplotypecaller, Manta, MSIsensor, Strelka]
         intervals: [--no_intervals, '']
         exclude:
           - tool: Manta
             intervals: --no_intervals
-          # - tool: MSIsensor
-          #   intervals: --no_intervals
+          - tool: MSIsensor
+            intervals: --no_intervals
           - tool: Strelka
             intervals: --no_intervals
           # - tool: TIDDIT
diff --git a/conf/modules.config b/conf/modules.config
index 42245ffd82..8138f1ae4b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -27,6 +27,10 @@ params {
             publish_dir = "reference"
             publish_files = "false"
         }
+        'msisensor_scan' {
+            publish_dir = "reference"
+            publish_files = "false"
+        }
         'samtools_faidx' {
             publish_dir = "reference"
             publish_files = "false"
@@ -152,6 +156,10 @@ params {
             publish_dir = "variant_calling"
             publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta']
         }
+        'msisensor_msi' {
+            publish_by_id = "true"
+            publish_dir = "variant_calling"
+        }
         'strelka_somatic' {
             publish_by_id = "true"
             publish_dir = "variant_calling"
diff --git a/main.nf b/main.nf
index 60ff222c6a..b4ad8316ea 100644
--- a/main.nf
+++ b/main.nf
@@ -179,6 +179,7 @@ if (params.save_reference) modules['bwamem2_index'].publish_files
 if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals']
 if (params.save_reference) modules['dict'].publish_files = ['dict':'dict']
 if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai']
+if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi']
 if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp']
 if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource']
 if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels']
@@ -288,6 +289,7 @@ include { BUILD_INDICES } from './modules/local/subworkflow/build_indices' addPa
     bwamem2_index_options: modules['bwamem2_index'],
     create_intervals_bed_options: modules['create_intervals_bed'],
     gatk_dict_options: modules['dict'],
+    msisensor_scan_options: modules['msisensor_scan'],
     samtools_faidx_options: modules['samtools_faidx'],
     tabix_dbsnp_options: modules['tabix_dbsnp'],
     tabix_germline_resource_options: modules['tabix_germline_resource'],
@@ -423,6 +425,7 @@ workflow {
     known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file")
     pon_tbi = params.pon ? params.pon_index ? file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file")
 
+    msisensor_scan = BUILD_INDICES.out.msisensor_scan
 /*
 ================================================================================
                                   PREPROCESSING
@@ -565,6 +568,7 @@ workflow {
         fai,
         fasta,
         intervals,
+        msisensor_scan,
         target_bed,
         tools)
 
@@ -1272,61 +1276,6 @@ workflow.onComplete {
 //     """
 // }
 
-// // STEP MSISENSOR.1 - SCAN
-
-// // Scan reference genome for microsatellites
-// process MSIsensor_scan {
-//     label 'cpus_1'
-//     label 'memory_max'
-
-//     tag "${fasta}"
-
-//     input:
-//         file(fasta) from fasta
-//         file(fastaFai) from fai
-
-//     output:
-//         file "microsatellites.list" into msi_scan_ch
-
-//     when: 'msisensor' in tools
-
-//     script:
-//     """
-//     msisensor scan -d ${fasta} -o microsatellites.list
-//     """
-// }
-
-// // STEP MSISENSOR.2 - SCORE
-
-// // Score the normal vs somatic pair of bams
-
-// process MSIsensor_msi {
-//     label 'cpus_4'
-//     label 'memory_max'
-
-//     tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-//     publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/MSIsensor", mode: params.publish_dir_mode
-
-//     input:
-//         set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamMsisensor
-//         file msiSites from msi_scan_ch
-
-//     output:
-//         set val("Msisensor"), idPatient, file("${idSampleTumor}_vs_${idSampleNormal}_msisensor"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_dis"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_germline"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_somatic") into msisensor_out_ch
-
-//     when: 'msisensor' in tools
-
-//     script:
-//     """
-//     msisensor msi -d ${msiSites} \
-//                   -b 4 \
-//                   -n ${bamNormal} \
-//                   -t ${bamTumor} \
-//                   -o ${idSampleTumor}_vs_${idSampleNormal}_msisensor
-//     """
-// }
-
 // // STEP ASCAT.1 - ALLELECOUNTER
 
 // // Run commands and code from Malin Larsson
diff --git a/modules/local/subworkflow/build_indices.nf b/modules/local/subworkflow/build_indices.nf
index 0f4eaba448..4717e11b5c 100644
--- a/modules/local/subworkflow/build_indices.nf
+++ b/modules/local/subworkflow/build_indices.nf
@@ -9,6 +9,7 @@ params.bwa_index_options = [:]
 params.bwamem2_index_options = [:]
 params.create_intervals_bed_options = [:]
 params.gatk_dict_options = [:]
+params.msisensor_scan_options = [:]
 params.samtools_faidx_options = [:]
 params.tabix_dbsnp_options = [:]
 params.tabix_germline_resource_options = [:]
@@ -26,6 +27,7 @@ include { HTSLIB_TABIX as TABIX_DBSNP } from '../../nf-core/softw
 include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options)
 include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options)
 include { HTSLIB_TABIX as TABIX_PON } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options)
+include { MSISENSOR_SCAN } from '../../nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options)
 include { SAMTOOLS_FAIDX } from '../../nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options)
 
 workflow BUILD_INDICES{
@@ -66,6 +68,10 @@ workflow BUILD_INDICES{
     if (!(params.known_indels_index) && params.known_indels && ('mapping' in step || 'preparerecalibration' in step))
         result_known_indels_tbi = TABIX_KNOWN_INDELS(known_indels)
 
+    result_msisensor_scan = Channel.empty()
+    if ('msisensor' in tools)
+        result_msisensor_scan = MSISENSOR_SCAN(fasta, result_fai)
+
     result_pon_tbi = Channel.empty()
     if (!(params.pon_index) && params.pon && ('tnscope' in tools || 'mutect2' in tools))
         result_pon_tbi = TABIX_PON(pon)
@@ -107,5 +113,6 @@ workflow BUILD_INDICES{
         germline_resource_tbi = result_germline_resource_tbi
         intervals = result_intervals
         known_indels_tbi = result_known_indels_tbi
+        msisensor_scan = result_msisensor_scan
         pon_tbi = result_pon_tbi
 }
\ No newline at end of file
diff --git a/modules/local/subworkflow/pair_variant_calling.nf b/modules/local/subworkflow/pair_variant_calling.nf
index f2d4f57de4..207f0dc44e 100644
--- a/modules/local/subworkflow/pair_variant_calling.nf
+++ b/modules/local/subworkflow/pair_variant_calling.nf
@@ -4,23 +4,26 @@
 ================================================================================
 */
 
-params.strelka_options = [:]
 params.manta_options = [:]
+params.msisensor_msi_options = [:]
+params.strelka_options = [:]
 
-include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options)
 include { MANTA_SOMATIC as MANTA } from '../../nf-core/software/manta/somatic' addParams(options: params.manta_options)
+include { MSISENSOR_MSI } from '../../nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options)
+include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options)
 
 workflow PAIR_VARIANT_CALLING {
     take:
-        bam // channel: [mandatory] bam
-        dbsnp // channel: [mandatory] dbsnp
-        dbsnp_tbi // channel: [mandatory] dbsnp_tbi
-        dict // channel: [mandatory] dict
-        fai // channel: [mandatory] fai
-        fasta // channel: [mandatory] fasta
-        intervals // channel: [mandatory] intervals
-        target_bed // channel: [optional] target_bed
-        tools // list: [mandatory] list of tools
+        bam // channel: [mandatory] bam
+        dbsnp // channel: [mandatory] dbsnp
+        dbsnp_tbi // channel: [mandatory] dbsnp_tbi
+        dict // channel: [mandatory] dict
+        fai // channel: [mandatory] fai
+        fasta // channel: [mandatory] fasta
+        intervals // channel: [mandatory] intervals
+        msisensor_scan // channel: [optional] msisensor_scan
+        target_bed // channel: [optional] target_bed
+        tools // list: [mandatory] list of tools
 
     main:
 
@@ -64,6 +67,12 @@ workflow PAIR_VARIANT_CALLING {
         manta_vcf = manta_candidate_small_indels_vcf.mix(manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf)
     }
 
+    if ('msisensor' in tools) {
+        MSISENSOR_MSI(
+            bam_pair,
+            msisensor_scan)
+    }
+
     if ('strelka' in tools) {
         STRELKA(
             bam_pair,
diff --git a/modules/nf-core/software/manta/somatic.nf b/modules/nf-core/software/manta/somatic.nf
index 8877bed4fd..f018b586a0 100644
--- a/modules/nf-core/software/manta/somatic.nf
+++ b/modules/nf-core/software/manta/somatic.nf
@@ -20,7 +20,7 @@ process MANTA_SOMATIC {
     container container
 
     input:
-        tuple val(meta), path(bam_normal), path (bai_normal), path(bam_tumor), path (bai_tumor)
+        tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
         path fasta
         path fai
         path target_bed
diff --git a/modules/nf-core/software/msisensor/msi.nf b/modules/nf-core/software/msisensor/msi.nf
new file mode 100644
index 0000000000..79ce4925fa
--- /dev/null
+++ b/modules/nf-core/software/msisensor/msi.nf
@@ -0,0 +1,51 @@
+include { initOptions; saveFiles; getSoftwareName } from './../functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
+container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
+if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
+
+// Runs `msisensor msi` on one tumor/normal BAM pair, using the microsatellite
+// list passed in as `msisensor_scan`. The four result files are renamed with a
+// `.list` extension so that the `*.list` output glob picks them up.
+process MSISENSOR_MSI {
+    tag "${meta.id}"
+
+    label 'CPUS_1'
+    label 'MEMORY_MAX'
+
+    publishDir params.outdir, mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+    conda environment
+    container container
+
+    input:
+        tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
+        path msisensor_scan
+
+    output:
+        tuple val(meta), path("*.list")
+
+    script:
+    def ioptions = initOptions(options)
+    def prefix = ioptions.suffix ? "msisensor_${meta.id}${ioptions.suffix}" : "msisensor_${meta.id}"
+    // Extra command-line arguments for the tool; empty string when none are configured
+    def args = ioptions.args ?: ''
+    // -b is the thread count: follow the CPUs allocated to this task (task.cpus)
+    // rather than a fixed 4, which did not match the 'CPUS_1' label on this process
+    """
+    msisensor msi -d ${msisensor_scan} \
+        -b ${task.cpus} \
+        -t ${bam_tumor} \
+        -n ${bam_normal} \
+        -o ${prefix} ${args}
+
+    mv ${prefix} ${prefix}.list
+    mv ${prefix}_dis ${prefix}_dis.list
+    mv ${prefix}_germline ${prefix}_germline.list
+    mv ${prefix}_somatic ${prefix}_somatic.list
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/software/msisensor/scan.nf b/modules/nf-core/software/msisensor/scan.nf
new file mode 100644
index 0000000000..180c73f4c7
--- /dev/null
+++ b/modules/nf-core/software/msisensor/scan.nf
@@ -0,0 +1,39 @@
+include { initOptions; saveFiles; getSoftwareName } from './../functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
+container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
+if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
+
+// Runs `msisensor scan` on the reference FASTA and emits the resulting
+// `microsatellites.list` file. The `fai` input is staged alongside the FASTA
+// but is not referenced on the command line.
+process MSISENSOR_SCAN {
+    tag "${fasta}"
+
+    label 'CPUS_1'
+    label 'MEMORY_MAX'
+
+    publishDir params.outdir, mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
+
+    conda environment
+    container container
+
+    input:
+        path fasta
+        path fai
+
+    output:
+        path "microsatellites.list"
+
+    script:
+    def ioptions = initOptions(options)
+    // Extra command-line arguments for the tool; empty string when none are configured
+    def args = ioptions.args ?: ''
+    """
+    msisensor scan -d ${fasta} -o microsatellites.list ${args}
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/software/strelka/germline.nf b/modules/nf-core/software/strelka/germline.nf
index 1d9dc2665b..d03430d3e0 100644
--- a/modules/nf-core/software/strelka/germline.nf
+++ b/modules/nf-core/software/strelka/germline.nf
@@ -20,7 +20,7 @@ process STRELKA_GERMLINE {
     container container
 
     input:
-        tuple val(meta), path(bam), path (bai)
+        tuple val(meta), path(bam), path(bai)
         path fasta
         path fai
         path target_bed
diff --git a/modules/nf-core/software/strelka/somatic.nf b/modules/nf-core/software/strelka/somatic.nf
index c6a8277cac..d6b374cc15 100644
--- a/modules/nf-core/software/strelka/somatic.nf
+++ b/modules/nf-core/software/strelka/somatic.nf
@@ -20,7 +20,7 @@ process STRELKA_SOMATIC {
     container container
 
     input:
-        tuple val(meta), path(bam_normal), path (bai_normal), path(bam_tumor), path (bai_tumor)
+        tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
         path fasta
         path fai
         path target_bed