diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18695a1d22..2d316d9d02 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,8 @@ jobs: - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker + - name: Show results + run: ls -lR results aligner: name: Run aligner tests @@ -54,6 +56,8 @@ jobs: NXF_VER: '20.11.0-edge' - name: Run ${{ matrix.profile }} test run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner ${{ matrix.aligner }} + - name: Show results + run: ls -lR results germline: name: Run input from a folder test and restart from step tests @@ -76,12 +80,20 @@ jobs: run: git clone --single-branch --branch sarek https://github.com/nf-core/test-datasets.git data - name: Run germline test with ${{ matrix.markduplicates }} --step mapping run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.markduplicates }} --input data/testdata/tiny/normal --save_bam_mapped + - name: Show results + run: ls -lR results - name: Run germline test with ${{ matrix.markduplicates }} --step prepare_recalibration run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.markduplicates }} --input=false --step prepare_recalibration -resume + - name: Show results + run: ls -lR results - name: Run germline test with ${{ matrix.markduplicates }} --step recalibrate run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.markduplicates }} --input=false --step recalibrate -resume + - name: Show results + run: ls -lR results - name: Run germline test with ${{ matrix.markduplicates }} --step variantCalling run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.markduplicates }} --input=false --step variantCalling + - name: Show results + run: ls -lR results # annotation: # env: @@ -126,24 +138,27 @@ jobs: # - name: Run test for minimal genomes # run: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --skipQC all --genome ${{ 
matrix.genome }} ${{ matrix.intervals }} --tools Manta,mpileup,Strelka,FreeBayes - # profile: - # env: - # NXF_ANSI_LOG: false - # runs-on: ubuntu-latest - # strategy: - # matrix: - # profile: [test_split_fastq, test_targeted, test_trimming, test_no_gatk_spark, test_umi_tso, test_umi_qiaseq] - # steps: - # - uses: actions/checkout@v2 - # - name: Install Nextflow - # run: | - # wget -qO- get.nextflow.io | bash - # sudo mv nextflow /usr/local/bin/ - # env: - # # Only check Nextflow pipeline minimum version - # NXF_VER: '20.11.0-edge' - # - name: Run ${{ matrix.profile }} test - # run: nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker + profile: + env: + NXF_ANSI_LOG: false + runs-on: ubuntu-latest + strategy: + matrix: + # profile: [test_split_fastq, test_targeted, test_trimming, test_use_gatk_spark, test_umi_tso, test_umi_qiaseq] + profile: [test_use_gatk_spark, test_targeted] + steps: + - uses: actions/checkout@v2 + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + env: + # Only check Nextflow pipeline minimum version + NXF_VER: '20.11.0-edge' + - name: Run ${{ matrix.profile }} test + run: nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker + - name: Show results + run: ls -lR results tools: name: Run tool tests @@ -153,13 +168,13 @@ jobs: strategy: matrix: # tool: [Haplotypecaller, Freebayes, Manta, mpileup, MSIsensor, Strelka, TIDDIT] - tool: [Haplotypecaller, Strelka] + tool: [Haplotypecaller, Manta, MSIsensor, Strelka] intervals: [--no_intervals, ''] exclude: - # - tool: Manta - # intervals: --no_intervals - # - tool: MSIsensor - # intervals: --no_intervals + - tool: Manta + intervals: --no_intervals + - tool: MSIsensor + intervals: --no_intervals - tool: Strelka intervals: --no_intervals # - tool: TIDDIT @@ -175,3 +190,5 @@ jobs: NXF_VER: '20.11.0-edge' - name: Run ${{ matrix.tool }} test run: nextflow run ${GITHUB_WORKSPACE} -profile test_tool,docker --tools ${{ 
matrix.tool }} ${{ matrix.intervals }} + - name: Show results + run: ls -lR results diff --git a/conf/modules.config b/conf/modules.config index cb73ab1d10..7476bda3c5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,14 @@ params { publish_dir = "reference" publish_files = "false" } + 'index_target_bed' { + publish_dir = "reference" + publish_files = "false" + } + 'msisensor_scan' { + publish_dir = "reference" + publish_files = "false" + } 'samtools_faidx' { publish_dir = "reference" publish_files = "false" @@ -144,6 +152,29 @@ params { publish_dir = "variant_calling" publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] } +// TUMOR_VARIANT_CALLING + +// PAIR_VARIANT_CALLING + 'manta_somatic' { + publish_by_id = "true" + publish_dir = "variant_calling" + publish_files = ['vcf.gz':'manta', 'vcf.gz.tbi':'manta'] + } + 'msisensor_msi' { + publish_by_id = "true" + publish_dir = "variant_calling" + publish_files = ['list':'msisensor'] + } + 'strelka_somatic' { + publish_by_id = "true" + publish_dir = "variant_calling" + publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] + } + 'strelka_somatic_bp' { + publish_by_id = "true" + publish_dir = "variant_calling" + publish_files = ['vcf.gz':'strelka', 'vcf.gz.tbi':'strelka'] + } // QC_TRIM 'fastqc' { args = "--quiet" diff --git a/lib/Completion.groovy b/lib/Completion.groovy index 996276b8e6..956a87574e 100644 --- a/lib/Completion.groovy +++ b/lib/Completion.groovy @@ -3,7 +3,7 @@ */ class Completion { - static void email(workflow, params, summary, run_name, baseDir, multiqc_report, log) { + static void email(workflow, params, summary, run_name, projectDir, multiqc_report, log) { // Set up the e-mail variables def subject = "[$workflow.manifest.name] Successful: $workflow.runName" @@ -56,18 +56,18 @@ class Completion { // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") + def tf = new 
File("$projectDir/assets/email_template.txt") def txt_template = engine.createTemplate(tf).make(email_fields) def email_txt = txt_template.toString() // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") + def hf = new File("$projectDir/assets/email_template.html") def html_template = engine.createTemplate(hf).make(email_fields) def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() diff --git a/lib/Schema.groovy b/lib/Schema.groovy index 4c7215e699..a4ac82173a 100644 --- a/lib/Schema.groovy +++ b/lib/Schema.groovy @@ -108,7 +108,7 @@ class JSON { summary['MarkDuplicates'] = "Options" summary['Java options'] = params.markdup_java_options - summary['GATK Spark'] = params.no_gatk_spark ? 'No' : 'Yes' + summary['GATK Spark'] = params.use_gatk_spark ? 'Yes' : 'No' summary['Save BAMs mapped'] = params.save_bam_mapped ? 'Yes' : 'No' summary['Skip MarkDuplicates'] = params.skip_markduplicates ? 'Yes' : 'No' @@ -126,7 +126,7 @@ class JSON { if (params.cf_ploidy) summary['ploidy'] = params.cf_ploidy } - if ('haplotypecaller' in tools) summary['GVCF'] = params.no_gvcf ? 'No' : 'Yes' + if ('haplotypecaller' in tools) summary['GVCF'] = params.generate_gvcf ? 'Yes' : 'No' if ('strelka' in tools && 'manta' in tools) summary['Strelka BP'] = params.no_strelka_bp ? 
'No' : 'Yes' if (params.pon && ('mutect2' in tools || (params.sentieon && 'tnscope' in tools))) summary['Panel of normals'] = params.pon diff --git a/main.nf b/main.nf index 5260fdd0ff..d571c4f9a0 100644 --- a/main.nf +++ b/main.nf @@ -94,6 +94,8 @@ anno_list = define_anno_list() annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : [] if (!check_parameter_list(annotate_tools,anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information' +if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information' + // // Check parameters if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them' if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them' @@ -176,6 +178,8 @@ if (params.save_reference) modules['bwa_index'].publish_files if (params.save_reference) modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2'] if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals'] if (params.save_reference) modules['dict'].publish_files = ['dict':'dict'] +if (params.save_reference) modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target'] +if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi'] if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai'] if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp'] if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource'] @@ -286,6 +290,8 @@ include { BUILD_INDICES } from './modules/local/subworkflow/build_indices' addPa 
bwamem2_index_options: modules['bwamem2_index'], create_intervals_bed_options: modules['create_intervals_bed'], gatk_dict_options: modules['dict'], + index_target_bed_options: modules['index_target_bed'], + msisensor_scan_options: modules['msisensor_scan'], samtools_faidx_options: modules['samtools_faidx'], tabix_dbsnp_options: modules['tabix_dbsnp'], tabix_germline_resource_options: modules['tabix_germline_resource'], @@ -315,12 +321,20 @@ include { RECALIBRATE } from './modules/local/subworkflow/recalibrate' addParams samtools_stats_options: modules['samtools_stats_recalibrate'] ) include { GERMLINE_VARIANT_CALLING } from './modules/local/subworkflow/germline_variant_calling' addParams( - haplotypecaller_options: modules['haplotypecaller'], - genotypegvcf_options: modules['genotypegvcf'], concat_gvcf_options: modules['concat_gvcf'], concat_haplotypecaller_options: modules['concat_haplotypecaller'], + genotypegvcf_options: modules['genotypegvcf'], + haplotypecaller_options: modules['haplotypecaller'], strelka_options: modules['strelka_germline'] ) +// include { TUMOR_VARIANT_CALLING } from './modules/local/subworkflow/tumor_variant_calling' addParams( +// ) +include { PAIR_VARIANT_CALLING } from './modules/local/subworkflow/pair_variant_calling' addParams( + manta_options: modules['manta_somatic'], + msisensor_msi_options: modules['msisensor_msi'], + strelka_bp_options: modules['strelka_somatic_bp'], + strelka_options: modules['strelka_somatic'] +) /* -------------------------------------------------------------------------------- @@ -402,6 +416,7 @@ workflow { known_indels, pon, step, + target_bed, tools) intervals = BUILD_INDICES.out.intervals @@ -415,6 +430,8 @@ workflow { known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file") pon_tbi = params.pon ? params.pon_index ? 
file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file") + msisensor_scan = BUILD_INDICES.out.msisensor_scan + target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi /* -------------------------------------------------------------------------------- PREPROCESSING @@ -530,26 +547,52 @@ workflow { fasta, intervals, target_bed, + target_bed_gz_tbi, tools) - /* - -------------------------------------------------------------------------------- - SOMATIC VARIANT CALLING - -------------------------------------------------------------------------------- - */ +/* +-------------------------------------------------------------------------------- + SOMATIC VARIANT CALLING +-------------------------------------------------------------------------------- +*/ - /* - -------------------------------------------------------------------------------- - ANNOTATION - -------------------------------------------------------------------------------- - */ + // TUMOR_VARIANT_CALLING( + // bam_variant_calling, + // dbsnp, + // dbsnp_tbi, + // dict, + // fai, + // fasta, + // intervals, + // target_bed, + // target_bed_gz_tbi, + // tools) + + PAIR_VARIANT_CALLING( + bam_variant_calling, + dbsnp, + dbsnp_tbi, + dict, + fai, + fasta, + intervals, + msisensor_scan, + target_bed, + target_bed_gz_tbi, + tools) +/* +-------------------------------------------------------------------------------- + ANNOTATION +-------------------------------------------------------------------------------- +*/ - /* - -------------------------------------------------------------------------------- - MultiQC - -------------------------------------------------------------------------------- - */ + +/* +-------------------------------------------------------------------------------- + MULTIQC +-------------------------------------------------------------------------------- +*/ // GET_SOFTWARE_VERSIONS() @@ -563,9 +606,9 @@ workflow { /* 
-------------------------------------------------------------------------------- - SEND COMPLETION EMAIL + SEND COMPLETION EMAIL -------------------------------------------------------------------------------- - */ +*/ workflow.onComplete { def multiqc_report = [] @@ -1149,171 +1192,6 @@ workflow.onComplete { // vcf_sentieon_compressed = vcf_sentieon_compressed.dump(tag:'Sentieon VCF indexed') -// // STEP STRELKA.2 - SOMATIC PAIR - -// process Strelka { -// label 'cpus_max' -// label 'memory_max' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamStrelka -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfStrelka - -// when: 'strelka' in tools - -// script: -// beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" -// options = params.target_bed ? 
"--exome --callRegions call_targets.bed.gz" : "" -// """ -// ${beforeScript} -// configureStrelkaSomaticWorkflow.py \ -// --tumor ${bamTumor} \ -// --normal ${bamNormal} \ -// --referenceFasta ${fasta} \ -// ${options} \ -// --runDir Strelka - -// python Strelka/runWorkflow.py -m local -j ${task.cpus} - -// mv Strelka/results/variants/somatic.indels.vcf.gz \ -// Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz -// mv Strelka/results/variants/somatic.indels.vcf.gz.tbi \ -// Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi -// mv Strelka/results/variants/somatic.snvs.vcf.gz \ -// Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz -// mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi \ -// Strelka_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi -// """ -// } - -// vcfStrelka = vcfStrelka.dump(tag:'Strelka') - -// // STEP MANTA.2 - SOMATIC PAIR - -// process Manta { -// label 'cpus_max' -// label 'memory_max' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Manta", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamManta -// file(fasta) from fasta -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// set val("Manta"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfManta -// set idPatient, idSampleNormal, idSampleTumor, file("*.candidateSmallIndels.vcf.gz"), file("*.candidateSmallIndels.vcf.gz.tbi") into mantaToStrelka - -// when: 'manta' in tools - -// script: -// beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" -// options = params.target_bed ? 
"--exome --callRegions call_targets.bed.gz" : "" -// """ -// ${beforeScript} -// configManta.py \ -// --normalBam ${bamNormal} \ -// --tumorBam ${bamTumor} \ -// --reference ${fasta} \ -// ${options} \ -// --runDir Manta - -// python Manta/runWorkflow.py -m local -j ${task.cpus} - -// mv Manta/results/variants/candidateSmallIndels.vcf.gz \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz -// mv Manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSmallIndels.vcf.gz.tbi -// mv Manta/results/variants/candidateSV.vcf.gz \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz -// mv Manta/results/variants/candidateSV.vcf.gz.tbi \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.candidateSV.vcf.gz.tbi -// mv Manta/results/variants/diploidSV.vcf.gz \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz -// mv Manta/results/variants/diploidSV.vcf.gz.tbi \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.diploidSV.vcf.gz.tbi -// mv Manta/results/variants/somaticSV.vcf.gz \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz -// mv Manta/results/variants/somaticSV.vcf.gz.tbi \ -// Manta_${idSampleTumor}_vs_${idSampleNormal}.somaticSV.vcf.gz.tbi -// """ -// } - -// vcfManta = vcfManta.dump(tag:'Manta') - -// // Remmaping channels to match input for StrelkaBP -// pairBamStrelkaBP = pairBamStrelkaBP.map { -// idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor -> -// [idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor] -// }.join(mantaToStrelka, by:[0,1,2]).map { -// idPatientNormal, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor, mantaCSI, mantaCSIi -> -// [idPatientNormal, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor, mantaCSI, mantaCSIi] -// } - -// // STEP STRELKA.3 - SOMATIC PAIR - BEST PRACTICES - -// process StrelkaBP { -// 
label 'cpus_max' -// label 'memory_max' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Strelka", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(mantaCSI), file(mantaCSIi) from pairBamStrelkaBP -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// set val("Strelka"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfStrelkaBP - -// when: 'strelka' in tools && 'manta' in tools && !params.no_strelka_bp - -// script: -// beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" -// options = params.target_bed ? "--exome --callRegions call_targets.bed.gz" : "" -// """ -// ${beforeScript} -// configureStrelkaSomaticWorkflow.py \ -// --tumor ${bamTumor} \ -// --normal ${bamNormal} \ -// --referenceFasta ${fasta} \ -// --indelCandidates ${mantaCSI} \ -// ${options} \ -// --runDir Strelka - -// python Strelka/runWorkflow.py -m local -j ${task.cpus} - -// mv Strelka/results/variants/somatic.indels.vcf.gz \ -// StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz -// mv Strelka/results/variants/somatic.indels.vcf.gz.tbi \ -// StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_indels.vcf.gz.tbi -// mv Strelka/results/variants/somatic.snvs.vcf.gz \ -// StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz -// mv Strelka/results/variants/somatic.snvs.vcf.gz.tbi \ -// StrelkaBP_${idSampleTumor}_vs_${idSampleNormal}_somatic_snvs.vcf.gz.tbi -// """ -// } - -// vcfStrelkaBP = vcfStrelkaBP.dump(tag:'Strelka BP') - // // STEP CNVkit // process CNVkit { @@ -1346,61 +1224,6 @@ workflow.onComplete { // """ // } -// // STEP MSISENSOR.1 - 
SCAN - -// // Scan reference genome for microsatellites -// process MSIsensor_scan { -// label 'cpus_1' -// label 'memory_max' - -// tag "${fasta}" - -// input: -// file(fasta) from fasta -// file(fastaFai) from fai - -// output: -// file "microsatellites.list" into msi_scan_ch - -// when: 'msisensor' in tools - -// script: -// """ -// msisensor scan -d ${fasta} -o microsatellites.list -// """ -// } - -// // STEP MSISENSOR.2 - SCORE - -// // Score the normal vs somatic pair of bams - -// process MSIsensor_msi { -// label 'cpus_4' -// label 'memory_max' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/MSIsensor", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamMsisensor -// file msiSites from msi_scan_ch - -// output: -// set val("Msisensor"), idPatient, file("${idSampleTumor}_vs_${idSampleNormal}_msisensor"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_dis"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_germline"), file("${idSampleTumor}_vs_${idSampleNormal}_msisensor_somatic") into msisensor_out_ch - -// when: 'msisensor' in tools - -// script: -// """ -// msisensor msi -d ${msiSites} \ -// -b 4 \ -// -n ${bamNormal} \ -// -t ${bamTumor} \ -// -o ${idSampleTumor}_vs_${idSampleNormal}_msisensor -// """ -// } - // // STEP ASCAT.1 - ALLELECOUNTER // // Run commands and code from Malin Larsson diff --git a/modules/local/process/index_target_bed.nf b/modules/local/process/index_target_bed.nf new file mode 100644 index 0000000000..d03eaa581a --- /dev/null +++ b/modules/local/process/index_target_bed.nf @@ -0,0 +1,32 @@ +include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' + +params.options = [:] +def options = initOptions(params.options) + +environment = params.enable_conda ? 
"bioconda::htslib=1.11" : null +container = "quay.io/biocontainers/htslib:1.11--hd3b49d5_0" +if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0" + +process INDEX_TARGET_BED { + label 'cpus_8' + + tag "${target_bed}" + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } + + conda environment + container container + + input: + path target_bed + + output: + tuple path("${target_bed}.gz"), path("${target_bed}.gz.tbi") + + script: + """ + bgzip --threads ${task.cpus} -c ${target_bed} > ${target_bed}.gz + tabix ${target_bed}.gz + """ +} \ No newline at end of file diff --git a/modules/local/subworkflow/build_indices.nf b/modules/local/subworkflow/build_indices.nf index 0f4eaba448..696e22a733 100644 --- a/modules/local/subworkflow/build_indices.nf +++ b/modules/local/subworkflow/build_indices.nf @@ -9,6 +9,8 @@ params.bwa_index_options = [:] params.bwamem2_index_options = [:] params.create_intervals_bed_options = [:] params.gatk_dict_options = [:] +params.index_target_bed_options = [:] +params.msisensor_scan_options = [:] params.samtools_faidx_options = [:] params.tabix_dbsnp_options = [:] params.tabix_germline_resource_options = [:] @@ -26,6 +28,8 @@ include { HTSLIB_TABIX as TABIX_DBSNP } from '../../nf-core/softw include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options) include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options) include { HTSLIB_TABIX as TABIX_PON } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options) +include { INDEX_TARGET_BED } from '../process/index_target_bed' addParams(options: 
params.index_target_bed_options) +include { MSISENSOR_SCAN } from '../../nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options) include { SAMTOOLS_FAIDX } from '../../nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options) workflow BUILD_INDICES{ @@ -36,6 +40,7 @@ workflow BUILD_INDICES{ known_indels // channel: [optional] known_indels pon // channel: [optional] pon step // value: [mandatory] starting step + target_bed // channel: [optional] target_bed tools // list: [optional] tools to run main: @@ -58,6 +63,10 @@ workflow BUILD_INDICES{ if (!(params.dbsnp_index) && params.dbsnp && ('mapping' in step || 'preparerecalibration' in step || 'controlfreec' in tools || 'haplotypecaller' in tools || 'mutect2' in tools || 'tnscope' in tools)) result_dbsnp_tbi = TABIX_DBSNP(dbsnp) + result_target_bed = Channel.empty() + if ((params.target_bed) && ('manta' in tools || 'strelka' in tools)) + result_target_bed = INDEX_TARGET_BED(target_bed) + result_germline_resource_tbi = Channel.empty() if (!(params.germline_resource_index) && params.germline_resource && 'mutect2' in tools) result_germline_resource_tbi = TABIX_GERMLINE_RESOURCE(germline_resource) @@ -66,10 +75,15 @@ workflow BUILD_INDICES{ if (!(params.known_indels_index) && params.known_indels && ('mapping' in step || 'preparerecalibration' in step)) result_known_indels_tbi = TABIX_KNOWN_INDELS(known_indels) + result_msisensor_scan = Channel.empty() + if ('msisensor' in tools) + result_msisensor_scan = MSISENSOR_SCAN(fasta, result_fai) + result_pon_tbi = Channel.empty() if (!(params.pon_index) && params.pon && ('tnscope' in tools || 'mutect2' in tools)) result_pon_tbi = TABIX_PON(pon) + result_intervals = Channel.empty() if (params.no_intervals) { file("${params.outdir}/no_intervals.bed").text = "no_intervals\n" result_intervals = Channel.from(file("${params.outdir}/no_intervals.bed")) @@ -107,5 +121,7 @@ workflow BUILD_INDICES{ germline_resource_tbi = 
result_germline_resource_tbi intervals = result_intervals known_indels_tbi = result_known_indels_tbi + msisensor_scan = result_msisensor_scan pon_tbi = result_pon_tbi + target_bed_gz_tbi = result_target_bed } \ No newline at end of file diff --git a/modules/local/subworkflow/germline_variant_calling.nf b/modules/local/subworkflow/germline_variant_calling.nf index 1964b12a61..c2288519a2 100644 --- a/modules/local/subworkflow/germline_variant_calling.nf +++ b/modules/local/subworkflow/germline_variant_calling.nf @@ -18,15 +18,16 @@ include { STRELKA_GERMLINE as STRELKA } from '../../nf-core/software workflow GERMLINE_VARIANT_CALLING { take: - bam // channel: [mandatory] bam - dbsnp // channel: [mandatory] dbsnp - dbsnp_tbi // channel: [mandatory] dbsnp_tbi - dict // channel: [mandatory] dict - fai // channel: [mandatory] fai - fasta // channel: [mandatory] fasta - intervals // channel: [mandatory] intervals - target_bed // channel: [optional] target_bed - tools // list: [mandatory] list of tools + bam // channel: [mandatory] bam + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + intervals // channel: [mandatory] intervals + target_bed // channel: [optional] target_bed + target_bed_gz_tbi // channel: [optional] target_bed_gz_tbi + tools // list: [mandatory] list of tools main: @@ -113,7 +114,7 @@ workflow GERMLINE_VARIANT_CALLING { bam, fasta, fai, - target_bed) + target_bed_gz_tbi) strelka_vcf = STRELKA.out.vcf } diff --git a/modules/local/subworkflow/pair_variant_calling.nf b/modules/local/subworkflow/pair_variant_calling.nf new file mode 100644 index 0000000000..37b21807d6 --- /dev/null +++ b/modules/local/subworkflow/pair_variant_calling.nf @@ -0,0 +1,109 @@ +/* +================================================================================ + SOMATIC VARIANT CALLING 
+================================================================================ +*/ + +params.manta_options = [:] +params.msisensor_msi_options = [:] +params.strelka_options = [:] +params.strelka_bp_options = [:] + +include { MANTA_SOMATIC as MANTA } from '../../nf-core/software/manta/somatic' addParams(options: params.manta_options) +include { MSISENSOR_MSI } from '../../nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options) +include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options) +include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options) + +workflow PAIR_VARIANT_CALLING { + take: + bam // channel: [mandatory] bam + dbsnp // channel: [mandatory] dbsnp + dbsnp_tbi // channel: [mandatory] dbsnp_tbi + dict // channel: [mandatory] dict + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + intervals // channel: [mandatory] intervals + msisensor_scan // channel: [optional] msisensor_scan + target_bed // channel: [optional] target_bed + target_bed_gz_tbi // channel: [optional] target_bed_gz_tbi + tools // list: [mandatory] list of tools + + main: + + bam.map{ meta, bam, bai -> + patient = meta.patient + sample = meta.sample + gender = meta.gender + status = meta.status + [patient, sample, gender, status, bam, bai] + }.branch{ + normal: it[3] == 0 + tumor: it[3] == 1 + }.set{ bam_to_cross } + + bam_pair = bam_to_cross.normal.cross(bam_to_cross.tumor).map { normal, tumor -> + def meta = [:] + meta.patient = normal[0] + meta.normal = normal[1] + meta.tumor = tumor[1] + meta.gender = normal[2] + meta.id = "${meta.tumor}_vs_${meta.normal}" + + [meta, normal[4], normal[5], tumor[4], tumor[5]] + } + + manta_vcf = Channel.empty() + strelka_vcf = Channel.empty() + + if ('manta' in tools) { + MANTA( + bam_pair, + fasta, + fai, + target_bed_gz_tbi) + + 
manta_candidate_small_indels_vcf = MANTA.out.candidate_small_indels_vcf + manta_candidate_sv_vcf = MANTA.out.candidate_sv_vcf + manta_diploid_sv_vcf = MANTA.out.diploid_sv_vcf + manta_somatic_sv_vcf = MANTA.out.somatic_sv_vcf + manta_csi_for_strelka_bp = MANTA.out.manta_csi_for_strelka_bp + + manta_vcf = manta_candidate_small_indels_vcf.mix(manta_candidate_sv_vcf,manta_diploid_sv_vcf,manta_somatic_sv_vcf) + + if ('strelka' in tools) { + STRELKA_BP( + manta_csi_for_strelka_bp, + fasta, + fai, + target_bed_gz_tbi) + + strelka_indels_vcf = STRELKA_BP.out.indels_vcf + strelka_snvs_vcf = STRELKA_BP.out.snvs_vcf + + strelka_vcf = strelka_vcf.mix(strelka_indels_vcf,strelka_snvs_vcf) + } + } + + if ('msisensor' in tools) { + MSISENSOR_MSI( + bam_pair, + msisensor_scan) + } + + if ('strelka' in tools) { + STRELKA( + bam_pair, + fasta, + fai, + target_bed_gz_tbi) + + strelka_indels_vcf = STRELKA.out.indels_vcf + strelka_snvs_vcf = STRELKA.out.snvs_vcf + + strelka_vcf = strelka_vcf.mix(strelka_indels_vcf,strelka_snvs_vcf) + } + + emit: + manta_vcf = manta_vcf + strelka_vcf = strelka_vcf +} diff --git a/modules/nf-core/software/gatk/markduplicates.nf b/modules/nf-core/software/gatk/markduplicates.nf index 91bdfe76c8..b5be16d0e1 100644 --- a/modules/nf-core/software/gatk/markduplicates.nf +++ b/modules/nf-core/software/gatk/markduplicates.nf @@ -30,11 +30,21 @@ process GATK_MARKDUPLICATES { markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics" - if (params.no_gatk_spark) + if (params.use_gatk_spark) + """ + gatk --java-options ${markdup_java_options} \ + MarkDuplicatesSpark \ + -I ${meta.sample}.bam \ + -O ${meta.sample}.md.bam \ + ${metrics} \ + --tmp-dir . 
\ + --create-output-bam-index true \ + --spark-master local[${task.cpus}] + """ + else """ gatk --java-options ${markdup_java_options} \ MarkDuplicates \ - --MAX_RECORDS_IN_RAM 50000 \ --INPUT ${meta.sample}.bam \ --METRICS_FILE ${meta.sample}.bam.metrics \ --TMP_DIR . \ @@ -43,15 +53,4 @@ process GATK_MARKDUPLICATES { --OUTPUT ${meta.sample}.md.bam mv ${meta.sample}.md.bai ${meta.sample}.md.bam.bai """ - else - """ - gatk --java-options ${markdup_java_options} \ - MarkDuplicatesSpark \ - -I ${meta.sample}.bam \ - -O ${meta.sample}.md.bam \ - ${metrics} \ - --tmp-dir . \ - --create-output-bam-index true \ - --spark-master local[${task.cpus}] - """ } \ No newline at end of file diff --git a/modules/nf-core/software/manta/somatic.nf b/modules/nf-core/software/manta/somatic.nf new file mode 100644 index 0000000000..98b5a9b6ed --- /dev/null +++ b/modules/nf-core/software/manta/somatic.nf @@ -0,0 +1,65 @@ +include { initOptions; saveFiles; getSoftwareName } from './../functions' + +params.options = [:] +def options = initOptions(params.options) + +environment = params.enable_conda ? 
"bioconda::manta=1.6.0" : null +container = "quay.io/biocontainers/manta:1.6.0--py27_0" +if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/manta:1.6.0--py27_0" + +process MANTA_SOMATIC { + tag "${meta.id}" + + label 'CPUS_MAX' + label 'MEMORY_MAX' + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda environment + container container + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) + path fasta + path fai + tuple path(target_bed), path(target_bed_tbi) + + output: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path("*.candidateSmallIndels.vcf.gz"), path("*.candidateSmallIndels.vcf.gz.tbi"), emit: manta_csi_for_strelka_bp + tuple val(meta), path("*.candidateSmallIndels.vcf.gz"), path("*.candidateSmallIndels.vcf.gz.tbi"), emit: candidate_small_indels_vcf + tuple val(meta), path("*.candidateSV.vcf.gz"), path("*.candidateSV.vcf.gz.tbi"), emit: candidate_sv_vcf + tuple val(meta), path("*.diploidSV.vcf.gz"), path("*.diploidSV.vcf.gz.tbi"), emit: diploid_sv_vcf + tuple val(meta), path("*.somaticSV.vcf.gz"), path("*.somaticSV.vcf.gz.tbi"), emit: somatic_sv_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "manta_${meta.id}${ioptions.suffix}" : "manta_${meta.id}" + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + options_manta = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" + """ + configManta.py \ + --tumorBam ${bam_tumor} \ + --normalBam ${bam_normal} \ + --reference ${fasta} \ + ${options_manta} \ + --runDir manta + + python manta/runWorkflow.py -m local -j ${task.cpus} + + mv manta/results/variants/candidateSmallIndels.vcf.gz ${prefix}.candidateSmallIndels.vcf.gz + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidateSmallIndels.vcf.gz.tbi + mv manta/results/variants/candidateSV.vcf.gz ${prefix}.candidateSV.vcf.gz + mv manta/results/variants/candidateSV.vcf.gz.tbi ${prefix}.candidateSV.vcf.gz.tbi + mv manta/results/variants/diploidSV.vcf.gz ${prefix}.diploidSV.vcf.gz + mv manta/results/variants/diploidSV.vcf.gz.tbi ${prefix}.diploidSV.vcf.gz.tbi + mv manta/results/variants/somaticSV.vcf.gz ${prefix}.somaticSV.vcf.gz + mv manta/results/variants/somaticSV.vcf.gz.tbi ${prefix}.somaticSV.vcf.gz.tbi + + echo configManta.py --version &> ${software}.version.txt #2>&1 + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/msisensor/msi.nf b/modules/nf-core/software/msisensor/msi.nf new file mode 100644 index 0000000000..79ce4925fa --- /dev/null +++ b/modules/nf-core/software/msisensor/msi.nf @@ -0,0 +1,48 @@ +include { initOptions; saveFiles; getSoftwareName } from './../functions' + +params.options = [:] +def options = initOptions(params.options) + +environment = params.enable_conda ? 
"bioconda::msisensor=0.5" : null +container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" +if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" + +process MSISENSOR_MSI { + tag "${meta.id}" + + label 'CPUS_1' + label 'MEMORY_MAX' + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda environment + container container + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) + path msisensor_scan + + output: + tuple val(meta), path("*.list") + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "msisensor_${meta.id}${ioptions.suffix}" : "msisensor_${meta.id}" + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + """ + msisensor msi -d ${msisensor_scan} \ + -b 4 \ + -t ${bam_tumor} \ + -n ${bam_normal} \ + -o ${prefix} + + mv ${prefix} ${prefix}.list + mv ${prefix}_dis ${prefix}_dis.list + mv ${prefix}_germline ${prefix}_germline.list + mv ${prefix}_somatic ${prefix}_somatic.list + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/msisensor/scan.nf b/modules/nf-core/software/msisensor/scan.nf new file mode 100644 index 0000000000..180c73f4c7 --- /dev/null +++ b/modules/nf-core/software/msisensor/scan.nf @@ -0,0 +1,39 @@ +include { initOptions; saveFiles; getSoftwareName } from './../functions' + +params.options = [:] +def options = initOptions(params.options) + +environment = params.enable_conda ? 
"bioconda::msisensor=0.5" : null +container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" +if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" + +process MSISENSOR_SCAN { + tag "${fasta}" + + label 'CPUS_1' + label 'MEMORY_MAX' + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } + + conda environment + container container + + input: + path fasta + path fai + + output: + path "microsatellites.list" + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "msisensor_${ioptions.suffix}" : "msisensor_" + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. 
"--threads $task.cpus" + """ + msisensor scan -d ${fasta} -o microsatellites.list + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/strelka/germline.nf b/modules/nf-core/software/strelka/germline.nf index 278af2adff..6295ac1d66 100644 --- a/modules/nf-core/software/strelka/germline.nf +++ b/modules/nf-core/software/strelka/germline.nf @@ -20,10 +20,10 @@ process STRELKA_GERMLINE { container container input: - tuple val(meta), path(bam), path (bai) + tuple val(meta), path(bam), path(bai) path fasta path fai - path target_bed + tuple path(target_bed), path(target_bed_tbi) output: tuple val(meta), path("*_variants.vcf.gz"), path("*_variants.vcf.gz.tbi"), emit: vcf @@ -37,10 +37,8 @@ process STRELKA_GERMLINE { // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter // using the Nextflow "task" variable e.g. "--threads $task.cpus" - beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${target_bed} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" - options_strelka = params.target_bed ? ioptions.args : "" + options_strelka = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" """ - ${beforeScript} configureStrelkaGermlineWorkflow.py \ --bam ${bam} \ --referenceFasta ${fasta} \ @@ -49,12 +47,9 @@ process STRELKA_GERMLINE { python strelka/runWorkflow.py -m local -j ${task.cpus} - mv strelka/results/variants/genome.*.vcf.gz ${prefix}_genome.vcf.gz - + mv strelka/results/variants/genome.*.vcf.gz ${prefix}_genome.vcf.gz mv strelka/results/variants/genome.*.vcf.gz.tbi ${prefix}_genome.vcf.gz.tbi - - mv strelka/results/variants/variants.vcf.gz ${prefix}_variants.vcf.gz - + mv strelka/results/variants/variants.vcf.gz ${prefix}_variants.vcf.gz mv strelka/results/variants/variants.vcf.gz.tbi ${prefix}_variants.vcf.gz.tbi echo configureStrelkaGermlineWorkflow.py --version &> ${software}.version.txt #2>&1 diff --git a/modules/nf-core/software/strelka/somatic.nf b/modules/nf-core/software/strelka/somatic.nf new file mode 100644 index 0000000000..0cfb2b8458 --- /dev/null +++ b/modules/nf-core/software/strelka/somatic.nf @@ -0,0 +1,109 @@ +include { initOptions; saveFiles; getSoftwareName } from './../functions' + +params.options = [:] +def options = initOptions(params.options) + +environment = params.enable_conda ? 
"bioconda::strelka=2.9.10" : null +container = "quay.io/biocontainers/strelka:2.9.10--0" +if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + +process STRELKA_SOMATIC { + tag "${meta.id}" + + label 'CPUS_MAX' + label 'MEMORY_MAX' + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda environment + container container + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) + path fasta + path fai + tuple path(target_bed), path(target_bed_tbi) + + output: + tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf + tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? "strelka_${meta.id}${ioptions.suffix}" : "strelka_${meta.id}" + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + options_strelka = params.target_bed ? 
"--exome --callRegions ${target_bed}" : "" + """ + configureStrelkaSomaticWorkflow.py \ + --tumor ${bam_tumor} \ + --normal ${bam_normal} \ + --referenceFasta ${fasta} \ + ${options_strelka} \ + --runDir strelka + + python strelka/runWorkflow.py -m local -j ${task.cpus} + + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi + + echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 + """ +} + +process STRELKA_SOMATIC_BEST_PRACTICES { + tag "${meta.id}" + + label 'CPUS_MAX' + label 'MEMORY_MAX' + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda environment + container container + + input: + tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path(manta_csi), path(manta_csi_tbi) + path fasta + path fai + tuple path(target_bed), path(target_bed_tbi) + + output: + tuple val(meta), path("*_somatic_indels.vcf.gz"), path("*_somatic_indels.vcf.gz.tbi"), emit: indels_vcf + tuple val(meta), path("*_somatic_snvs.vcf.gz"), path("*_somatic_snvs.vcf.gz.tbi"), emit: snvs_vcf + path "*.version.txt", emit: version + + script: + def software = getSoftwareName(task.process) + def ioptions = initOptions(options) + def prefix = ioptions.suffix ? 
"strelka_bp_${meta.id}${ioptions.suffix}" : "strelka_bp_${meta.id}" + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "$ioptions.args" variable + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + options_strelka = params.target_bed ? "--exome --callRegions ${target_bed}" : "" + """ + configureStrelkaSomaticWorkflow.py \ + --tumor ${bam_tumor} \ + --normal ${bam_normal} \ + --referenceFasta ${fasta} \ + --indelCandidates ${manta_csi} \ + ${options_strelka} \ + --runDir strelka + + python strelka/runWorkflow.py -m local -j ${task.cpus} + + mv strelka/results/variants/somatic.indels.vcf.gz ${prefix}_somatic_indels.vcf.gz + mv strelka/results/variants/somatic.indels.vcf.gz.tbi ${prefix}_somatic_indels.vcf.gz.tbi + mv strelka/results/variants/somatic.snvs.vcf.gz ${prefix}_somatic_snvs.vcf.gz + mv strelka/results/variants/somatic.snvs.vcf.gz.tbi ${prefix}_somatic_snvs.vcf.gz.tbi + + echo configureStrelkaSomaticWorkflow.py --version &> ${software}.version.txt #2>&1 + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 28f8ac9a98..35b6533370 100644 --- a/nextflow.config +++ b/nextflow.config @@ -94,8 +94,9 @@ params { name = false // No default name tracedir = "${params.outdir}/pipeline_info" - // Singularity containers - pull_docker_container = false // Pull default container by default + // Modules + enable_conda = false // conda is enabled with the profile conda or with this params + pull_docker_container = false // Pull default container by default for Singularity // Base specifications // Defaults only, expecting to be overwritten @@ -132,7 +133,7 @@ try { } profiles { - conda { process.conda = "$projectDir/environment.yml" } + conda { params.enable_conda = true } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true diff 
--git a/nextflow_schema.json b/nextflow_schema.json index 4881d35470..13015b2f3e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -752,6 +752,29 @@ "fa_icon": "fas fa-users-cog" } } + }, + "modules": { + "title": "Modules", + "type": "object", + "description": "", + "default": "", + "properties": { + "pull_docker_container": { + "type": "boolean", + "fa_icon": "fab fa-docker", + "description": "Force pull and use of Docker container instead of default Singularity ones", + "hidden": true, + "help_text": "This may be useful if you are unable to download Singularity containers due to proxy issues." + }, + "enable_conda": { + "type": "boolean", + "fa_icon": "fas snake", + "description": "Enable conda", + "hidden": true, + "help_text": "" + } + }, + "fa_icon": "fas fa-cog" } }, "allOf": [