diff --git a/README.md b/README.md index 1e6760a7..10e016c2 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ The following processes and tools can be run with `oncoanalyser`: - HRD status prediction (`CHORD`) - Mutational signature fitting (`Sigs`) - Tissue of origin prediction (`CUPPA`) +- Neoepitope prediction (`Neo`) - Report generation (`ORANGE`, `linxreport`) ## Usage diff --git a/conf/hmf_data.config b/conf/hmf_data.config index 6fdcba0b..f80f086a 100644 --- a/conf/hmf_data.config +++ b/conf/hmf_data.config @@ -20,8 +20,12 @@ params { isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' // LILAC lilac_resources = 'dna_pipeline/immune/' + // Neo + neo_resources = 'neo/binding/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' + // Hartwig cohort RNA data + cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.37.csv' cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.37.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.37.csv' @@ -72,8 +76,12 @@ params { isofox_gc_ratios = 'rna_pipeline/read_100_exp_gc_ratios.csv' // LILAC lilac_resources = 'dna_pipeline/immune/' + // Neo + neo_resources = 'neo/binding/' // ORANGE cohort_mapping = 'orange/cohort_mapping.tsv' + // Hartwig cohort RNA data + cohort_tpm_medians = 'neo/tpm_cohort/hmf_tpm_medians.38.csv' cohort_percentiles = 'orange/cohort_percentiles.tsv' alt_sj_distribution = 'rna_pipeline/isofox.hmf_3444.alt_sj_cohort.38.csv' gene_exp_distribution = 'rna_pipeline/isofox.hmf_3444.gene_distribution.38.csv' diff --git a/conf/modules.config b/conf/modules.config index cc09c657..0dfd3002 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -218,6 +218,34 @@ process { ] } + withName: 'NEO_(?:SCORER|FINDER)' { + ext.jarPath = '/opt/neo/neo.jar' + } + + withName: 'NEO_SCORER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> 
filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" }, + ] + } + + withName: '.*:NEO_PREDICTION:ANNOTATE_FUSIONS' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/annotated_fusions/${filename}" }, + ] + } + + withName: 'NEO_FINDER' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" }, + ] + } + withName: 'CUPPA' { publishDir = [ path: { "${params.outdir}" }, diff --git a/docs/output.md b/docs/output.md index 59f9f0c6..6fb6596b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -71,6 +71,8 @@ output/ - [Sigs](#sigs) - Mutational signature fitting - [Tissue of origin prediction](#tissue-of-origin-prediction) - [CUPPA](#cuppa) - Tissue of origin prediction +- [Neoepitope prediction](#neoepitope-prediction) + - [Neo](#neo) - Neoepitope prediction - [Report generation](#report-generation) - [ORANGE](#orange) - Key results summary - [linxreport](#linxreport) - Interactive LINX report @@ -473,6 +475,23 @@ signatures to tumor sample data. [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa) predicts tissue of origin for a given tumor sample using DNA and/or RNA features generated by upstream hmftools components. +### Neoepitope prediction + +#### Neo + +
+Output files + +- `/neo/` + - `.neo.neo_data.tsv`: Neoepitope candidates. + - `.neo.neoepitope.tsv`: LINX fusion neoepitopes. + - `.neo.peptide_scores.tsv`: Peptide binding likelihood and scoring. + +
+ +[Neo](https://github.com/hartwigmedical/hmftools/tree/master/neo) builds comprehensive neoepitope predictions from DNA +data with additional annotations made using RNA data. + ### Report generation #### ORANGE diff --git a/lib/Constants.groovy b/lib/Constants.groovy index cb827975..15d41689 100644 --- a/lib/Constants.groovy +++ b/lib/Constants.groovy @@ -47,6 +47,7 @@ class Constants { LILAC, LINX, MARKDUPS, + NEO, ORANGE, PAVE, PURPLE, diff --git a/lib/Processes.groovy b/lib/Processes.groovy index 98245f6d..f4770838 100644 --- a/lib/Processes.groovy +++ b/lib/Processes.groovy @@ -7,7 +7,17 @@ import Utils class Processes { public static getRunStages(include, exclude, manual_select, log) { - def processes = manual_select ? [] : Constants.Process.values().toList() + + // Get default processes + // NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code + def processes + if (manual_select) { + processes = [] + } else { + processes = Constants.Process.values().toList() + processes.remove(Constants.Process.NEO) + } + def include_list = this.getProcessList(include, log) def exclude_list = this.getProcessList(exclude, log) this.checkIncludeExcludeList(include_list, exclude_list, log) diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf index 5218a8f0..04b78f89 100644 --- a/modules/local/linx/somatic/main.nf +++ b/modules/local/linx/somatic/main.nf @@ -38,6 +38,7 @@ process LINX_SOMATIC { -known_fusion_file ${known_fusion_data} \\ -driver_gene_panel ${driver_gene_panel} \\ -write_vis_data \\ + -write_neo_epitopes \\ -output_dir linx_somatic/ cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/neo/Dockerfile b/modules/local/neo/Dockerfile new file mode 100644 index 00000000..aa0a03e3 --- /dev/null +++ b/modules/local/neo/Dockerfile @@ -0,0 +1,22 @@ +FROM mambaorg/micromamba:0.24.0 + +USER root + +RUN \ + apt-get update && \ + apt-get install -y procps wget && \ + apt-get clean && 
\ + rm -rf /var/lib/apt/lists/* + +RUN \ + mkdir -p /opt/neo/ && \ + wget -O /opt/neo/neo.jar 'https://github.com/hartwigmedical/hmftools/releases/download/neo-v1.2_beta/neo_v1.2_beta.jar' + +USER mambauser + +RUN \ + micromamba install -y -n base -c bioconda -c conda-forge \ + 'openjdk >=8' && \ + micromamba clean --all --yes + +ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}" diff --git a/modules/local/neo/annotate_fusions/environment.yml b/modules/local/neo/annotate_fusions/environment.yml new file mode 100644 index 00000000..d4251c57 --- /dev/null +++ b/modules/local/neo/annotate_fusions/environment.yml @@ -0,0 +1,7 @@ +name: isofox +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-isofox=1.7.1 diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf new file mode 100644 index 00000000..44e59737 --- /dev/null +++ b/modules/local/neo/annotate_fusions/main.nf @@ -0,0 +1,56 @@ +process ANNOTATE_FUSIONS { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" + + input: + tuple val(meta), path(neo_finder_dir), path(bam), path(bai) + val read_length + path genome_fasta + val genome_ver + path genome_fai + path ensembl_data_resources + + output: + tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p isofox/ + + isofox \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -bam_file ${bam} \\ + -functions NEO_EPITOPES \\ + -neo_dir ${neo_finder_dir} \\ + -read_length ${read_length} \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -threads ${task.cpus} \\ + -output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(isofox -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.sample_id}.isf.neoepitope.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/neo/annotate_fusions/meta.yml b/modules/local/neo/annotate_fusions/meta.yml new file mode 100644 index 00000000..83a3fb0b --- /dev/null +++ b/modules/local/neo/annotate_fusions/meta.yml @@ -0,0 +1,63 @@ +name: annotate_fusions +description: Annotate neoepitopes with RNA fusion data +keywords: + - neoepitopes + - rna + - rnaseq +tools: + - isofox: + description: Characterises and counts gene, transcript features + homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox + documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - neo_finder_dir: + type: directory + description: Neo Finder directory + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAI file + pattern: "*.{bai}" + - read_length: + type: integer + description: Read length + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - annotated_fusions: + type: file + description: Annotated neoepitopes file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf new file mode 100644 index 00000000..7a5071ca --- /dev/null +++ b/modules/local/neo/finder/main.nf @@ -0,0 +1,52 @@ +process NEO_FINDER { + tag "${meta.id}" + label 'process_low' + + container 'docker.io/scwatts/neo:1.2_beta--1' + + input: + tuple val(meta), path(purple_dir), path(linx_annotation_dir) + path genome_fasta + val genome_ver + path genome_fai + path ensembl_data_resources + + output: + tuple val(meta), path('neo_finder/'), emit: neo_finder_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p neo_finder/ + + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -jar ${task.ext.jarPath} \\ + ${args} \\ + -sample ${meta.sample_id} \\ + -linx_dir ${linx_annotation_dir} \\ + -somatic_vcf ${purple_dir}/${meta.sample_id}.purple.somatic.vcf.gz \\ + -ref_genome 
${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -log_debug \\ + -output_dir neo_finder/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p neo_finder/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} + diff --git a/modules/local/neo/finder/meta.yml b/modules/local/neo/finder/meta.yml new file mode 100644 index 00000000..01dc2fcb --- /dev/null +++ b/modules/local/neo/finder/meta.yml @@ -0,0 +1,52 @@ +name: neo_finder +description: Identify candidate neoepitopes +keywords: + - neoepitopes +tools: + - neo: + description: Predict and score neoepitopes + homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo + documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - purple_dir: + type: directory + description: PURPLE output directory + - linx_annotation_dir: + type: directory + description: LINX somatic annotation output directory + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_ver: + type: string + description: Reference genome version + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - neo_finder_dir: + type: directory + description: Neo Finder output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf new file mode 100644 index 00000000..056a8b1b --- /dev/null +++ b/modules/local/neo/scorer/main.nf @@ -0,0 +1,71 @@ +process NEO_SCORER { + tag "${meta.id}" + label 'process_medium' + + container 'docker.io/scwatts/neo:1.2_beta--1' + + input: + tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotated_fusions) + path ensembl_data_resources + path neo_resources, stageAs: 'neo_reference_data' + path cohort_tpm_medians + + output: + tuple val(meta), path('neo_scorer/'), emit: neo_scorer_dir + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : '' + def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? "-rna_somatic_vcf ${sage_vcf}" : '' + + def cancer_type_arg = meta.containsKey('cancer_type') ? 
"-cancer_type ${meta.cancer_type}" : '' + + """ + isofox_dir_arg='' + if [[ -n "${isofox_dir}" ]]; then + isofox_dir_local=isofox__prepared/; + + cp -rL ${isofox_dir} \${isofox_dir_local}/; + cp -r ${annotated_fusions} \${isofox_dir_local}/; + + isofox_dir_arg="-isofox_dir \${isofox_dir_local}"; + fi; + + mkdir -p neo_scorer/ + + java \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + -cp ${task.ext.jarPath} \\ + com.hartwig.hmftools.neo.score.NeoScorer \\ + ${args} \\ + -sample ${meta.sample_id} \\ + ${cancer_type_arg} \\ + ${rna_sample_arg} \\ + \${isofox_dir_arg} \\ + -purple_dir ${purple_dir} \\ + ${rna_somatic_vcf_arg} \\ + -lilac_dir ${lilac_dir} \\ + -neo_dir ${neo_finder_dir} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + -score_file_dir ${neo_resources} \\ + -cancer_tpm_medians_file ${cohort_tpm_medians} \\ + -log_debug \\ + -output_dir neo_scorer/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + neo: \$(java -jar ${task.ext.jarPath} -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p neo_scorer/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/neo/scorer/meta.yml b/modules/local/neo/scorer/meta.yml new file mode 100644 index 00000000..c870da4d --- /dev/null +++ b/modules/local/neo/scorer/meta.yml @@ -0,0 +1,62 @@ +name: neo_scorer +description: Score and rank candidate neoepitopes +keywords: + - neoepitopes +tools: + - neo: + description: Predict and score neoepitopes + homepage: https://github.com/hartwigmedical/hmftools/tree/master/neo + documentation: https://github.com/hartwigmedical/hmftools/tree/master/neo + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - isofox_dir: + type: directory + description: Isofox output directory (optional) + - purple_dir: + type: directory + description: PURPLE output directory + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - lilac_dir: + type: directory + description: LILAC output directory + - neo_finder_dir: + type: directory + description: Neo Finder output directory + - annotated_fusions: + type: file + description: Annotated neoepitopes file + pattern: "*.{tsv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory + - neo_resources: + type: directory + description: HMF Neo resources directory + - cohort_tpm_medians: + type: file + description: HMF cohort TPM medians file + pattern: "*.{csv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - neo_scorer_dir: + type: directory + description: Neo Scorer output directory + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" + - "@charlesshale" diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf new file mode 100644 index 00000000..c069ed42 --- /dev/null +++ b/subworkflows/local/neo_prediction/main.nf @@ -0,0 +1,214 @@ +// +// Neo identifies and scores neoepitopes +// + +import Constants +import Utils + +include { ANNOTATE_FUSIONS } from '../../../modules/local/neo/annotate_fusions/main' +include { NEO_FINDER } from '../../../modules/local/neo/finder/main' +include { NEO_SCORER } from '../../../modules/local/neo/scorer/main' + +workflow NEO_PREDICTION { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + ch_purple // channel: [mandatory] [ meta, purple_dir ] + ch_sage_somatic_append // channel: [mandatory] [ meta, 
sage_append_vcf ] + ch_lilac // channel: [mandatory] [ meta, lilac_dir ] + ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + genome_fai // channel: [mandatory] /path/to/genome_fai + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + neo_resources // channel: [mandatory] /path/to/neo_resources/ + cohort_tpm_medians // channel: [mandatory] /path/to/cohort_tpm_medians/ + + // Params + isofox_read_length // string: [mandatory] Isofox read length + + main: + // Channel for versions.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // + // MODULE: Neo finder + // + // Select input sources + // channel: [ meta, purple_dir, linx_annotation_dir ] + ch_finder_inputs_selected = WorkflowOncoanalyser.groupByMeta( + ch_purple, + ch_linx, + ) + .map { meta, purple_dir, linx_annotation_dir -> + + def inputs = [ + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(linx_annotation_dir, meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), + ] + + return [meta, *inputs] + } + + // Sort inputs + // channel: runnable: [ meta, purple_dir, linx_annotation_dir ] + // channel: skip: [ meta ] + ch_finder_inputs_sorted = ch_finder_inputs_selected + .branch { meta, purple_dir, linx_annotation_dir -> + + def has_normal_dna = Utils.hasNormalDna(meta) + + def has_runnable_inputs = purple_dir && linx_annotation_dir && has_normal_dna + + runnable: has_runnable_inputs + skip: true + return meta + } + + // Create process input channel + // channel: sample_data: [ meta_finder, purple_dir, linx_annotation_dir ] + ch_finder_inputs = ch_finder_inputs_sorted.runnable + .map { meta, purple_dir, linx_annotation_dir -> + + def meta_finder = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return 
[meta_finder, purple_dir, linx_annotation_dir] + } + + // Run process + NEO_FINDER( + ch_finder_inputs, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(NEO_FINDER.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, neo_finder_dir ] + ch_finder_out = WorkflowOncoanalyser.restoreMeta(NEO_FINDER.out.neo_finder_dir, ch_inputs) + + // + // MODULE: Fusion annotation (Isofox) + // + // Annotate the fusion-derived neoepitope using Isofox where RNA data is available + + // Select input sources and sort + // channel: runnable: [ meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + // channel: skip: [ meta ] + ch_isofox_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_finder_out, + ch_tumor_rna_bam, + ) + .map { meta, neo_finder_dir, tumor_bam, tumor_bai -> + return [ + meta, + neo_finder_dir, + Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + ] + } + .branch { meta, neo_finder_dir, tumor_bam, tumor_bai -> + runnable: Utils.hasTumorRna(meta) + return [meta, neo_finder_dir, tumor_bam, tumor_bai] + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna ] + ch_isofox_inputs = ch_isofox_inputs_sorted.runnable + .map { meta, neo_finder_dir, tumor_bam_rna, tumor_bai_rna -> + + def meta_isofox = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + ] + + return [meta_isofox, neo_finder_dir, tumor_bam_rna, tumor_bai_rna] + } + + // Run process + ANNOTATE_FUSIONS( + ch_isofox_inputs, + isofox_read_length, + genome_fasta, + genome_version, + genome_fai, + ensembl_data_resources, + ) + + ch_versions = ch_versions.mix(ANNOTATE_FUSIONS.out.versions) + + // Set outputs, restoring original meta + // channel: [ meta, annotated_fusions ] + 
ch_annotate_fusions_out = Channel.empty() + .mix( + WorkflowOncoanalyser.restoreMeta(ANNOTATE_FUSIONS.out.annotated_fusions, ch_inputs), + ch_isofox_inputs_sorted.skip.map { meta -> [meta, []] }, + ) + + + // + // MODULE: Neo scorer + // + // Select input sources and prepare input channel + // channel: [ meta_scorer, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions ] + ch_scorer_inputs = WorkflowOncoanalyser.groupByMeta( + ch_isofox, + ch_purple, + ch_sage_somatic_append, + ch_lilac, + ch_finder_out, + ch_annotate_fusions_out, + ) + .map { meta, isofox_dir, purple_dir, sage_somatic_append, lilac_dir, neo_finder_dir, annotated_fusions -> + + def meta_scorer = [ + key: meta.group_id, + id: meta.group_id, + sample_id: Utils.getTumorDnaSampleName(meta), + cancer_type: meta[Constants.InfoField.CANCER_TYPE], + ] + + if (Utils.hasTumorRna(meta)) { + meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) + } + + def inputs = [ + Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), + Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), + Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), + neo_finder_dir, + annotated_fusions, + ] + + return [meta_scorer, *inputs] + } + + // Run process + NEO_SCORER( + ch_scorer_inputs, + ensembl_data_resources, + neo_resources, + cohort_tpm_medians, + ) + + ch_versions = ch_versions.mix(NEO_SCORER.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf index a7ded0be..39c48362 100644 --- a/subworkflows/local/sage_append/main.nf +++ b/subworkflows/local/sage_append/main.nf @@ -21,6 +21,9 @@ workflow SAGE_APPEND { genome_fai // channel: [mandatory] /path/to/genome_fai genome_dict // channel: [mandatory] 
/path/to/genome_dict + // Params + run_germline // boolean: [mandatory] Run germline flag + main: // Channel for version.yml files // channel: [ versions.yml ] @@ -63,7 +66,7 @@ workflow SAGE_APPEND { def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL) - runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing + runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing && run_germline skip: true return meta } diff --git a/workflows/targeted.nf b/workflows/targeted.nf index f2d4f0ba..9189ef93 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -45,6 +45,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Used in Isofox subworkflow only +isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -185,7 +188,6 @@ workflow TARGETED { isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios - isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : panel_data.isofox_gene_ids isofox_tpm_norm = params.isofox_tpm_norm ? 
file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm @@ -476,6 +478,7 @@ workflow TARGETED { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + true, // run_germline ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 0541966d..98c808b7 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -47,6 +47,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +// Used in Isofox and Neo subworkflows +isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -67,6 +70,7 @@ include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantificat include { LILAC_CALLING } from '../subworkflows/local/lilac_calling' include { LINX_ANNOTATION } from '../subworkflows/local/linx_annotation' include { LINX_PLOTTING } from '../subworkflows/local/linx_plotting' +include { NEO_PREDICTION } from '../subworkflows/local/neo_prediction' include { ORANGE_REPORTING } from '../subworkflows/local/orange_reporting' include { PAVE_ANNOTATION } from '../subworkflows/local/pave_annotation' include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' @@ -188,7 +192,6 @@ workflow WGTS { isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios - isofox_read_length = params.isofox_read_length !== null ? 
params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS ISOFOX_QUANTIFICATION( ch_inputs, @@ -465,9 +468,7 @@ workflow WGTS { // channel: [ meta, sage_append_vcf ] ch_sage_somatic_append_out = Channel.empty() ch_sage_germline_append_out = Channel.empty() - if (run_config.stages.orange) { - - // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented + if (run_config.stages.orange || run_config.stages.neo) { SAGE_APPEND( ch_inputs, @@ -477,6 +478,7 @@ workflow WGTS { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + run_config.stages.orange, // run_germline [run for ORANGE but not Neo] ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) @@ -708,6 +710,32 @@ workflow WGTS { } + // + // SUBWORKFLOW: Run Neo to identify and score neoepitopes + // + if (run_config.stages.neo) { + + NEO_PREDICTION( + ch_inputs, + ch_align_rna_tumor_out, + ch_isofox_out, + ch_purple_out, + ch_sage_somatic_append_out, + ch_lilac_out, + ch_linx_somatic_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.ensembl_data_resources, + hmf_data.neo_resources, + hmf_data.cohort_tpm_medians, + isofox_read_length, + ) + + ch_versions = ch_versions.mix(NEO_PREDICTION.out.versions) + + } + // // SUBWORKFLOW: Run CUPPA predict tissue of origin //