diff --git a/CHANGELOG.md b/CHANGELOG.md index cc3df0da..d1381e82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,12 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.6.1dev - [date] +## v2.7.0dev - [date] ### `Added` - Added `PYOPENMS_CHROMATOGRAMEXTRACTOR` extracting MS1 Chromatograms and visualize them in multiQC report [#329](https://github.com/nf-core/mhcquant/pull/329) - Added `OPENMS_IDMASSACCURACY` and `DATAMASH_HISTOGRAM` to compute fragment mass errors and visualizte them in multiQC report [#332](https://github.com/nf-core/mhcquant/pull/332) +- Added global fdr evaluation in new local subworkflow `RESCORE` [#338](https://github.com/nf-core/mhcquant/pull/338) ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index a4d54a0f..4ea02418 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,7 +81,17 @@ process { ] } - withName: 'OPENMS_IDMERGER*' { + withName: 'OPENMS_IDMERGER|OPENMS_IDMERGER_QUANT' { + ext.args = [ + "-annotate_file_origin true", + "-merge_proteins_add_PSMs" + ].join(' ').trim() + publishDir = [ + enabled: false + ] + } + withName: 'OPENMS_IDMERGER_GLOBAL' { + label = 'process_high_memory' ext.args = [ "-annotate_file_origin true", "-merge_proteins_add_PSMs" @@ -106,6 +116,36 @@ process { ] } + withName: 'OPENMS_IDFILTER_Q_VALUE_GLOBAL' { + label = 'process_high_memory' + ext.prefix = {"${meta.id}_pout_filtered"} + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDFILTER_GLOBAL' { + label = 'process_high_memory' + ext.prefix = {"${meta.id}_pout_filtered"} + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/rescoring"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + withName: 'OPENMS_IDFILTER_QUANT' { ext.prefix = {"${meta.spectra}_fdr_filtered"} ext.args = "-best:spectrum_per_peptide 'sequence+charge+modification'" @@ -149,7 +189,7 @@ process { process { if (params.rescoring_engine == 'mokapot') { - withName: 'NFCORE_MHCQUANT:MHCQUANT:OPENMS_IDSCORESWITCHER' { + withName: 'NFCORE_MHCQUANT:MHCQUANT:RESCORE:OPENMS_IDSCORESWITCHER' { ext.prefix = {"${meta.id}"} ext.args = [ "-new_score q-value", @@ -293,6 +333,22 @@ process { ] } + withName: 'OPENMS_PERCOLATORADAPTER_GLOBAL' { + label = 'process_high' + ext.args = [ + "-seed 4711", + "-trainFDR 0.05", + "-testFDR 0.05", + "-enzyme no_enzyme", + "-subset_max_train ${params.subset_max_train}", + "-post_processing_tdc", + (params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : "" + ].join(' ').trim() + publishDir = [ + enabled: false + ] + } + withName: 'OPENMS_PSMFEATUREEXTRACTOR' { publishDir = [ path: {"${params.outdir}/intermediate_results/rescoring"}, @@ -339,6 +395,15 @@ process { ] } + withName: 'OPENMS_TEXTEXPORTER_GLOBAL' { + label = 'process_high_memory' + publishDir = [ + path: {"${params.outdir}/global_fdr"}, + mode: params.publish_dir_mode, + pattern: '*.tsv' + ] + } + withName: 'OPENMS_IDCONFLICTRESOLVER' { publishDir = [ path: {"${params.outdir}/intermediate_results/features"}, diff --git a/docs/output.md b/docs/output.md index 58d69c37..ead4ad44 100644 --- a/docs/output.md +++ b/docs/output.md @@ -87,6 +87,8 @@ This folder contains the intermediate results from various steps of the MHCquant - `{Sample}_{Condition}_pout.idXML`: Unfiltered percolator output. - `{Sample}_{Condition}_pout_filtered.idXML`: FDR-filtered percolator output. + - `global_fdr`: Contains global FDR-filtered list of all runs in a `tsv` file + - `features`: Holds information of quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in the quantification mode. - `ion_annotations` diff --git a/modules/local/openms_percolatoradapter.nf b/modules/local/openms_percolatoradapter.nf index 0fd348be..ac09160f 100644 --- a/modules/local/openms_percolatoradapter.nf +++ b/modules/local/openms_percolatoradapter.nf @@ -1,6 +1,6 @@ process OPENMS_PERCOLATORADAPTER { tag "$meta.id" - label 'process_low' + label 'process_medium' conda "bioconda::openms-thirdparty=3.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -18,31 +18,33 @@ process OPENMS_PERCOLATORADAPTER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}_pout" - def args = task.ext.args ?: '' - - """ - OMP_NUM_THREADS=$task.cpus \\ - PercolatorAdapter -in $merged_with_features \\ - -out ${prefix}.idXML \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ + def prefix = task.ext.prefix ?: "${meta.id}_pout" + def args = task.ext.args ?: '' + """ + PercolatorAdapter \\ + -in $merged_with_features \\ + -out ${prefix}.idXML \\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) + percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g') + END_VERSIONS + """ stub: - def prefix = task.ext.prefix ?: "${meta.id}_pout" - def args = task.ext.args ?: '' - - """ - touch ${prefix}.idXML - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ + def prefix = task.ext.prefix ?: "${meta.id}_pout" + def args = task.ext.args ?: '' + + """ + touch ${prefix}.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1) + percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g') + END_VERSIONS + """ } diff --git a/nextflow.config b/nextflow.config index 4f03badc..25d3f86f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -53,10 +53,11 @@ params { ms2pip_model_dir = null deeplc_calibration_set_size = 0.15 - // Percolator settings + // Rescoring engine settings fdr_threshold = 0.01 fdr_level = 'peptide_level_fdrs' subset_max_train = 0 + global_fdr = false // IDfilter settings peptide_min_length = 8 diff --git a/nextflow_schema.json b/nextflow_schema.json index 49310fcb..7928bf06 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -315,6 +315,13 @@ "default": 0.01, "description": "Specify the false discovery rate threshold at which peptide hits should be selected." }, + "global_fdr": { + "type": "boolean", + "default": false, + "fa_icon": "fas fa-less-than", + "description": "Compute global FDR and backfilter sample-specific FDRs" + }, + "subset_max_train": { "type": "integer", "hidden": true, diff --git a/subworkflows/local/rescore.nf b/subworkflows/local/rescore.nf new file mode 100644 index 00000000..35ef649c --- /dev/null +++ b/subworkflows/local/rescore.nf @@ -0,0 +1,89 @@ +/* + * Prepares the raw or compressed data holding spectra information for the subsequent database search. + */ + +// +// MODULE: Loaded from modules/local/ +// + +include { MS2RESCORE } from '../../modules/local/ms2rescore' +include { OPENMS_PSMFEATUREEXTRACTOR } from '../../modules/local/openms_psmfeatureextractor' +include { OPENMS_PERCOLATORADAPTER; + OPENMS_PERCOLATORADAPTER as OPENMS_PERCOLATORADAPTER_GLOBAL } from '../../modules/local/openms_percolatoradapter' +include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_GLOBAL } from '../../modules/local/openms_textexporter' +// +// MODULE: Installed directly from nf-core/modules +// + +include { OPENMS_IDMERGER as OPENMS_IDMERGER_GLOBAL } from '../../modules/nf-core/openms/idmerger/main' +include { OPENMS_IDSCORESWITCHER } from '../../modules/nf-core/openms/idscoreswitcher/main.nf' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE; + OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE_GLOBAL; + OPENMS_IDFILTER as OPENMS_IDFILTER_GLOBAL } from '../../modules/nf-core/openms/idfilter/main' + +workflow RESCORE { + + take: + ch_merged_runs + + main: + ch_versions = Channel.empty() + + // Compute features via ms2rescore + MS2RESCORE(ch_merged_runs) + ch_versions = ch_versions.mix(MS2RESCORE.out.versions) + + if (params.rescoring_engine == 'mokapot') { + log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore." + if (params.global_fdr) { + log.warn "Global FDR is currently not supported by mokapot. The global_fdr parameter will be ignored." + } + // Switch comet e-value to mokapot q-value + OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml) + ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions) + ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.idxml + + // Filter by mokapot q-value + OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]}) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions) + ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered + + } else { + // Extract PSM features for Percolator + OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names)) + ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions) + + // Run Percolator with local FDR + OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml) + ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions) + ch_pout = OPENMS_PERCOLATORADAPTER.out.idxml + + if (params.global_fdr) { + // Merge all samples into one group + OPENMS_IDMERGER_GLOBAL(OPENMS_PSMFEATUREEXTRACTOR.out.idxml.map {group_meta, idxml -> [[id:'global'], idxml] }.groupTuple()) + // Run Percolator with global FDR + OPENMS_PERCOLATORADAPTER_GLOBAL(OPENMS_IDMERGER_GLOBAL.out.idxml) + ch_rescored_runs = OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml + // Filter by global percolator q-value + OPENMS_IDFILTER_Q_VALUE_GLOBAL(ch_rescored_runs.map {id, idxml -> [id, idxml, []]}) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.versions) + // Backfilter sample_condition runs according to global FDR + OPENMS_IDFILTER_GLOBAL(ch_pout.combine(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.filtered.map{ it[1] })) + ch_filter_q_value = OPENMS_IDFILTER_GLOBAL.out.filtered + // Save globally merged runs in tsv + OPENMS_TEXTEXPORTER_GLOBAL(OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml) + + } else { + ch_rescored_runs = ch_pout + // Filter by percolator q-value + OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]}) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions) + ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered + } + } + + emit: + rescored_runs = ch_rescored_runs + fdr_filtered = ch_filter_q_value + versions = ch_versions +} diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index 5ef44b4d..ea6ef8ce 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -13,9 +13,6 @@ include { PYOPENMS_CHROMATOGRAMEXTRACTOR } from '../modules/local/pyopenms_chrom include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' include { DATAMASH_HISTOGRAM } from '../modules/local/datamash_histogram' -include { MS2RESCORE } from '../modules/local/ms2rescore' -include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' -include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' @@ -24,6 +21,7 @@ include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabex // SUBWORKFLOW: Loaded from subworkflows/local/ // include { PREPARE_SPECTRA } from '../subworkflows/local/prepare_spectra' +include { RESCORE } from '../subworkflows/local/rescore' include { QUANT } from '../subworkflows/local/quant' /* @@ -35,16 +33,14 @@ include { QUANT } from '../subworkflows/local/quant' // // MODULE: Installed directly from nf-core/modules // -include { OPENMS_DECOYDATABASE } from '../modules/nf-core/openms/decoydatabase/main' -include { OPENMS_IDMASSACCURACY } from '../modules/nf-core/openms/idmassaccuracy/main' -include { OPENMS_IDMERGER } from '../modules/nf-core/openms/idmerger/main' -include { OPENMS_IDSCORESWITCHER } from '../modules/nf-core/openms/idscoreswitcher/main.nf' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/nf-core/openms/idfilter/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mhcquant_pipeline' +include { OPENMS_DECOYDATABASE } from '../modules/nf-core/openms/decoydatabase/main' +include { OPENMS_IDMASSACCURACY } from '../modules/nf-core/openms/idmassaccuracy/main' +include { OPENMS_IDMERGER } from '../modules/nf-core/openms/idmerger/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mhcquant_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -128,42 +124,22 @@ workflow MHCQUANT { .groupTuple() .join(OPENMS_IDMERGER.out.idxml) .map { meta, mzml, idxml -> [meta, idxml, mzml, []] } - .set { ch_ms2rescore_in } - - MS2RESCORE(ch_ms2rescore_in) - ch_versions = ch_versions.mix(MS2RESCORE.out.versions) - - if (params.rescoring_engine == 'percolator') { - // Extract PSM features for Percolator - OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names)) - ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions) - - // Run Percolator - OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml) - ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions) - ch_rescored_runs = OPENMS_PERCOLATORADAPTER.out.idxml - } else { - log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore." - // Switch comet e-value to mokapot q-value - OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml) - ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions) - ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.idxml - } - - // Filter by percolator q-value - OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]}) - ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions) - ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered + .set { ch_rescore_in } + // + // SUBWORKFLOW: RESCORE WITH MOKKAPOT OR PERCOLATOR AND FILTER BY Q-VALUE ON LOCAL/GLOBAL FDR + // + RESCORE( ch_rescore_in ) + ch_versions = ch_versions.mix(RESCORE.out.versions) // // SUBWORKFLOW: QUANT // if (params.quantify) { - QUANT(merge_meta_map, ch_rescored_runs, ch_filter_q_value, ch_clean_mzml_file) + QUANT(merge_meta_map, RESCORE.out.rescored_runs, RESCORE.out.fdr_filtered, ch_clean_mzml_file) ch_versions = ch_versions.mix(QUANT.out.versions) ch_output = QUANT.out.consensusxml } else { - ch_output = ch_filter_q_value + ch_output = RESCORE.out.fdr_filtered } // Annotate Ions for follow-up spectrum validation @@ -171,7 +147,7 @@ workflow MHCQUANT { // Join the ch_filtered_idxml and the ch_mzml_file ch_clean_mzml_file.map { meta, mzml -> [ groupKey([id: "${meta.sample}_${meta.condition}"], meta.group_count), mzml] } .groupTuple() - .join(ch_filter_q_value) + .join(RESCORE.out.fdr_filtered) .set{ ch_ion_annotator_input } // Annotate spectra with ion fragmentation information