diff --git a/CHANGELOG.md b/CHANGELOG.md index 357cc912..286aa207 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- Adding MS²Rescore module with the underlying python CLI [#288](https://github.com/nf-core/mhcquant/issues/288) +- Added MS²Rescore module with the underlying python CLI [#293](https://github.com/nf-core/mhcquant/pull/293) - Template update 2.11 [#300](https://github.com/nf-core/mhcquant/pull/300) - Template update 2.12 [#303](https://github.com/nf-core/mhcquant/pull/303) ### `Fixed` -- Create only one decoy database [#287](https://github.com/nf-core/mhcquant/issues/287) +- Create only one decoy database [#299](https://github.com/nf-core/mhcquant/pull/299) +- Use `groupKey` to streamline group-wise processing [#310](https://github.com/nf-core/mhcquant/pull/310) +- Replace `PYOPENMS_IDFILTER` with `OPENMS_IDFILTER` [#310](https://github.com/nf-core/mhcquant/pull/310) +- Added nf-core modules [#310](https://github.com/nf-core/mhcquant/pull/310) ### `Deprecated` -- Removed MS²PIP and DeepLC modules. These feature generators are now called via the MS²Rescore framework +- Removed MS²PIP and DeepLC modules. These feature generators are now called via the MS²Rescore framework [#293](https://github.com/nf-core/mhcquant/pull/293) ## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/09 diff --git a/README.md b/README.md index 8434830b..10158f28 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ Additional functionality contained by the pipeline currently includes: - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`) > [!WARNING] -> The HLA prediction feature is outdated and will be reworked in the following releases +> The neo-epitope search and HLA prediction feature is broken and will be reworked in the following releases. 
See [#248](https://github.com/nf-core/mhcquant/issues/248) and [#278](https://github.com/nf-core/mhcquant/issues/278) #### Prediction of HLA class 1 peptides diff --git a/bin/IDFilter.py b/bin/IDFilter.py deleted file mode 100755 index 47d6db80..00000000 --- a/bin/IDFilter.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# Written by Jonas Scheid under the MIT license - -from pyopenms import * -import pandas as pd -import os -import argparse - - -def parse_args() -> argparse.Namespace: - """ - Parse command line arguments. - - :return: parsed arguments - :rtype: argparse.Namespace - """ - parser = argparse.ArgumentParser(description="Filter idXML by a given whitelist of peptides.") - parser.add_argument("--input", required=True, type=str, help="Input idXML file.") - parser.add_argument( - "--whitelist", required=True, type=str, help="IdXML file, which peptide IDs are used as whitelist filter." - ) - parser.add_argument("--output", required=True, type=str, help="Filtered idXML file.") - - return parser.parse_args() - - -def parse_idxml(path: str) -> tuple[list, list]: - """ - Parse idXML file and return PeptideIdentification and ProteinIdentification objects. - - :param path: path to idXML file - :type path: str - :return: ProteinIdentification and PeptideIdentification objects - :rtype: (list, list) - """ - protein_ids = [] - peptide_ids = [] - IdXMLFile().load(path, protein_ids, peptide_ids) - - return protein_ids, peptide_ids - - -def filter_run(protein_ids, peptide_ids, whitelist) -> tuple[list, list]: - """ - Filter Protein and PeptideIdentifications of one run by a whitelist of PeptideIdentifications. 
- - :param protein_ids: ProteinIdentification objects - :type protein_ids: list - :param peptide_ids: PeptideIdentification objects - :type peptide_ids: list - :param whitelist: PeptideIdentification objects to keep in the run - :type whitelist: list - """ - filter = IDFilter() - ids_to_keep = [ - peptide_id - for peptide_id in peptide_ids - for hit in peptide_id.getHits() - if hit.getSequence().toString() in whitelist - ] - filter.keepPeptidesWithMatchingSequences(peptide_ids, ids_to_keep, ignore_mods=False) - # We only want to have unique peptide sequences - filter.keepBestPerPeptide(peptide_ids, ignore_mods=False, ignore_charges=False, nr_best_spectrum=1) - filter.removeEmptyIdentifications(peptide_ids) - # We only want to have protein accessions that are referenced by the fdr-filtered peptide hits - filter.removeUnreferencedProteins(protein_ids, peptide_ids) - - return protein_ids, peptide_ids - - -def main(): - args = parse_args() - - # Read idXML files of runs - protein_ids, peptide_ids = parse_idxml(args.input) - - # Read file containing peptides to keep - whitelist_protein_ids, whitelist_peptide_ids = parse_idxml(args.whitelist) - # Get string representation of peptide sequences in fdr_filtered_peptides - whitelist_peptides = [hit.getSequence().toString() for id in whitelist_peptide_ids for hit in id.getHits()] - - # Filter runs for peptides only in the fdr_filtered_peptides list - protein_id_filtered, peptide_ids_filtered = filter_run(protein_ids, peptide_ids, whitelist_peptides) - - # Write filtered run to idXML file - IdXMLFile().store(args.output, protein_id_filtered, peptide_ids_filtered) - - -if __name__ == "__main__": - main() diff --git a/conf/base.config b/conf/base.config index 4bdccc70..50cc36c6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -57,8 +57,8 @@ process { cache = false } withName:TDF2MZML { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * 
task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 10.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } } diff --git a/conf/modules.config b/conf/modules.config index f712a61a..b0d55c2c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,11 +18,106 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nf-core module configs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process { + + withName: 'THERMORAWFILEPARSER' { + ext.args = "-f 2" + ext.prefix = {"${raw.baseName}"} + publishDir = [ + path: {"${params.outdir}"}, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: false + ] + } + + withName: 'OPENMS_DECOYDATABASE' { + ext.args = [ + "-decoy_string DECOY_", + "-decoy_string_position prefix", + "-enzyme 'no cleavage'" + ].join(' ').trim() + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_PEAKPICKERHIRES' { + ext.prefix = {"${mzml.baseName}_picked"} + ext.args = "-algorithm:ms_levels ${params.pick_ms_levels}" + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDMERGER*' { + ext.args = [ + "-annotate_file_origin true", + "-merge_proteins_add_PSMs" + ].join(' ').trim() + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDFILTER_Q_VALUE' { + ext.prefix = {"${meta.id}_pout_filtered"} + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? 
"-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/rescoring"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + + withName: 'OPENMS_IDFILTER_QUANT' { + ext.prefix = {"${meta.spectra}_fdr_filtered"} + ext.args = "-best:spectrum_per_peptide 'sequence+charge+modification'" + publishDir = [ + enabled: false + ] + } + + withName: 'OPENMS_IDFILTER_PSMS' { + ext.prefix = {"${meta.id}_pred_filtered"} + ext.args = "-whitelist:ignore_modifications" + ext.args2 = "-whitelist:peptides" + publishDir = [ + path: {"${params.outdir}/intermediate_results/refined_fdr"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + + withName: 'OPENMS_IDFILTER_REFINED' { + ext.args = [ + "-remove_decoys", + "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", + "-delete_unreferenced_peptide_hits", + (params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold + ].join(' ').trim() + publishDir = [ + path: {"${params.outdir}/intermediate_results/refined_fdr"}, + mode: params.publish_dir_mode, + pattern: '*.idXML' + ] + } + + withName: 'OPENMS_IDRIPPER' { + publishDir = [ + enabled: false ] } @@ -46,12 +141,53 @@ process { } process { + if (!params.skip_quantification) { + withName: 'NFCORE_MHCQUANT:MHCQUANT:QUANT:OPENMS_IDSCORESWITCHER' { + ext.args = [ + "-new_score COMET:xcorr", + "-new_score_orientation higher_better", + "-old_score q-value" + ].join(' ').trim() + publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + } +} - withName: 'THERMORAWFILEPARSER' { - publishDir = [ - path: {"${params.outdir}"}, +process { + if (params.rescoring_engine == 'mokapot') { + withName: 'NFCORE_MHCQUANT:MHCQUANT:OPENMS_IDSCORESWITCHER' { + ext.prefix = {"${meta.id}"} + ext.args = [ + "-new_score q-value", + "-new_score_orientation lower_better", + "-old_score expect" + ].join(' ').trim() + 
publishDir = [ + mode: params.publish_dir_mode, + pattern: '*.idXML', + enabled: false + ] + } + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Local module configs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process { + + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - enabled: false + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -76,12 +212,6 @@ process { ] } - withName: 'OPENMS_DECOYDATABASE' { - publishDir = [ - enabled: false - ] - } - withName: 'OPENMS_FILEFILTER' { publishDir = [ enabled: false @@ -101,12 +231,7 @@ process { } withName: 'OPENMS_MAPRTTRANSFORMERMZML|OPENMS_MAPRTTRANSFORMERIDXML' { - publishDir = [ - enabled: false - ] - } - - withName: 'OPENMS_IDMERGER*' { + ext.prefix = {"${meta.spectra}_aligned"} publishDir = [ enabled: false ] @@ -141,21 +266,6 @@ process { ] } - withName: 'OPENMS_IDFILTER_Q_VALUE' { - ext.prefix = {"${meta.id}_pout_filtered"} - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? 
"-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/rescoring"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } - withName: 'MS2RESCORE' { containerOptions = '-u $(id -u) -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' ext.args = [ @@ -209,22 +319,6 @@ process { ] } - withName: 'OPENMS_IDRIPPER' { - publishDir = [ - mode: params.publish_dir_mode, - pattern: '*.idXML', - enabled: false - ] - } - - withName: 'PYOPENMS_IDFILTER' { - publishDir = [ - mode: params.publish_dir_mode, - pattern: '*.idXML', - enabled: false - ] - } - withName: 'OPENMS_FEATUREFINDERIDENTIFICATION' { ext.args = [ "-extract:mz_window ${params.quantification_mz_window}", @@ -265,22 +359,6 @@ process { } -process { - if (!params.skip_quantification) { - withName: 'NFCORE_MHCQUANT:MHCQUANT:QUANT:OPENMS_IDSCORESWITCHER' { - ext.args = [ - "-new_score COMET:xcorr", - "-new_score_orientation higher_better", - "-old_score q-value" - ].join(' ').trim() - publishDir = [ - mode: params.publish_dir_mode, - pattern: '*.idXML', - enabled: false - ] - } - } -} // Refine on predicted subset process { @@ -329,33 +407,6 @@ process { pattern: '*.idXML' ] } - - withName: 'OPENMS_IDFILTER_PSMS' { - ext.prefix = {"${meta.id}_pred_filtered"} - ext.args = [ - "-whitelist:ignore_modifications", - "-whitelist:peptides" - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/refined_fdr"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } - - withName: 'OPENMS_IDFILTER_REFINED' { - ext.args = [ - "-remove_decoys", - "-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'", - "-delete_unreferenced_peptide_hits", - (params.fdr_threshold == '0.01') ? 
"-score:pep 0.05" : "-score:pep " + params.fdr_threshold - ].join(' ').trim() - publishDir = [ - path: {"${params.outdir}/intermediate_results/refined_fdr"}, - mode: params.publish_dir_mode, - pattern: '*.idXML' - ] - } } } @@ -495,21 +546,3 @@ process { } } } - -process { - if (params.rescoring_engine == 'mokapot') { - withName: 'NFCORE_MHCQUANT:MHCQUANT:OPENMS_IDSCORESWITCHER' { - ext.prefix = {"${meta.id}"} - ext.args = [ - "-new_score q-value", - "-new_score_orientation lower_better", - "-old_score expect" - ].join(' ').trim() - publishDir = [ - mode: params.publish_dir_mode, - pattern: '*.idXML', - enabled: false - ] - } - } -} diff --git a/modules.json b/modules.json index 2ef6de96..a530ccd1 100644 --- a/modules.json +++ b/modules.json @@ -14,6 +14,41 @@ "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] + }, + "openms/decoydatabase": { + "branch": "master", + "git_sha": "0f4f5205a67d3a90adc8e341cf2b19a16f5cb272", + "installed_by": ["modules"] + }, + "openms/idfilter": { + "branch": "master", + "git_sha": "f1a1aa1931d1774a0735e34669d3b455c31f6a0b", + "installed_by": ["modules"] + }, + "openms/idmerger": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "openms/idripper": { + "branch": "master", + "git_sha": "82ae8a2a96e84fffb89b57f1ae8bf35ff4ae5f57", + "installed_by": ["modules"] + }, + "openms/idscoreswitcher": { + "branch": "master", + "git_sha": "733023d250311ee76c46d6863a4e056f9855eb5d", + "installed_by": ["modules"] + }, + "openms/peakpickerhires": { + "branch": "master", + "git_sha": "4e2cbac1db88f544711e488e552175368ca14588", + "installed_by": ["modules"] + }, + "thermorawfileparser": { + "branch": "master", + "git_sha": "2d0e53d398315b4e8ed06e81c175dc05d90f33d5", + "installed_by": ["modules"] } } } diff --git a/modules/local/openms_decoydatabase.nf b/modules/local/openms_decoydatabase.nf deleted file mode 100644 index 
2d96b3b8..00000000 --- a/modules/local/openms_decoydatabase.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_DECOYDATABASE { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("*.fasta"), emit: decoy - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${fasta.baseName}_decoy" - - """ - DecoyDatabase -in $fasta \\ - -out ${prefix}.fasta \\ - -decoy_string DECOY_ \\ - -decoy_string_position prefix \\ - -enzyme 'no cleavage' - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_falsediscoveryrate.nf b/modules/local/openms_falsediscoveryrate.nf deleted file mode 100644 index 6ebf48a1..00000000 --- a/modules/local/openms_falsediscoveryrate.nf +++ /dev/null @@ -1,34 +0,0 @@ -process OPENMS_FALSEDISCOVERYRATE { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(idxml) - - output: - tuple val(meta), path("*.idXML"), emit: idxml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${idxml.baseName}_fdr" - - """ - FalseDiscoveryRate -in $idxml \\ - -protein 'false' \\ - -out ${prefix}.idXML \\ - -threads $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_idfilter.nf b/modules/local/openms_idfilter.nf deleted file mode 100644 index 21b8ab18..00000000 --- a/modules/local/openms_idfilter.nf +++ /dev/null @@ -1,40 +0,0 @@ -process OPENMS_IDFILTER { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(idxml), val(peptide_filter) - - output: - tuple val(meta), path("*.idXML"), emit: idxml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}_filtered" - def args = task.ext.args ?: '' - - // TODO: Fix such that [] emtpy list is provided as peptide filter, not null - if (peptide_filter != null) { - args += "-whitelist:peptides $peptide_filter" - } - - """ - IDFilter -in $idxml \\ - -out ${prefix}.idXML \\ - -threads $task.cpus \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_idmerger.nf b/modules/local/openms_idmerger.nf deleted file mode 100644 index bf8bba47..00000000 --- a/modules/local/openms_idmerger.nf +++ /dev/null @@ -1,35 +0,0 @@ -process OPENMS_IDMERGER { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(idxmls) - - output: - tuple val(meta), path("*.idXML"), emit: idxml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - IDMerger -in $idxmls \\ - -out ${prefix}.idXML \\ - -threads $task.cpus \\ - -annotate_file_origin true \\ - -merge_proteins_add_PSMs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_idripper.nf b/modules/local/openms_idripper.nf deleted file mode 100644 index f0bcc633..00000000 --- a/modules/local/openms_idripper.nf +++ /dev/null @@ -1,34 +0,0 @@ -process OPENMS_IDRIPPER { - tag "${meta.id}" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(merged_idxml) - - output: - tuple val(meta), path("*.idXML"), emit: ripped - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - """ - IDRipper -in $merged_idxml \\ - -out . 
\\ - -threads $task.cpus \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_idscoreswitcher.nf b/modules/local/openms_idscoreswitcher.nf deleted file mode 100644 index a7838cc7..00000000 --- a/modules/local/openms_idscoreswitcher.nf +++ /dev/null @@ -1,36 +0,0 @@ -process OPENMS_IDSCORESWITCHER { - tag "$meta.id" - label 'process_single' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(idxml), path(whitelist) - - output: - tuple val(meta), path("*.idXML"), path(whitelist), emit: switched_idxml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - // TODO: fix naming to be more generic - def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_switched" - def args = task.ext.args ?: '' - - """ - IDScoreSwitcher -in $idxml \\ - -out ${prefix}.idXML \\ - -threads $task.cpus \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/openms_mapaligneridentification.nf b/modules/local/openms_mapaligneridentification.nf index 6925578a..bca9c37c 100644 --- a/modules/local/openms_mapaligneridentification.nf +++ b/modules/local/openms_mapaligneridentification.nf @@ -18,7 +18,7 @@ process OPENMS_MAPALIGNERIDENTIFICATION { task.ext.when == null || task.ext.when script: - def out_names = idxmls.collect { it.baseName+'.trafoXML' }.join(' ') + def out_names = idxmls.collect { 
it.baseName.replace('_fdr_filtered','')+'.trafoXML' }.join(' ') def args = task.ext.args ?: '' """ diff --git a/modules/local/openms_maprttransformer.nf b/modules/local/openms_maprttransformer.nf index dde7ef1d..260caf65 100644 --- a/modules/local/openms_maprttransformer.nf +++ b/modules/local/openms_maprttransformer.nf @@ -18,7 +18,7 @@ process OPENMS_MAPRTTRANSFORMER { task.ext.when == null || task.ext.when script: - def prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_aligned" + def prefix = task.ext.prefix ?: "${meta.id}" def fileExt = alignment_file.collect { it.name.tokenize("\\.")[1] }.join(' ') """ diff --git a/modules/local/openms_peakpickerhires.nf b/modules/local/openms_peakpickerhires.nf deleted file mode 100644 index 957f6e2d..00000000 --- a/modules/local/openms_peakpickerhires.nf +++ /dev/null @@ -1,33 +0,0 @@ -process OPENMS_PEAKPICKERHIRES { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::openms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : - 'biocontainers/openms:3.1.0--h8964181_3' }" - - input: - tuple val(meta), path(mzml) - - output: - tuple val(meta), path("*.mzML"), emit: mzml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${mzml.baseName}" - - """ - PeakPickerHiRes -in $mzml \\ - -out ${prefix}.mzML \\ - -algorithm:ms_levels ${params.pick_ms_levels} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/pyopenms_idfilter.nf b/modules/local/pyopenms_idfilter.nf deleted file mode 100644 index 64531083..00000000 --- a/modules/local/pyopenms_idfilter.nf +++ /dev/null @@ -1,34 +0,0 @@ -process PYOPENMS_IDFILTER { - tag "$meta.id" - label 'process_single' - - conda "bioconda::pyopenms=3.1.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pyopenms:3.1.0--py311h9b8898c_0' : - 'biocontainers/pyopenms:3.1.0--py311h9b8898c_0' }" - - input: - tuple val(meta), path(idxml), path(whitelist) - - output: - tuple val(meta), path("*_fdr_filtered.idXML") , emit: filtered - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - prefix = task.ext.prefix ?: "${meta.id}_${meta.sample}_${meta.condition}_fdr_filtered" - - """ - IDFilter.py \\ - --input $idxml \\ - --whitelist $whitelist \\ - --output ${prefix}.idXML - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - pyopenms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/openms/decoydatabase/environment.yml b/modules/nf-core/openms/decoydatabase/environment.yml new file mode 100644 index 00000000..4184259d --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/environment.yml @@ -0,0 +1,7 @@ +name: openms_decoydatabase +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::openms=3.0.0 diff --git a/modules/nf-core/openms/decoydatabase/main.nf b/modules/nf-core/openms/decoydatabase/main.nf new file mode 100644 index 00000000..87632a04 --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/main.nf @@ -0,0 +1,50 @@ +process OPENMS_DECOYDATABASE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fasta"), emit: decoy_fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta.baseName}_decoy" + + """ + DecoyDatabase \\ + -in $fasta \\ + -out ${prefix}.fasta \\ + -threads $task.cpus \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta.baseName}_decoy" + + """ + touch ${prefix}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/decoydatabase/meta.yml b/modules/nf-core/openms/decoydatabase/meta.yml new file mode 100644 index 00000000..f00fe38f --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/meta.yml @@ -0,0 +1,44 @@ +name: "openms_decoydatabase" +description: Create a decoy peptide database from a standard FASTA database. +keywords: + - decoy + - database + - openms + - proteomics + - fasta +tools: + - "openms": + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: ["BSD"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - fasta: + type: file + description: Fasta file containing protein sequences + pattern: "*.{fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Fasta file containing proteins and decoy proteins + pattern: "*.{fasta}" +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/openms/decoydatabase/tests/main.nf.test b/modules/nf-core/openms/decoydatabase/tests/main.nf.test new file mode 100644 index 00000000..8ff4abe9 --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process OPENMS_DECOYDATABASE" + script "../main.nf" + process "OPENMS_DECOYDATABASE" + + tag "modules" + tag "modules_nfcore" + tag "openms" + tag "openms/decoydatabase" + + test("test_openms_decoydatabase") { + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['proteomics']['database']['yeast_ups'], checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/openms/decoydatabase/tests/main.nf.test.snap b/modules/nf-core/openms/decoydatabase/tests/main.nf.test.snap new file mode 100644 index 00000000..c700e7fe --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "test_openms_decoydatabase": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "yeast_UPS_decoy.fasta:md5,96c0295f69e0cb92b705ca4c33e6f047" + ] + ], + "1": [ + "versions.yml:md5,b1b83426d54e63fbb114480936452c4a" + ], + "decoy_fasta": [ + [ + { + "id": "test" + }, + "yeast_UPS_decoy.fasta:md5,96c0295f69e0cb92b705ca4c33e6f047" + ] + ], + "versions": [ + 
"versions.yml:md5,b1b83426d54e63fbb114480936452c4a" + ] + } + ], + "timestamp": "2023-12-06T13:28:58.275989775" + } +} \ No newline at end of file diff --git a/modules/nf-core/openms/decoydatabase/tests/tags.yml b/modules/nf-core/openms/decoydatabase/tests/tags.yml new file mode 100644 index 00000000..4a144804 --- /dev/null +++ b/modules/nf-core/openms/decoydatabase/tests/tags.yml @@ -0,0 +1,2 @@ +openms/decoydatabase: + - "modules/nf-core/openms/decoydatabase/**" diff --git a/modules/nf-core/openms/idfilter/main.nf b/modules/nf-core/openms/idfilter/main.nf new file mode 100644 index 00000000..fdc9c13f --- /dev/null +++ b/modules/nf-core/openms/idfilter/main.nf @@ -0,0 +1,57 @@ +process OPENMS_IDFILTER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::openms=3.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3' : + 'biocontainers/openms:3.1.0--h8964181_3' }" + + input: + tuple val(meta), path(id_file), path(filter_file) + + output: + tuple val(meta), path("*.{idXML,consensusXML}"), emit: filtered + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${id_file.getExtension()}" + // Optional filtering via filter_file + def filter_citerion = task.ext.args2 ?: "-whitelist:peptides" + def filter = filter_file ? 
"${filter_citerion} ${filter_file}" : "" + + """ + IDFilter -in $id_file \\ + -out ${prefix}.${suffix} \\ + -threads $task.cpus \\ + $filter \\ + $args \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${id_file.getExtension()}" + // Optional filtering via filter_file + def filter_citerion = task.ext.args2 ?: "-whitelist:peptides" + def filter = filter_file ? "${filter_citerion} ${filter}" : "" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/idfilter/meta.yml b/modules/nf-core/openms/idfilter/meta.yml new file mode 100644 index 00000000..2b160746 --- /dev/null +++ b/modules/nf-core/openms/idfilter/meta.yml @@ -0,0 +1,50 @@ +name: "openms_idfilter" +description: Filters peptide/protein identification results by different criteria. +keywords: + - filter + - idXML + - openms + - proteomics +tools: + - "openms": + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: "['BSD']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - id_file: + type: file + description: Peptide-spectrum matches. 
+ pattern: "*.{idXML,consensusXML}" + - filter: + type: file + description: Optional idXML file to filter on/out peptides or proteins + patter: "*.{idXML,fasta}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - id_file: + type: file + description: Filtered peptide-spectrum matches. + pattern: "*.{idXML,consensusXML}" + +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/openms/idmerger/environment.yml b/modules/nf-core/openms/idmerger/environment.yml new file mode 100644 index 00000000..68b85bd4 --- /dev/null +++ b/modules/nf-core/openms/idmerger/environment.yml @@ -0,0 +1,7 @@ +name: openms_idmerger +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::openms=3.0.0 diff --git a/modules/nf-core/openms/idmerger/main.nf b/modules/nf-core/openms/idmerger/main.nf new file mode 100644 index 00000000..fc8d41b3 --- /dev/null +++ b/modules/nf-core/openms/idmerger/main.nf @@ -0,0 +1,49 @@ +process OPENMS_IDMERGER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(idxmls) + + output: + tuple val(meta), path("*.idXML"), emit: idxml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + IDMerger \\ + -in $idxmls \\ + -out ${prefix}.idXML \\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/idmerger/meta.yml b/modules/nf-core/openms/idmerger/meta.yml new file mode 100644 index 00000000..32fa27bf --- /dev/null +++ b/modules/nf-core/openms/idmerger/meta.yml @@ -0,0 +1,46 @@ +name: "openms_idmerger" +description: Merges several idXML files into one idXML file. +keywords: + - merge + - idXML + - openms + - proteomics +tools: + - "openms": + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: "['BSD']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - idxmls: + type: file + description: | + List containing 2 or more idXML files + e.g. 
`[ 'file1.idXML', 'file2.idXML' ]` + pattern: "*.{idXML}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - idxml: + type: file + description: Merged idXML output file + pattern: "*.{idXML}" + +authors: + - "@jonasscheid" diff --git a/modules/nf-core/openms/idripper/environment.yml b/modules/nf-core/openms/idripper/environment.yml new file mode 100644 index 00000000..565bdaf7 --- /dev/null +++ b/modules/nf-core/openms/idripper/environment.yml @@ -0,0 +1,7 @@ +name: "openms_idripper" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::openms=3.1.0" diff --git a/modules/nf-core/openms/idripper/main.nf b/modules/nf-core/openms/idripper/main.nf new file mode 100644 index 00000000..7350af42 --- /dev/null +++ b/modules/nf-core/openms/idripper/main.nf @@ -0,0 +1,50 @@ +process OPENMS_IDRIPPER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3': + 'biocontainers/openms:3.1.0--h8964181_3' }" + + input: + tuple val(meta), path(merged_idxml) + + output: + tuple val(meta), path("*.idXML"), emit: idxmls + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + IDRipper \\ + -in $merged_idxml \\ + -out . 
\\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}_1.idXML + touch ${prefix}_2.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/idripper/meta.yml b/modules/nf-core/openms/idripper/meta.yml new file mode 100644 index 00000000..0f6a3e98 --- /dev/null +++ b/modules/nf-core/openms/idripper/meta.yml @@ -0,0 +1,46 @@ +name: openms_idripper +description: Split a merged identification file into their originating identification files +keywords: + - split + - idXML + - openms + - proteomics +tools: + - openms: + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: ["BSD"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - merged_idxml: + type: file + description: Merged idXML file + pattern: "*.{idXML}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - idxmls: + type: file + description: Multiple idXML files + pattern: "*.{idXML}" + +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/openms/idripper/tests/main.nf.test b/modules/nf-core/openms/idripper/tests/main.nf.test new file mode 100644 index 00000000..223f378e --- /dev/null +++ b/modules/nf-core/openms/idripper/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process OPENMS_IDRIPPER" + script "../main.nf" + process "OPENMS_IDRIPPER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "openms" + tag "openms/idmerger" + tag "openms/idripper" + + setup { + run("OPENMS_IDMERGER") { + script "../../idmerger/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple([id:'test'], + [ + file(params.test_data['proteomics']['openms']['idxml1'], checkIfExists: true), + file(params.test_data['proteomics']['openms']['idxml2'], checkIfExists: true) + ] + ) + ]) + """ + } + } + } + + test("proteomics - split - idxml") { + + when { + process { + """ + input[0] = OPENMS_IDMERGER.out.idxml + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("proteomics - split - idxml - stub") { + + options "-stub" + + when { + process { + """ + input[0] = OPENMS_IDMERGER.out.idxml + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/openms/idripper/tests/main.nf.test.snap b/modules/nf-core/openms/idripper/tests/main.nf.test.snap new file mode 100644 index 00000000..2b58f423 --- /dev/null +++ b/modules/nf-core/openms/idripper/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "proteomics - split - idxml": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + 
"HepG2_rep1_small.idXML:md5,ca1c4549a42e0b0c0be8345cf5062dfd", + "HepG2_rep2_small.idXML:md5,14a4e9a7dca3c62db38b6345a63d80d5" + ] + ] + ], + "1": [ + "versions.yml:md5,7a7ebf177e4987780b6371dab640e6c0" + ], + "idxmls": [ + [ + { + "id": "test" + }, + [ + "HepG2_rep1_small.idXML:md5,ca1c4549a42e0b0c0be8345cf5062dfd", + "HepG2_rep2_small.idXML:md5,14a4e9a7dca3c62db38b6345a63d80d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,7a7ebf177e4987780b6371dab640e6c0" + ] + } + ], + "timestamp": "2024-01-05T10:53:57.761966016" + }, + "proteomics - split - idxml - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test_1.idXML:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_2.idXML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,7a7ebf177e4987780b6371dab640e6c0" + ], + "idxmls": [ + [ + { + "id": "test" + }, + [ + "test_1.idXML:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_2.idXML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,7a7ebf177e4987780b6371dab640e6c0" + ] + } + ], + "timestamp": "2024-01-05T10:54:05.672792514" + } +} \ No newline at end of file diff --git a/modules/nf-core/openms/idripper/tests/nextflow.config b/modules/nf-core/openms/idripper/tests/nextflow.config new file mode 100644 index 00000000..20754f56 --- /dev/null +++ b/modules/nf-core/openms/idripper/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName:OPENMS_IDMERGER { + ext.args = "-annotate_file_origin true -merge_proteins_add_PSMs" + } + +} diff --git a/modules/nf-core/openms/idripper/tests/tags.yml b/modules/nf-core/openms/idripper/tests/tags.yml new file mode 100644 index 00000000..cf17c062 --- /dev/null +++ b/modules/nf-core/openms/idripper/tests/tags.yml @@ -0,0 +1,2 @@ +openms/idripper: + - "modules/nf-core/openms/idripper/**" diff --git a/modules/nf-core/openms/idscoreswitcher/environment.yml b/modules/nf-core/openms/idscoreswitcher/environment.yml new file mode 100644 index 00000000..f31b2bca 
--- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/environment.yml @@ -0,0 +1,7 @@ +name: "openms_idscoreswitcher" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::openms=3.1.0" diff --git a/modules/nf-core/openms/idscoreswitcher/main.nf b/modules/nf-core/openms/idscoreswitcher/main.nf new file mode 100644 index 00000000..e2421176 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/main.nf @@ -0,0 +1,51 @@ +process OPENMS_IDSCORESWITCHER { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms:3.1.0--h8964181_3': + 'biocontainers/openms:3.1.0--h8964181_3' }" + + input: + tuple val(meta), path(idxml) + + output: + tuple val(meta), path("*.idXML"), emit: idxml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$idxml" == "${prefix}.idXML") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + IDScoreSwitcher \\ + -in $idxml \\ + -out ${prefix}.idXML \\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$idxml" == "${prefix}.idXML") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ + """ + touch ${prefix}.idXML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/idscoreswitcher/meta.yml b/modules/nf-core/openms/idscoreswitcher/meta.yml new file mode 100644 index 00000000..6e1a6e19 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/meta.yml @@ -0,0 +1,49 @@ +name: "openms_idscoreswitcher" +description: Switches between different scores of peptide or protein hits in identification data +keywords: + - switch + - score + - idXML + - openms + - proteomics +tools: + - openms: + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: ["BSD"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - idxml: + type: file + description: Identification file containing a primary PSM score + pattern: "*.{idXML}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - idxml: + type: file + description: | + Identification file containing a new primary PSM score + obtained from a specified meta value + pattern: "*.{idXML}" + +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test b/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test new file mode 100644 index 00000000..bb4aa1e9 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process OPENMS_IDSCORESWITCHER" + script "../main.nf" + process "OPENMS_IDSCORESWITCHER" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "openms" + tag "openms/idscoreswitcher" + + test("proteomics - switch_score - idxml") { + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['proteomics']['openms']['idxml1'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("proteomics - switch_score - idxml - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['proteomics']['openms']['idxml1'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test.snap b/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test.snap new file mode 100644 index 00000000..3d9d95a4 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "proteomics - switch_score - idxml - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.idXML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"1": [ + "versions.yml:md5,fc26aec256a1ab21dd6eec546146e7f5" + ], + "idxml": [ + [ + { + "id": "test" + }, + "test.idXML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,fc26aec256a1ab21dd6eec546146e7f5" + ] + } + ], + "timestamp": "2024-01-05T15:38:28.11227936" + }, + "proteomics - switch_score - idxml": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.idXML:md5,c9cb1593610b60cc1d9ca2c4defab690" + ] + ], + "1": [ + "versions.yml:md5,fc26aec256a1ab21dd6eec546146e7f5" + ], + "idxml": [ + [ + { + "id": "test" + }, + "test.idXML:md5,c9cb1593610b60cc1d9ca2c4defab690" + ] + ], + "versions": [ + "versions.yml:md5,fc26aec256a1ab21dd6eec546146e7f5" + ] + } + ], + "timestamp": "2024-01-05T15:38:21.402742579" + } +} \ No newline at end of file diff --git a/modules/nf-core/openms/idscoreswitcher/tests/nextflow.config b/modules/nf-core/openms/idscoreswitcher/tests/nextflow.config new file mode 100644 index 00000000..a2239682 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName:OPENMS_IDSCORESWITCHER { + ext.args = "-new_score COMET:xcorr -new_score_orientation higher_better" + } + +} diff --git a/modules/nf-core/openms/idscoreswitcher/tests/tags.yml b/modules/nf-core/openms/idscoreswitcher/tests/tags.yml new file mode 100644 index 00000000..ff6d13f8 --- /dev/null +++ b/modules/nf-core/openms/idscoreswitcher/tests/tags.yml @@ -0,0 +1,2 @@ +openms/idscoreswitcher: + - "modules/nf-core/openms/idscoreswitcher/**" diff --git a/modules/nf-core/openms/peakpickerhires/main.nf b/modules/nf-core/openms/peakpickerhires/main.nf new file mode 100644 index 00000000..420c64c5 --- /dev/null +++ b/modules/nf-core/openms/peakpickerhires/main.nf @@ -0,0 +1,49 @@ +process OPENMS_PEAKPICKERHIRES { + tag "$meta.id" + label 'process_low' + + conda "bioconda::openms=3.0.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/openms:3.0.0--h8964181_1' : + 'biocontainers/openms:3.0.0--h8964181_1' }" + + input: + tuple val(meta), path(mzml) + + output: + tuple val(meta), path("*.mzML"), emit: mzml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + PeakPickerHiRes \\ + -in $mzml \\ + -out ${prefix}.mzML \\ + -threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.mzML + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + openms: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/openms/peakpickerhires/meta.yml b/modules/nf-core/openms/peakpickerhires/meta.yml new file mode 100644 index 00000000..28765e09 --- /dev/null +++ b/modules/nf-core/openms/peakpickerhires/meta.yml @@ -0,0 +1,46 @@ +name: "openms_peakpickerhires" +description: A tool for peak detection in high-resolution profile data (Orbitrap or FTICR) +keywords: + - peak picking + - mzml + - openms + - proteomics +tools: + - "openms": + description: "OpenMS is an open-source software C++ library for LC-MS data management and analyses" + homepage: "https://openms.de" + documentation: "https://openms.readthedocs.io/en/latest/index.html" + tool_dev_url: "https://github.com/OpenMS/OpenMS" + doi: "10.1038/nmeth.3959" + licence: "['BSD']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - mzml: + type: file + description: Mass spectrometer output file in mzML format + pattern: "*.{mzML}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mzml: + type: file + description: Peak-picked mass spectrometer output file in mzML format + pattern: "*.{mzML}" + +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/thermorawfileparser/environment.yml b/modules/nf-core/thermorawfileparser/environment.yml new file mode 100644 index 00000000..77323cbb --- /dev/null +++ b/modules/nf-core/thermorawfileparser/environment.yml @@ -0,0 +1,7 @@ +name: "thermorawfileparser" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::thermorawfileparser=1.4.3" diff --git a/modules/nf-core/thermorawfileparser/main.nf b/modules/nf-core/thermorawfileparser/main.nf new file mode 100644 index 00000000..10ae8226 --- /dev/null +++ b/modules/nf-core/thermorawfileparser/main.nf @@ -0,0 +1,60 @@ +process THERMORAWFILEPARSER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.3--ha8f3691_0' : + 'biocontainers/thermorawfileparser:1.4.3--ha8f3691_0' }" + + input: + tuple val(meta), path(raw) + + output: + tuple val(meta), path("*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}"), emit: spectra + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") ? 
"mgf" : + args.contains("--format 1") || args.contains("-f 1") ? "mzML" : + args.contains("--format 2") || args.contains("-f 2") ? "mzML" : + args.contains("--format 3") || args.contains("-f 3") ? "parquet" : + "mzML" + suffix = args.contains("--gzip")? "${suffix}.gz" : "${suffix}" + + """ + ThermoRawFileParser.sh \\ + --input $raw \\ + --output_file ${prefix}.${suffix} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + thermorawfileparser: \$(ThermoRawFileParser.sh --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains("--format 0") || args.contains("-f 0") ? "mgf" : + args.contains("--format 1") || args.contains("-f 1") ? "mzML" : + args.contains("--format 2") || args.contains("-f 2") ? "mzML" : + args.contains("--format 3") || args.contains("-f 3") ? "parquet" : + "mzML" + suffix = args.contains("--gzip")? "${suffix}.gz" : "${suffix}" + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + thermorawfileparser: \$(ThermoRawFileParser.sh --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/thermorawfileparser/meta.yml b/modules/nf-core/thermorawfileparser/meta.yml new file mode 100644 index 00000000..5c604c63 --- /dev/null +++ b/modules/nf-core/thermorawfileparser/meta.yml @@ -0,0 +1,48 @@ +name: "thermorawfileparser" +description: Parses a Thermo RAW file containing mass spectra to an open file format +keywords: + - raw + - mzml + - mgf + - parquet + - parser + - proteomics +tools: + - thermorawfileparser: + description: "Wrapper around the .net (C#) ThermoFisher ThermoRawFileReader library for running on Linux with mono" + homepage: "https://github.com/compomics/ThermoRawFileParser/blob/master/README.md" + documentation: "https://github.com/compomics/ThermoRawFileParser/blob/master/README.md" + tool_dev_url: "https://github.com/compomics/ThermoRawFileParser" + doi: 
"10.1021/acs.jproteome.9b00328" + licence: ["Apache Software"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw: + type: file + description: Thermo RAW file + pattern: "*.{raw,RAW}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - spectra: + type: file + description: Mass spectra in open format + pattern: "*.{mzML,mzML.gz,mgf,mgf.gz,parquet,parquet.gz}" + +authors: + - "@jonasscheid" +maintainers: + - "@jonasscheid" diff --git a/modules/nf-core/thermorawfileparser/tests/main.nf.test b/modules/nf-core/thermorawfileparser/tests/main.nf.test new file mode 100644 index 00000000..85cfed35 --- /dev/null +++ b/modules/nf-core/thermorawfileparser/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process THERMORAWFILEPARSER" + script "../main.nf" + process "THERMORAWFILEPARSER" + + tag "modules" + tag "modules_nfcore" + tag "thermorawfileparser" + + test("proteomics - parse - raw") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['proteomics']['msspectra']['hla2_file'], checkIfExists: true) + ] + """ + } + } + + then { + // The source file path is written at the top of the file, and the checksum at the bottom by the tool, the rest should be identical + def lines = file(process.out.spectra[0][1]).readLines() + assertAll( + { assert process.success }, + { assert snapshot(lines[21..50]).match() } + ) + } + + } + + test("proteomics - parse - raw - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['proteomics']['msspectra']['hla2_file'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/thermorawfileparser/tests/main.nf.test.snap b/modules/nf-core/thermorawfileparser/tests/main.nf.test.snap new file mode 100644 index 00000000..9b657f1f --- /dev/null +++ b/modules/nf-core/thermorawfileparser/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "proteomics - parse - raw": { + "content": [ + [ + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " ", + " " + ] + ], + "timestamp": "2024-01-21T22:31:20.271719782" + }, + "proteomics - parse - raw - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.mzML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,dd82b5b6b8f27a1b1cf5a5d7c57017bf" + ], + "spectra": [ + [ + { + "id": "test" + }, + "test.mzML:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dd82b5b6b8f27a1b1cf5a5d7c57017bf" + ] + } + ], + "timestamp": "2024-01-21T22:31:27.393155134" + } +} \ No newline at end of file diff --git a/modules/nf-core/thermorawfileparser/tests/tags.yml b/modules/nf-core/thermorawfileparser/tests/tags.yml new file mode 100644 index 00000000..8517ab93 --- /dev/null +++ b/modules/nf-core/thermorawfileparser/tests/tags.yml @@ -0,0 +1,2 @@ +thermorawfileparser: + - "modules/nf-core/thermorawfileparser/**" diff --git a/subworkflows/local/include_proteins.nf b/subworkflows/local/include_proteins.nf index 3235cc9c..b1c54396 100644 --- a/subworkflows/local/include_proteins.nf +++ b/subworkflows/local/include_proteins.nf @@ -26,7 +26,7 @@ workflow INCLUDE_PROTEINS { .map(it -> [it[1], it[2], it[3]]) // If specified translate variants to proteins and include in reference fasta GENERATE_PROTEINS_FROM_VCF( ch_vcf ) - ch_versions = ch_versions.mix(GENERATE_PROTEINS_FROM_VCF.out.versions.first().ifEmpty(null)) + ch_versions = 
ch_versions.mix(GENERATE_PROTEINS_FROM_VCF.out.versions) emit: // Define the information that is returned by this workflow diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 087d71c0..835d7ae0 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -9,11 +9,19 @@ workflow INPUT_CHECK { samplesheet // file: /path/to/samplesheet.csv main: - SAMPLESHEET_CHECK ( samplesheet ) + // Process the samplesheet and get the initial channels + def initial_channels = SAMPLESHEET_CHECK ( samplesheet ) .csv - .splitCsv ( header:true, sep:'\t' ) - .map { create_ms_channel(it) } - .set { ms_runs } + .splitCsv(header:true, sep:'\t') + .map { row -> [row.Sample + '_' + row.Condition, row] } + + // Group by sample_condition and count + def count_channels = initial_channels + .groupTuple() + .map { key, rows -> [rows, rows.size()] } + .flatMap { rows, count -> rows.collect { [it, count] } } + + ms_runs = count_channels.map { row, group_count -> create_ms_channel(row, group_count) } emit: ms_runs // channel: [ val(meta), [ runs ] ] @@ -21,21 +29,23 @@ workflow INPUT_CHECK { } // Function to get list of [ meta, filenames ] -def create_ms_channel(LinkedHashMap row) { +def create_ms_channel(LinkedHashMap row, int group_count) { def meta = [:] meta.id = row.ID meta.sample = row.Sample meta.condition = row.Condition meta.ext = row.Extension + meta.group_count = group_count // add path(s) of the data file(s) to the meta map - def ms_meta = [] - - if (!file(row.ReplicateFileName).exists()) { + def ms_file = file(row.ReplicateFileName) + if (!ms_file.exists()) { exit 1, "ERROR: Please check input samplesheet -> MS file does not exist!\n${row.ReplicateFileName}" - } else { - ms_meta = [ meta, [ file(row.ReplicateFileName) ] ] } + + meta.spectra = ms_file.baseName + ms_meta = [ meta, [ ms_file ] ] + return ms_meta } diff --git a/subworkflows/local/map_alignment.nf b/subworkflows/local/map_alignment.nf index 90efc013..2623dd3c 
100644 --- a/subworkflows/local/map_alignment.nf +++ b/subworkflows/local/map_alignment.nf @@ -9,41 +9,43 @@ include { workflow MAP_ALIGNMENT { take: - runs_to_be_aligned - mzml + ch_runs_to_be_aligned + ch_mzml merge_meta_map main: ch_versions = Channel.empty() // Compute group-wise alignment rt transformation - OPENMS_MAPALIGNERIDENTIFICATION( runs_to_be_aligned ) - ch_versions = ch_versions.mix(OPENMS_MAPALIGNERIDENTIFICATION.out.versions.first().ifEmpty(null)) + OPENMS_MAPALIGNERIDENTIFICATION( ch_runs_to_be_aligned ) + ch_versions = ch_versions.mix(OPENMS_MAPALIGNERIDENTIFICATION.out.versions) // Join run specific trafoXMLs with meta information merge_meta_map - .join( OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml ) - .map { groupMeta, meta, trafoxml -> [meta, trafoxml] } - .transpose() - .set { joined_trafos } + .flatMap { group_meta, metas -> metas } + .map { meta -> [[spectra:meta.spectra], meta]} + .join( OPENMS_MAPALIGNERIDENTIFICATION.out.trafoxml + .flatMap { group_meta, trafoxmls -> trafoxmls.collect { trafoxml -> [[spectra: trafoxml.baseName], trafoxml] } }) + .map { spectra, meta, trafoxml -> [meta, trafoxml] } + .set { ch_trafos } - // Intermediate step to join RT transformation files with mzml channels -> [meta, idxml, mzml] - joined_trafos_mzmls = mzml.join(joined_trafos) + // Align mzML files using trafoXMLs + ch_trafos_mzmls = ch_mzml.join(ch_trafos) + OPENMS_MAPRTTRANSFORMERMZML(ch_trafos_mzmls) + ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERMZML.out.versions) - // Intermediate step to join RT transformation files with idxml channels -> [meta, idxml, trafoxml] - runs_to_be_aligned - .join( merge_meta_map ) + // Align idXMLfiles using trafoXMLs + ch_runs_to_be_aligned + .flatMap { group_meta, idxmls -> idxmls.collect { idxml -> [[spectra: idxml.baseName.replace("_fdr_filtered","")], idxml] } } + .join( merge_meta_map + .flatMap { group_meta, metas -> metas } + .map { meta -> [[spectra:meta.spectra], meta]} ) .map { group_meta, idxml, meta 
-> [meta, idxml] } - .transpose() - .join( joined_trafos ) - .set { joined_trafos_ids } + .join( ch_trafos ) + .set { ch_trafos_idxml } - // Align mzML files using trafoXMLs - OPENMS_MAPRTTRANSFORMERMZML(joined_trafos_mzmls) - ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERMZML.out.versions.first().ifEmpty(null)) - // Align idXMLfiles using trafoXMLs - OPENMS_MAPRTTRANSFORMERIDXML(joined_trafos_ids) - ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERIDXML.out.versions.first().ifEmpty(null)) + OPENMS_MAPRTTRANSFORMERIDXML(ch_trafos_idxml) + ch_versions = ch_versions.mix(OPENMS_MAPRTTRANSFORMERIDXML.out.versions) emit: versions = ch_versions diff --git a/subworkflows/local/predict_class1.nf b/subworkflows/local/predict_class1.nf index 948b8f91..16247fef 100644 --- a/subworkflows/local/predict_class1.nf +++ b/subworkflows/local/predict_class1.nf @@ -20,12 +20,12 @@ workflow PREDICT_CLASS1 { // If specified predict peptides using MHCFlurry MHCFLURRY_PREDICTPEPTIDESCLASS1(mztab.join(alleles)) - ch_versions = ch_versions.mix(MHCFLURRY_PREDICTPEPTIDESCLASS1.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCFLURRY_PREDICTPEPTIDESCLASS1.out.versions) if ( params.include_proteins_from_vcf ) { // Predict all possible neoepitopes from vcf PREDICT_POSSIBLE_CLASS1_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) - ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.versions) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS1_NEOEPITOPES.out.csv // Resolve found neoepitopes RESOLVE_FOUND_CLASS1_NEOEPITOPES( @@ -34,10 +34,10 @@ workflow PREDICT_CLASS1 { .combine( ch_predicted_possible_neoepitopes, by:0) .map( it -> [it[1], it[2], it[3]]) ) - ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.versions) 
// Predict class 1 neoepitopes MHCFlurry MHCFLURRY_PREDICTNEOEPITOPESCLASS1(alleles.join(RESOLVE_FOUND_CLASS1_NEOEPITOPES.out.csv, by:0)) - ch_versions = ch_versions.mix(MHCFLURRY_PREDICTNEOEPITOPESCLASS1.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCFLURRY_PREDICTNEOEPITOPESCLASS1.out.versions) } emit: diff --git a/subworkflows/local/predict_class2.nf b/subworkflows/local/predict_class2.nf index 41f3c7cd..e4af7c2c 100644 --- a/subworkflows/local/predict_class2.nf +++ b/subworkflows/local/predict_class2.nf @@ -24,21 +24,21 @@ workflow PREDICT_CLASS2 { // Preprocess found peptides for MHCNuggets prediction class 2 MHCNUGGETS_PEPTIDESCLASS2PRE(mztab) - ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2PRE.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2PRE.out.versions) // Predict found peptides using MHCNuggets class 2 MHCNUGGETS_PREDICTPEPTIDESCLASS2( MHCNUGGETS_PEPTIDESCLASS2PRE.out.preprocessed .join(alleles) ) - ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTPEPTIDESCLASS2.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTPEPTIDESCLASS2.out.versions) // Postprocess predicted MHCNuggets peptides class 2 MHCNUGGETS_PEPTIDESCLASS2POST( MHCNUGGETS_PREDICTPEPTIDESCLASS2.out.csv.join(MHCNUGGETS_PEPTIDESCLASS2PRE.out.geneID, by:0) ) - ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2POST.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_PEPTIDESCLASS2POST.out.versions) if ( params.include_proteins_from_vcf ) { // Predict all possible class 2 neoepitopes from vcf PREDICT_POSSIBLE_CLASS2_NEOEPITOPES(alleles.combine(ch_vcf_from_sheet, by:0)) - ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.versions) ch_predicted_possible_neoepitopes = PREDICT_POSSIBLE_CLASS2_NEOEPITOPES.out.csv // Resolve found class 2 
neoepitopes RESOLVE_FOUND_CLASS2_NEOEPITOPES( @@ -46,16 +46,16 @@ workflow PREDICT_CLASS2 { .map{ it -> [it[0].sample, it[1]] } .combine( ch_predicted_possible_neoepitopes, by:0) ) - ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.versions) // Preprocess resolved neoepitopes in a format that MHCNuggets understands MHCNUGGETS_NEOEPITOPESCLASS2PRE(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv) - ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.versions) // Predict class 2 MHCNuggets MHCNUGGETS_PREDICTNEOEPITOPESCLASS2(MHCNUGGETS_NEOEPITOPESCLASS2PRE.out.preprocessed.join(alleles, by:0)) - ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.versions) // Class 2 MHCNuggets Postprocessing MHCNUGGETS_NEOEPITOPESCLASS2POST(RESOLVE_FOUND_CLASS2_NEOEPITOPES.out.csv.join(MHCNUGGETS_PREDICTNEOEPITOPESCLASS2.out.csv, by:0)) - ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2POST.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(MHCNUGGETS_NEOEPITOPESCLASS2POST.out.versions) } emit: diff --git a/subworkflows/local/process_feature.nf b/subworkflows/local/process_feature.nf index 176861c7..0e55e618 100644 --- a/subworkflows/local/process_feature.nf +++ b/subworkflows/local/process_feature.nf @@ -2,7 +2,6 @@ * Perform the quantification by extracting the feature intensities and group runs corresponding to the same sample and condition. 
*/ -include { OPENMS_IDMERGER } from '../../modules/local/openms_idmerger' include { OPENMS_FEATUREFINDERIDENTIFICATION } from '../../modules/local/openms_featurefinderidentification' include { OPENMS_FEATURELINKERUNLABELEDKD } from '../../modules/local/openms_featurelinkerunlabeledkd' include { OPENMS_IDCONFLICTRESOLVER } from '../../modules/local/openms_idconflictresolver' @@ -16,18 +15,18 @@ workflow PROCESS_FEATURE { // Quantify identifications using targeted feature extraction OPENMS_FEATUREFINDERIDENTIFICATION(ch_runs_to_be_quantified).featurexml - .map { meta, featurexml -> [[id: meta.sample + '_' + meta.condition], featurexml] } + .map { meta, featurexml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), featurexml] } .groupTuple() .set { ch_features_grouped } - ch_versions = ch_versions.mix(OPENMS_FEATUREFINDERIDENTIFICATION.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_FEATUREFINDERIDENTIFICATION.out.versions) // Link extracted features OPENMS_FEATURELINKERUNLABELEDKD(ch_features_grouped) - ch_versions = ch_versions.mix(OPENMS_FEATURELINKERUNLABELEDKD.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_FEATURELINKERUNLABELEDKD.out.versions) // Resolve conflicting ids matching to the same feature OPENMS_IDCONFLICTRESOLVER(OPENMS_FEATURELINKERUNLABELEDKD.out.consensusxml) - ch_versions = ch_versions.mix(OPENMS_IDCONFLICTRESOLVER.out.versions.first().ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_IDCONFLICTRESOLVER.out.versions) emit: // Define the information that is returned by this workflow diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf index 0318bd34..f72a98a5 100644 --- a/subworkflows/local/quant.nf +++ b/subworkflows/local/quant.nf @@ -4,13 +4,14 @@ * It then aligns the retention times of the runs and merges the idxml files together to use them as id_ext in featurefinder * Finally, it performs the quantification and emits the consensusXML file */ 
-include { OPENMS_IDRIPPER } from '../../modules/local/openms_idripper' -include { OPENMS_IDSCORESWITCHER } from '../../modules/local/openms_idscoreswitcher' -include { PYOPENMS_IDFILTER } from '../../modules/local/pyopenms_idfilter' -include { OPENMS_IDMERGER as OPENMS_IDMERGER_QUANT } from '../../modules/local/openms_idmerger' -include { MAP_ALIGNMENT } from './map_alignment' -include { PROCESS_FEATURE } from './process_feature' +include { OPENMS_IDRIPPER } from '../../modules/nf-core/openms/idripper/main' +include { OPENMS_IDSCORESWITCHER } from '../../modules/nf-core/openms/idscoreswitcher/main' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_QUANT } from '../../modules/nf-core/openms/idfilter/main' +include { OPENMS_IDMERGER as OPENMS_IDMERGER_QUANT } from '../../modules/nf-core/openms/idmerger/main' + +include { MAP_ALIGNMENT } from './map_alignment' +include { PROCESS_FEATURE } from './process_feature' // Sort closure for merging and splitting files def sortById = { a, b -> a.id <=> b.id } @@ -24,32 +25,42 @@ workflow QUANT { main: ch_versions = Channel.empty() - // Rip post-percolator idXML files and manipulate such that we end up with [meta_run1, idxml_run1, pout_filtered] [meta_run2, idxml_run2, pout_filtered] ... - OPENMS_IDRIPPER( merged_pout ).ripped - .join( merge_meta_map ) - .join( filter_q_value ) - // TODO: fdrfiltered is not needed for idscore switching, but for idfilter. This will be adressed in the next refacoring of the workflow - .map { group_meta, ripped, meta, fdrfiltered -> [meta, ripped, fdrfiltered] } - .transpose() - .set { ch_ripped_pout } - ch_versions = ch_versions.mix(OPENMS_IDRIPPER.out.versions.ifEmpty(null)) + // Split post-percolator idXML files and manipulate such that we end up with [meta_run1, idxml_run1] [meta_run2, idxml_run2] ... 
+ // We need to make sure that the order of the runs is the same as in the mzml files since IDRipper always sorts the runs + // (and nextflow does not guarantee the order of the maps in merge_meta_map) + OPENMS_IDRIPPER( merged_pout ).idxmls + .flatMap { group_meta, idxmls -> idxmls.collect { idxml -> [[spectra: idxml.baseName], idxml] } } + // join on file basename to make sure that the order of the runs is the same as in the mzml files + // Is there a smoother way to do this? + .join( merge_meta_map + .flatMap { group_meta, metas -> metas } + .map { meta -> [[spectra:meta.spectra], meta] } ) + .map { spectra, idxmls, meta -> [meta, idxmls] } + .set { ch_ripped_idxml } + ch_versions = ch_versions.mix(OPENMS_IDRIPPER.out.versions) // Switch to xcorr for filtering since q-values are set to 1 with peptide-level-fdr if (params.fdr_level == 'peptide_level_fdrs'){ - ch_runs_to_be_filtered = OPENMS_IDSCORESWITCHER( ch_ripped_pout ).switched_idxml - ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions.ifEmpty(null)) + ch_runs_score_switched = OPENMS_IDSCORESWITCHER( ch_ripped_idxml ).idxml + ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions) } else { - ch_runs_to_be_filtered = ch_ripped_pout + ch_runs_score_switched = ch_ripped_idxml } + // Manipulate such that [meta_run1, idxml_run1, pout_group1], [meta_run2, idxml_run2, pout_group1] ... + ch_runs_score_switched + // Nextflow can only combine/join on the exact groupKey object, merge_id is not sufficient + .map { meta, idxml -> [groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), meta, idxml] } + .combine(filter_q_value, by:0) + .map { group_meta, meta, idxml, q_value -> [meta, idxml, q_value] } + .set { ch_runs_to_filter } + // Filter runs based on FDR-filtered, co-processed percolator output.
- // TODO: This is an alternative filtering method that will be replaced by IDFilter with new release of OpenMS - PYOPENMS_IDFILTER( ch_runs_to_be_filtered ).filtered - .map { meta, idxml -> [[id:meta.sample + '_' + meta.condition], [id:meta.id, file:idxml]] } - .groupTuple( sort: sortById ) - .map { meta, idxml -> [meta, idxml.file] } + OPENMS_IDFILTER_QUANT( ch_runs_to_filter ).filtered + .map { meta, idxml -> [ groupKey([id:meta.sample + '_' + meta.condition], meta.group_count), idxml] } + .groupTuple() .set { ch_runs_to_be_aligned } - ch_versions = ch_versions.mix(PYOPENMS_IDFILTER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_QUANT.out.versions) // Align retention times of runs MAP_ALIGNMENT( @@ -57,27 +68,27 @@ workflow QUANT { mzml, merge_meta_map ) - ch_versions = ch_versions.mix( MAP_ALIGNMENT.out.versions.ifEmpty(null) ) + ch_versions = ch_versions.mix( MAP_ALIGNMENT.out.versions ) // We need to merge groupwise the aligned idxml files together to use them as id_ext in featurefinder OPENMS_IDMERGER_QUANT( MAP_ALIGNMENT.out.aligned_idxml - .map { meta, aligned_idxml -> [[id: meta.sample + '_' + meta.condition], aligned_idxml] } + .map { meta, aligned_idxml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), aligned_idxml] } .groupTuple()) - ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions) // Manipulate channels such that we end up with : [meta, mzml, run_idxml, merged_runs_idxml] MAP_ALIGNMENT.out.aligned_mzml .join( MAP_ALIGNMENT.out.aligned_idxml ) - .map { meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] } - .groupTuple( sort: sortById ) - .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file] } + .map { meta, mzml, idxml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), meta, mzml, 
idxml] } + .groupTuple() + .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml, idxml] } .join( OPENMS_IDMERGER_QUANT.out.idxml ) .map { group_meta, meta, mzml, idxml, merged_idxml -> [meta, mzml, idxml, merged_idxml] } .transpose() .set { ch_runs_to_be_quantified } PROCESS_FEATURE ( ch_runs_to_be_quantified ) - ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PROCESS_FEATURE.out.versions) emit: consensusxml = PROCESS_FEATURE.out.consensusxml diff --git a/subworkflows/local/refine_fdr.nf b/subworkflows/local/refine_fdr.nf index 162ab9b9..fdb1e9aa 100644 --- a/subworkflows/local/refine_fdr.nf +++ b/subworkflows/local/refine_fdr.nf @@ -7,8 +7,8 @@ include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTERPERC } from '../../modules include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTERPSM } from '../../modules/local/openms_mztabexporter' include { MHCFLURRY_PREDICTPSMS } from '../../modules/local/mhcflurry_predictpsms' include { OPENMS_PERCOLATORADAPTER } from '../../modules/local/openms_percolatoradapter' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_PSMS } from '../../modules/local/openms_idfilter' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_REFINED } from '../../modules/local/openms_idfilter' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_PSMS } from '../../modules/nf-core/openms/idfilter/main' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_REFINED } from '../../modules/nf-core/openms/idfilter/main' workflow REFINE_FDR { // Define the input parameters @@ -36,16 +36,17 @@ workflow REFINE_FDR { ch_versions = ch_versions.mix(MHCFLURRY_PREDICTPSMS.out.versions) // Filter psm results by shrinked search space + // TODO: Check if filtering works properly when reevaluating this subworkflow OPENMS_IDFILTER_PSMS(psm_features.combine( MHCFLURRY_PREDICTPSMS.out.idxml, by: [0] )) ch_versions = ch_versions.mix(OPENMS_IDFILTER_PSMS.out.versions) // Recompute percolator fdr on shrinked search space - 
OPENMS_PERCOLATORADAPTER( OPENMS_IDFILTER_PSMS.out.idxml ) + OPENMS_PERCOLATORADAPTER( OPENMS_IDFILTER_PSMS.out.filtered ) ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions) // Filter results by refined fdr - OPENMS_IDFILTER_REFINED(OPENMS_PERCOLATORADAPTER.out.idxml.flatMap { it -> [tuple(it[0], it[1], null)]}) + OPENMS_IDFILTER_REFINED(OPENMS_PERCOLATORADAPTER.out.idxml.flatMap { it -> [tuple(it[0], it[1], [])]}) ch_versions = ch_versions.mix(OPENMS_IDFILTER_REFINED.out.versions) emit: // Define the information that is returned by this workflow - filter_refined_q_value = OPENMS_IDFILTER_REFINED.out.idxml + filter_refined_q_value = OPENMS_IDFILTER_REFINED.out.filtered versions = ch_versions } diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index 08ed75f2..9f5eb975 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -65,31 +65,29 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // MODULE: Loaded from modules/local/ // -include { OPENMS_DECOYDATABASE } from '../modules/local/openms_decoydatabase' -include { THERMORAWFILEPARSER } from '../modules/local/thermorawfileparser' -include { TDF2MZML } from '../modules/local/tdf2mzml' -include { OPENMS_PEAKPICKERHIRES } from '../modules/local/openms_peakpickerhires' -include { OPENMS_FILEFILTER } from '../modules/local/openms_filefilter' -include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' -include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' -include { MS2RESCORE } from '../modules/local/ms2rescore' -include { OPENMS_IDSCORESWITCHER } from '../modules/local/openms_idscoreswitcher' +include { TDF2MZML } from '../modules/local/tdf2mzml' +include { OPENMS_FILEFILTER } from '../modules/local/openms_filefilter' +include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' +include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' +include { MS2RESCORE } from 
'../modules/local/ms2rescore' -include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/local/openms_idfilter' -include { OPENMS_IDMERGER } from '../modules/local/openms_idmerger' +include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' +include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' +include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' -include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' -include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' -include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' - -include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' -include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' +include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' +include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// SUBWORKFLOW: Loaded from subworkflows/local/ // -include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { INCLUDE_PROTEINS } from '../subworkflows/local/include_proteins' +include { REFINE_FDR } from '../subworkflows/local/refine_fdr' +include { QUANT } from '../subworkflows/local/quant' +include { PREDICT_CLASS1 } from '../subworkflows/local/predict_class1' +include { PREDICT_CLASS2 } from '../subworkflows/local/predict_class2' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -100,8 +98,14 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from 
'../modules/nf-core/custom/dumpsoftwareversions/main' +include { THERMORAWFILEPARSER } from '../modules/nf-core/thermorawfileparser/main' +include { OPENMS_DECOYDATABASE } from '../modules/nf-core/openms/decoydatabase/main' +include { OPENMS_PEAKPICKERHIRES } from '../modules/nf-core/openms/peakpickerhires/main' +include { OPENMS_IDMERGER } from '../modules/nf-core/openms/idmerger/main' +include { OPENMS_IDSCORESWITCHER } from '../modules/nf-core/openms/idscoreswitcher/main.nf' +include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE } from '../modules/nf-core/openms/idfilter/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -111,14 +115,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft // Info required for completion email and summary def multiqc_report = [] -// Sort closure for merging and splitting files -def sortById = { a, b -> a.id <=> b.id } -include { INCLUDE_PROTEINS } from '../subworkflows/local/include_proteins' -include { REFINE_FDR } from '../subworkflows/local/refine_fdr' -include { QUANT } from '../subworkflows/local/quant' -include { PREDICT_CLASS1 } from '../subworkflows/local/predict_class1' -include { PREDICT_CLASS2 } from '../subworkflows/local/predict_class2' //////////////////////////////////////////////////// /* -- RUN MAIN WORKFLOW -- */ @@ -142,11 +139,11 @@ workflow MHCQUANT { .branch { meta, filename -> raw : meta.ext == 'raw' - return [ meta.subMap('id', 'sample', 'condition'), filename ] + return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ] mzml : meta.ext == 'mzml' - return [ meta.subMap('id', 'sample', 'condition'), filename ] + return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ] tdf : meta.ext == 'd' - return [ 
meta.subMap('id', 'sample', 'condition'), filename ] + return [ meta.subMap('id', 'sample', 'condition', 'group_count', 'spectra'), filename ] other : true } .set { branched_ms_files } @@ -174,7 +171,7 @@ workflow MHCQUANT { // Generate reversed decoy database OPENMS_DECOYDATABASE(fasta_file) ch_versions = ch_versions.mix(OPENMS_DECOYDATABASE.out.versions) - ch_decoy_db = OPENMS_DECOYDATABASE.out.decoy + ch_decoy_db = OPENMS_DECOYDATABASE.out.decoy_fasta .map{ meta, fasta -> [fasta] } } else { ch_decoy_db = fasta_file.map{ meta, fasta -> [fasta] } @@ -184,18 +181,18 @@ workflow MHCQUANT { ch_ms_files = branched_ms_files.mzml.map{ meta, mzml -> [meta, mzml[0]]} // Raw file conversion THERMORAWFILEPARSER(branched_ms_files.raw) - ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions.ifEmpty(null)) - ch_ms_files = ch_ms_files.mix(THERMORAWFILEPARSER.out.mzml) + ch_versions = ch_versions.mix(THERMORAWFILEPARSER.out.versions) + ch_ms_files = ch_ms_files.mix(THERMORAWFILEPARSER.out.spectra) // timsTOF data conversion TDF2MZML(branched_ms_files.tdf) - ch_versions = ch_versions.mix(TDF2MZML.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(TDF2MZML.out.versions) ch_ms_files = ch_ms_files.mix(TDF2MZML.out.mzml) // Optional: Run Peak Picking as Preprocessing if (params.run_centroidisation) { OPENMS_PEAKPICKERHIRES(ch_ms_files) - ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_PEAKPICKERHIRES.out.versions) ch_mzml_file = OPENMS_PEAKPICKERHIRES.out.mzml } else { ch_mzml_file = ch_ms_files @@ -204,7 +201,7 @@ workflow MHCQUANT { // Optionally clean up mzML files if (params.filter_mzml){ OPENMS_FILEFILTER(ch_mzml_file) - ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_FILEFILTER.out.versions) ch_clean_mzml_file = OPENMS_FILEFILTER.out.cleaned_mzml } else { ch_clean_mzml_file = ch_mzml_file @@ -222,26 +219,26 @@ workflow 
MHCQUANT { // Index decoy and target hits OPENMS_PEPTIDEINDEXER(OPENMS_COMETADAPTER.out.idxml.combine(ch_decoy_db)) - ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_PEPTIDEINDEXER.out.versions) // Save indexed runs for later use to keep meta-run information. Sort based on file id OPENMS_PEPTIDEINDEXER.out.idxml - .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], meta] } - .groupTuple( sort: sortById ) - .set { merge_meta_map } + .map { meta, idxml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), meta] } + .groupTuple() + .set { merge_meta_map } OPENMS_PEPTIDEINDEXER.out.idxml - .map { meta, idxml -> [[id: meta.sample + '_' + meta.condition], idxml] } - .groupTuple() - .set { ch_runs_to_merge } + .map { meta, idxml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), idxml] } + .groupTuple() + .set { ch_runs_to_merge } // Merge aligned idXMLfiles OPENMS_IDMERGER(ch_runs_to_merge) - ch_versions = ch_versions.mix(OPENMS_IDMERGER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_IDMERGER.out.versions) // Run MS2Rescore ch_clean_mzml_file - .map { meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] } + .map { meta, mzml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), mzml] } .groupTuple() .join(OPENMS_IDMERGER.out.idxml) .map { meta, mzml, idxml -> [meta, idxml, mzml, []] } @@ -251,28 +248,25 @@ workflow MHCQUANT { ch_versions = ch_versions.mix(MS2RESCORE.out.versions) if (params.rescoring_engine == 'percolator') { - // TODO: Find a way to parse the feature names of ms2rescore and plug them into the feature extractor // Extract PSM features for Percolator - OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml - .join(MS2RESCORE.out.feature_names)) - ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions.ifEmpty(null)) + 
OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names)) + ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions) // Run Percolator OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml) - ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions) ch_rescored_runs = OPENMS_PERCOLATORADAPTER.out.idxml } else { log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore." - // TODO: remove whitelist argument from idscoreswitcher - OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml - .map { meta, idxml -> [meta, idxml, []] }) - ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.switched_idxml.map { tuple -> tuple.findAll { it != [] }} + // Switch comet e-value to mokapot q-value + OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml) + ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions) + ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.idxml } // Filter by percolator q-value - // TODO: Use empty list instead of null - OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.flatMap { it -> [tuple(it[0], it[1], null)] }) - ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions.ifEmpty(null)) + OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]}) + ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions) // // SUBWORKFLOW: Refine the FDR values on the predicted subset @@ -280,32 +274,44 @@ workflow MHCQUANT { if (params.refine_fdr_on_predicted_subset && params.predict_class_1) { // Run the following subworkflow REFINE_FDR ( - OPENMS_IDFILTER_Q_VALUE.out.idxml, + OPENMS_IDFILTER_Q_VALUE.out.filtered, OPENMS_PSMFEATUREEXTRACTOR.out.idxml, peptides_class_1_alleles ) - ch_versions = ch_versions.mix(REFINE_FDR.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(REFINE_FDR.out.versions) // Define the outcome of the paramer 
to a fixed variable - filter_q_value = REFINE_FDR.out.filter_refined_q_value + ch_filter_q_value = REFINE_FDR.out.filter_refined_q_value } else { // Make sure that the columns that consists of the ID's, sample names and the idXML file names are returned - filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.idxml + ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered } // // SUBWORKFLOW: QUANT // if (!params.skip_quantification) { - QUANT(merge_meta_map, ch_rescored_runs, filter_q_value, ch_clean_mzml_file) - ch_versions = ch_versions.mix(QUANT.out.versions.ifEmpty(null)) + QUANT(merge_meta_map, ch_rescored_runs, ch_filter_q_value, ch_clean_mzml_file) + ch_versions = ch_versions.mix(QUANT.out.versions) ch_output = QUANT.out.consensusxml } else { - ch_output = filter_q_value + ch_output = ch_filter_q_value + } + + if (params.annotate_ions) { + // Join the ch_filtered_idxml and the ch_mzml_file + ch_clean_mzml_file.map { meta, mzml -> [ groupKey([id: meta.sample + '_' + meta.condition], meta.group_count), mzml] } + .groupTuple() + .join(ch_filter_q_value) + .set{ ch_ion_annotator_input } + + // Annotate spectra with ion fragmentation information + PYOPENMS_IONANNOTATOR( ch_ion_annotator_input ) + ch_versions = ch_versions.mix(PYOPENMS_IONANNOTATOR.out.versions) } // Prepare for check if file is empty OPENMS_TEXTEXPORTER(ch_output) - ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_TEXTEXPORTER.out.versions) // Return an error message when there is only a header present in the document OPENMS_TEXTEXPORTER.out.tsv.map { meta, tsv -> if (tsv.size() < 130) { @@ -314,49 +320,38 @@ workflow MHCQUANT { } OPENMS_MZTABEXPORTER(ch_output) - ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(OPENMS_MZTABEXPORTER.out.versions) // // SUBWORKFLOW: Predict class I (neoepitopes) // - if (params.predict_class_1 & !params.skip_quantification) { - PREDICT_CLASS1 ( 
- OPENMS_MZTABEXPORTER.out.mztab, - peptides_class_1_alleles, - ch_vcf_from_sheet - ) - ch_versions = ch_versions.mix(PREDICT_CLASS1.out.versions.ifEmpty(null)) - ch_predicted_possible_neoepitopes = PREDICT_CLASS1.out.ch_predicted_possible_neoepitopes - } else { - ch_predicted_possible_neoepitopes = Channel.empty() - } - - // - // SUBWORKFLOW: Predict class II (neoepitopes) + // TODO: Temporary disabled because of outdated vcf parsing + //if (params.predict_class_1 & !params.skip_quantification) { + // PREDICT_CLASS1 ( + // OPENMS_MZTABEXPORTER.out.mztab, + // peptides_class_1_alleles, + // ch_vcf_from_sheet + // ) + // ch_versions = ch_versions.mix(PREDICT_CLASS1.out.versions) + // ch_predicted_possible_neoepitopes = PREDICT_CLASS1.out.ch_predicted_possible_neoepitopes + //} else { + // ch_predicted_possible_neoepitopes = Channel.empty() + //} // - if (params.predict_class_2 & !params.skip_quantification) { - PREDICT_CLASS2 ( - OPENMS_MZTABEXPORTER.out.mztab, - peptides_class_2_alleles, - ch_vcf_from_sheet - ) - ch_versions = ch_versions.mix(PREDICT_CLASS2.out.versions.ifEmpty(null)) - ch_predicted_possible_neoepitopes_II = PREDICT_CLASS2.out.ch_predicted_possible_neoepitopes - } else { - ch_predicted_possible_neoepitopes_II = Channel.empty() - } - - if (params.annotate_ions) { - // Join the ch_filtered_idxml and the ch_mzml_file - ch_clean_mzml_file.map { meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] } - .groupTuple() - .join(filter_q_value) - .set{ ch_ion_annotator_input } - - // Annotate spectra with ion fragmentation information - PYOPENMS_IONANNOTATOR( ch_ion_annotator_input ) - ch_versions = ch_versions.mix(PYOPENMS_IONANNOTATOR.out.versions.ifEmpty(null)) - } + //// + //// SUBWORKFLOW: Predict class II (neoepitopes) + //// + //if (params.predict_class_2 & !params.skip_quantification) { + // PREDICT_CLASS2 ( + // OPENMS_MZTABEXPORTER.out.mztab, + // peptides_class_2_alleles, + // ch_vcf_from_sheet + // ) + // ch_versions = 
ch_versions.mix(PREDICT_CLASS2.out.versions) + // ch_predicted_possible_neoepitopes_II = PREDICT_CLASS2.out.ch_predicted_possible_neoepitopes + //} else { + // ch_predicted_possible_neoepitopes_II = Channel.empty() + //} // // MODULE: Pipeline reporting