diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index a2edc113..f7ad6eec 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -25,3 +25,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/airrflow/results-${{ github.sha }}" } profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index df60d9de..55a8725e 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,3 +23,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/airrflow/results-test-${{ github.sha }}" } profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.prettierignore b/.prettierignore index d0e7ae58..eb74a574 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ email_template.html +adaptivecard.json .nextflow* work/ data/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 104a4def..f1710279 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [2.4.0] - 2022-12-05 "Aparecium" + +### `Added` + +- [#209](https://github.com/nf-core/airrflow/pull/209) Template update to nf-core tools v2.6. +- [#210](https://github.com/nf-core/airrflow/pull/210) Add fastp for read QC, adapter trimming and read clipping. 
+- [#212](https://github.com/nf-core/airrflow/pull/212) Bump versions to 2.4.0 + ## [2.3.0] - 2022-09-22 "Expelliarmus" ### `Added` diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index 4533e2f2..00000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -cff-version: 1.2.0 -message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" -authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven -title: "The nf-core framework for community-curated bioinformatics pipelines." -version: 2.4.1 -doi: 10.1038/s41587-020-0439-x -date-released: 2022-05-16 -url: https://github.com/nf-core/tools -prefered-citation: - type: article - authors: - - family-names: Ewels - given-names: Philip - - family-names: Peltzer - given-names: Alexander - - family-names: Fillinger - given-names: Sven - - family-names: Patel - given-names: Harshil - - family-names: Alneberg - given-names: Johannes - - family-names: Wilm - given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime - - family-names: Di Tommaso - given-names: Paolo - - family-names: Nahnsen - given-names: Sven - doi: 10.1038/s41587-020-0439-x - journal: nature biotechnology - start: 276 - end: 278 - title: "The nf-core framework for community-curated bioinformatics pipelines." 
- issue: 3 - volume: 38 - year: 2020 - url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md index 611485ca..10d23253 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,10 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [Fastp](https://doi.org/10.1093/bioinformatics/bty560) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. diff --git a/README.md b/README.md index b68a98e8..d5976da2 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ On release, automated continuous integration tests run the pipeline on a full-si By default, the pipeline currently performs the following steps: -- Raw read quality control (`FastQC`) +- Raw read quality control, adapter trimming and read clipping (`fastp`) - Pre-processing (`pRESTO`) - Filtering sequences by sequencing quality. - Masking amplicon primers. @@ -35,6 +35,7 @@ By default, the pipeline currently performs the following steps: - Assembling R1 and R2 read mates. - Removing and annotating read duplicates with different UMI barcodes. - Filtering out sequences that do not have at least 2 duplicates. +- Post-assembly read quality control (`FastQC`) - Assigning gene segment alleles with `IgBlast` using the IMGT database (`Change-O`). - Finding the Hamming distance threshold for clone definition (`SHazaM`). - Clonal assignment: defining clonal lineages of the B-cell / T-cell populations (`Change-O`). 
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..34b0d16b --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/airrflow v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..4b9bedf5 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-airrflow-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the 
methods section of a publication." +section_name: "nf-core/airrflow Methods Description" +section_href: "https://github.com/nf-core/airrflow" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You can inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/airrflow v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+ +
+
Notes:
+ +
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 931ef050..e8e62182 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -19,9 +19,11 @@ module_order: - "./*_ASSEMBLED_fastqc.zip" report_section_order: - software_versions: + "nf-core-airrflow-methods-description": order: -1000 - nf-core-airrflow-summary: + software_versions: order: -1001 + "nf-core-airrflow-summary": + order: -1002 export_plots: true diff --git a/conf/modules.config b/conf/modules.config index 9404957c..731e203a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -36,8 +36,36 @@ process { ] } - withName: FASTQC { - ext.args = '--quiet' + withName: 'FASTP' { + publishDir = [ + [ + path: { "${params.outdir}/fastp/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json,log}" + ], + [ + enabled: params.save_trimmed, + path: { "${params.outdir}/fastp/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.fastp.fastq.gz" + ] + ] + ext.args = [ "--disable_quality_filtering --disable_length_filtering", + params.trim_fastq ? "" : "--disable_adapter_trimming", // Only disable adapter trimming when --trim_fastq is false (a ternary, not '?:', so 'true' is never passed to fastp) + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1 + params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : "", // Remove bp from the 5' end of read 2 + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + params.trim_nextseq ? 
"--trim_poly_g" : "", // Pass fastp's --trim_poly_g (poly-G tail trimming) when --trim_nextseq is set + ].join(" ").trim() + } + + withName: 'GUNZIP_*' { + publishDir = [ + [ + enabled: false + ] + ] } withName: FASTQC_POSTASSEMBLY { diff --git a/docs/output.md b/docs/output.md index 838ac62e..bd8c88f7 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,7 +10,7 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - read quality control +- [FastP](#fastp) - read quality control, adapter trimming and read clipping - [pRESTO](#presto) - read pre-processing - [Filter by sequence quality](#filter-by-sequence-quality) - filter sequences by quality - [Mask primers](#mask-primers) - Masking primers @@ -21,6 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Assemble mates](#assemble-mates) - Assemble sequence mates. - [Remove duplicates](#remove-duplicates) - Remove and annotate read duplicates. - [Filter sequences for at least 2 representative](#filter-sequences-for-at-least-2-representative) Filter sequences that do not have at least 2 duplicates. +- [FastQC](#fastqc) - read quality control post-assembly - [Change-O](#change-o) - Assign genes and clonotyping - [Assign genes with Igblast](#assign-genes-with-igblast) - [Make database from assigned genes](#make-database-from-assigned-genes) @@ -39,29 +40,20 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#MultiQC) - MultiQC - [Pipeline information](#pipeline-information) - Pipeline information -## FastQC +## Fastp
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics for the raw unmated reads. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images for the raw unmated reads. - - `postassembly/` - - `*_ASSEMBLED_fastqc.html`: FastQC report containing quality metrics for the mated and quality filtered reads. - - `*_ASSEMBLED_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images for the mated and quality filtered reads. +- `fastp/` + - `<sample_id>/` + - `*.fastp.html`: fastp report in HTML format containing quality metrics for the raw reads. + - `*.fastp.json`: fastp report in JSON format. + - `*.fastp.log`: fastp log file.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -> **NB:** Two sets of FastQC plots are displayed in the MultiQC report: first for the raw _untrimmed_ and unmated reads and secondly for the assembled and QC filtered reads (but before collapsing duplicates). They may contain adapter sequence and potentially regions with low quality. +[fastp](https://doi.org/10.1093/bioinformatics/bty560) gives general quality metrics about your sequenced reads, as well as allows filtering reads by quality, trimming adapters and clipping reads at 5' or 3' ends. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [fastp documentation](https://github.com/OpenGene/fastp). ## presto @@ -193,6 +185,28 @@ Remove duplicates using [CollapseSeq](https://presto.readthedocs.io/en/version-0 Remove sequences which do not have 2 representative using [SplitSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/SplitSeq.html) from the pRESTO Immcantation toolset. +## FastQC + +
+Output files + +- `fastqc/` + - `postassembly/` + - `*_ASSEMBLED_fastqc.html`: FastQC report containing quality metrics for the mated and quality filtered reads. + - `*_ASSEMBLED_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images for the mated and quality filtered reads. + +
+ +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). + +![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) + +![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) + +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +> **NB:** Two sets of FastQC plots are displayed in the MultiQC report: first for the raw _untrimmed_ and unmated reads and secondly for the assembled and QC filtered reads (but before collapsing duplicates). They may contain adapter sequence and potentially regions with low quality. + ## Change-O ### Assign genes with Igblast diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..27feb009 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = 
workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowBcellmagic.groovy b/lib/WorkflowBcellmagic.groovy index 30dcb802..cab982f8 100755 --- a/lib/WorkflowBcellmagic.groovy +++ b/lib/WorkflowBcellmagic.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/bcellmagic.nf in the nf-core/bcellmagic pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowBcellmagic { // @@ -37,7 +39,22 @@ class WorkflowBcellmagic { return yaml_file_text } - // + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF 
${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta['manifest_map'] = run_workflow.manifest.toMap() + + meta['doi_text'] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : '' + meta['nodoi_text'] = meta.manifest_map.doi ? '' : '
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • ' + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/modules.json b/modules.json index fb3fa41e..54043170 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,25 @@ "name": "nf-core/airrflow", "homePage": "https://github.com/nf-core/airrflow", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247" - }, - "fastqc": { - "branch": "master", - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "multiqc": { - "branch": "master", - "git_sha": "4b1d4bf401d4cf65ebc4f604bc8c8e7551109db3" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0" + }, + "fastp": { + "branch": "master", + "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 81% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 34b50b9f..cebb6e05 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the 
MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..207258ad --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,103 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : + 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..6f6fad74 --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,73 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: https://doi.org/10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. 
Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 100% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml 
similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/multiqc/main.nf similarity index 98% rename from modules/nf-core/modules/multiqc/main.nf rename to modules/nf-core/multiqc/main.nf index 698461d7..a8159a57 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,5 +1,5 @@ process MULTIQC { - label 'process_medium' + label 'process_single' conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 100% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml diff --git a/nextflow.config b/nextflow.config index b91902e2..4a65c188 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,16 @@ params { umi_length = -1 umi_start = 0 + // trimming options + trim_fastq = true + adapter_fasta = null + clip_r1 = 0 + clip_r2 = 0 + three_prime_clip_r1 = 0 + three_prime_clip_r2 = 0 + trim_nextseq = false + save_trimmed = false + // pRESTO options filterseq_q = 20 primer_maxerror = 0.2 @@ -80,10 +90,12 @@ params { igenomes_ignore = true // MultiQC options - multiqc_config = null - multiqc_title = null - max_multiqc_email_size = 25.MB - skip_multiqc = false + skip_multiqc = false + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -93,6 +105,7 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false @@ -134,7 +147,6 @@ try { // } - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -254,7 +266,8 @@ 
manifest { description = 'B and T cell repertoire analysis pipeline with the Immcantation framework.' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.3.0' + version = '2.4.0' + doi = '10.5281/zenodo.2642009' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 3f0d6300..dbfaa6df 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -60,7 +60,8 @@ "description": "Path to fasta file containing the linker sequence, if no V-region primers were used but a linker sequence is present (e.g. 5' RACE SMARTer TAKARA protocol).", "fa_icon": "fas fa-dna" } - } + }, + "fa_icon": "fas fa-flask" }, "databases_cache": { "title": "Databases cache", @@ -167,6 +168,61 @@ "help_text": "The pipeline requires indication of UMI barcode treatment (for identifying unique transcripts). These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter. Specify the UMI barcode length with the `--umi_length` parameter. 
If no UMI barcodes are present, specify `--umi_length = 0` to use the sans-UMI subworkflow.", "fa_icon": "fas fa-barcode" }, + "adapter_trimming": { + "title": "Adapter trimming", + "type": "object", + "description": "Options for adapter trimming and read clipping", + "default": "", + "fa_icon": "fas fa-cut", + "properties": { + "trim_fastq": { + "type": "boolean", + "default": true, + "description": "Whether to trim adapters in fastq reads with fastp.", + "help_text": "By default adapters will be auto-detected, but adapter sequences can also be provided in a `fasta` file with the `--adapter_fasta` option.", + "fa_icon": "fas fa-cut" + }, + "adapter_fasta": { + "type": "string", + "default": null, + "fa_icon": "fas fa-file" + }, + "clip_r1": { + "type": "integer", + "default": 0, + "description": "Number of bases to clip 5' in R1 reads.", + "fa_icon": "fas fa-cut" + }, + "clip_r2": { + "type": "integer", + "default": 0, + "description": "Number of bases to clip 5' in R2 reads.", + "fa_icon": "fas fa-cut" + }, + "three_prime_clip_r1": { + "type": "integer", + "default": 0, + "description": "Number of bases to clip 3' in R1 reads.", + "fa_icon": "fas fa-cut" + }, + "three_prime_clip_r2": { + "type": "integer", + "default": 0, + "description": "Number of bases to clip 3' in R2 reads.", + "fa_icon": "fas fa-cut" + }, + "trim_nextseq": { + "type": "boolean", + "description": "Trim adapters specific for Nextseq sequencing", + "fa_icon": "fas fa-cut" + }, + "save_trimmed": { + "type": "boolean", + "description": "Option to save trimmed reads.", + "fa_icon": "far fa-save" + } + } + }, "presto_options": { "title": "pRESTO options", "type": "object", @@ -258,15 +314,18 @@ "properties": { "skip_report": { "type": "boolean", - "description": "Skip repertoire analysis and report generation" + "description": "Skip repertoire analysis and report generation", + "fa_icon": "fas fa-angle-double-right" }, "skip_lineage": { "type": "boolean", - "description": "Skip clonal lineage 
analysis and lineage tree plotting." + "description": "Skip clonal lineage analysis and lineage tree plotting.", + "fa_icon": "fas fa-angle-double-right" }, "skip_multiqc": { "type": "boolean", - "description": "Skip multiqc report" + "description": "Skip multiqc report", + "fa_icon": "fas fa-angle-double-right" } }, "help_text": "Downstream analyses include a series of R scripts based on the Immcantation Alakazam, ChangeO and Shazam packages to calculate:\n- Clonal abundance and diversity\n- Clonal lineage tree export in graphML\n- Clonal overlap and statistics\n- Mutational load\n- Isotype and V-family distribution", @@ -473,12 +532,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", @@ -567,6 +644,9 @@ { "$ref": "#/definitions/umi_barcode_handling" }, + { + "$ref": "#/definitions/adapter_trimming" + }, { "$ref": "#/definitions/presto_options" }, diff --git a/subworkflows/local/presto_sans_umi.nf b/subworkflows/local/presto_sans_umi.nf index 3d310ab3..59a2db57 100644 --- a/subworkflows/local/presto_sans_umi.nf +++ b/subworkflows/local/presto_sans_umi.nf @@ -1,16 +1,17 @@ // Include statements -include { GUNZIP as GUNZIP_SANS_UMI } from '../../modules/local/gunzip' +include { GUNZIP as GUNZIP_SANS_UMI } from '../../modules/local/gunzip' include { FASTQC_POSTASSEMBLY as FASTQC_POSTASSEMBLY_SANS_UMI } from '../../modules/local/fastqc_postassembly' +include { FASTP } from '../../modules/nf-core/fastp/main' //PRESTO -include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_SANS_UMI } from '../../modules/local/presto/presto_assemblepairs' +include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_SANS_UMI } from '../../modules/local/presto/presto_assemblepairs' include { PRESTO_FILTERSEQ_POSTASSEMBLY as PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI } from '../../modules/local/presto/presto_filterseq_postassembly' include { PRESTO_MASKPRIMERS_POSTASSEMBLY as PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI } from '../../modules/local/presto/presto_maskprimers_postassembly' -include { PRESTO_PARSEHEADERS_PRIMERS as PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI } from '../../modules/local/presto/presto_parseheaders_primers' -include { PRESTO_PARSEHEADERS_METADATA as PRESTO_PARSEHEADERS_METADATA_SANS_UMI } from '../../modules/local/presto/presto_parseheaders_metadata' -include { PRESTO_COLLAPSESEQ 
as PRESTO_COLLAPSESEQ_SANS_UMI } from '../../modules/local/presto/presto_collapseseq' -include { PRESTO_SPLITSEQ as PRESTO_SPLITSEQ_SANS_UMI} from '../../modules/local/presto/presto_splitseq' +include { PRESTO_PARSEHEADERS_PRIMERS as PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI } from '../../modules/local/presto/presto_parseheaders_primers' +include { PRESTO_PARSEHEADERS_METADATA as PRESTO_PARSEHEADERS_METADATA_SANS_UMI } from '../../modules/local/presto/presto_parseheaders_metadata' +include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_SANS_UMI } from '../../modules/local/presto/presto_collapseseq' +include { PRESTO_SPLITSEQ as PRESTO_SPLITSEQ_SANS_UMI} from '../../modules/local/presto/presto_splitseq' workflow PRESTO_SANS_UMI { @@ -18,11 +19,23 @@ workflow PRESTO_SANS_UMI { ch_reads // channel: [ val(meta), [ reads ] ] ch_cprimers // channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] + ch_adapter_fasta // channel: [ adapters.fasta ] main: ch_versions = Channel.empty() - ch_gunzip = ch_reads + + // Fastp + save_merged = false + FASTP ( + ch_reads, + ch_adapter_fasta, + params.save_trimmed, + save_merged + ) + ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + + ch_gunzip = FASTP.out.reads.map{ meta,reads -> [meta, reads[0], reads[1]] } // gunzip fastq.gz to fastq GUNZIP_SANS_UMI ( ch_gunzip ) @@ -81,6 +94,8 @@ workflow PRESTO_SANS_UMI { emit: fasta = PRESTO_SPLITSEQ_SANS_UMI.out.fasta software = ch_versions + fastp_reads_json = FASTP.out.json.collect{ meta,json -> json } + fastp_reads_html = FASTP.out.html.collect{ meta,html -> html } fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_SANS_UMI.out.zip presto_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_SANS_UMI.out.logs.collect() presto_filterseq_logs = PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI.out.logs diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 55a1cc39..5bcd972d 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -1,8 
+1,10 @@ // Include statements -include { MERGE_UMI } from '../../modules/local/merge_UMI' -include { RENAME_FASTQ as RENAME_FASTQ_UMI } from '../../modules/local/rename_fastq' -include { GUNZIP as GUNZIP_UMI } from '../../modules/local/gunzip' +include { MERGE_UMI } from '../../modules/local/merge_UMI' +include { RENAME_FASTQ as RENAME_FASTQ_UMI } from '../../modules/local/rename_fastq' +include { GUNZIP as GUNZIP_UMI } from '../../modules/local/gunzip' include { FASTQC_POSTASSEMBLY as FASTQC_POSTASSEMBLY_UMI } from '../../modules/local/fastqc_postassembly' +include { FASTP } from '../../modules/nf-core/fastp/main' + //PRESTO include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq' @@ -25,18 +27,64 @@ workflow PRESTO_UMI { ch_reads // channel: [ val(meta), [ reads ] ] ch_cprimers // channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] + ch_adapter_fasta // channel: [ adapters.fasta ] main: ch_versions = Channel.empty() + + // prepare reads for fastp + ch_reads.dump(tag:'presto umi reads') + // Merge UMI from index file to R1 if provided if (params.index_file) { - MERGE_UMI ( ch_reads ) + + // ch for fastp reads R1 R2 + ch_reads.map{ meta, reads -> [meta, [reads[0], reads[1]]] } + .dump(tag: 'presto_umi_R1_R2_reads') + .set{ ch_reads_R1_R2 } + + // Fastp reads R1 R2 + save_merged = false + FASTP ( + ch_reads_R1_R2, + ch_adapter_fasta, + params.save_trimmed, + save_merged + ) + ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + + //ch for merge umi + ch_meta_R1_R2 = FASTP.out.reads + .map{ meta, reads -> [meta.id, meta, reads[0], reads[1]] } + ch_meta_index = ch_reads + .map{ meta, reads -> [meta.id, meta, reads[2]] } + ch_meta_R1_R2_index = ch_meta_R1_R2.join( ch_meta_index ) + .map{ id, meta1, R1, R2, meta2, index -> [ meta1, R1, R2, index ] } + .dump(tag: 'ch_merge_umi') + + MERGE_UMI ( ch_meta_R1_R2_index ) ch_gunzip = MERGE_UMI.out.reads ch_versions = 
ch_versions.mix(MERGE_UMI.out.versions.ifEmpty(null)) + + } else { - RENAME_FASTQ_UMI ( ch_reads ) + + // Fastp reads + save_merged = false + FASTP ( + ch_reads, + ch_adapter_fasta, + params.save_trimmed, + save_merged + ) + ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + + ch_rename_fastq_umi = FASTP.out.reads.map{ meta,reads -> [meta, reads[0], reads[1]] } + + RENAME_FASTQ_UMI ( ch_rename_fastq_umi ) ch_gunzip = RENAME_FASTQ_UMI.out.reads + } // gunzip fastq.gz to fastq @@ -139,6 +187,8 @@ workflow PRESTO_UMI { emit: fasta = PRESTO_SPLITSEQ_UMI.out.fasta software = ch_versions + fastp_reads_json = FASTP.out.json.collect{ meta,json -> json } + fastp_reads_html = FASTP.out.html.collect{ meta,html -> html } fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_UMI.out.zip presto_filterseq_logs = PRESTO_FILTERSEQ_UMI.out.logs presto_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs.collect() diff --git a/workflows/bcellmagic.nf b/workflows/bcellmagic.nf index 7251200b..caefe289 100644 --- a/workflows/bcellmagic.nf +++ b/workflows/bcellmagic.nf @@ -15,12 +15,15 @@ def checkPathParamList = [ params.input, params.multiqc_config ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -if (params.input) { ch_input = Channel.fromPath(params.input) } else { exit 1, "Please provide input file containing the sample metadata with the '--input' option." } +if (params.input) { ch_input = Channel.fromPath(params.input, checkIfExists: true) } else { exit 1, "Please provide input file containing the sample metadata with the '--input' option." } if (!params.library_generation_method) { exit 1, "Please specify a library generation method with the `--library_generation_method` option." 
} +// Check other params +if (params.adapter_fasta) { ch_adapter_fasta = Channel.fromPath(params.adapter_fasta, checkIfExists: true) } else { ch_adapter_fasta = [] } + // Validate library generation method parameter if (params.library_generation_method == 'specific_pcr_umi'){ if (params.vprimers) { @@ -161,9 +164,8 @@ include { PRESTO_SANS_UMI } from '../subworkflows/local/presto_sans_u // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -183,35 +185,29 @@ workflow BCELLMAGIC { // INPUT_CHECK ( ch_input ) - ch_fastqc = INPUT_CHECK + INPUT_CHECK.out.reads.dump(tag: 'input reads') + + ch_reads = INPUT_CHECK .out .reads - .groupTuple(by: [0]) - .map{ it -> [ it[0], it[1].flatten() ] } - - ch_presto = ch_fastqc.map{ it -> it.flatten() } + .dump(tag: 'input reads') ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // - // MODULE: FastQC - // - FASTQC ( ch_fastqc ) - - // Channel for software versions - ch_versions = ch_versions.mix(FASTQC.out.versions.ifEmpty(null)) - if (params.umi_length == 0) { // // SUBWORKFLOW: pRESTO without UMIs // PRESTO_SANS_UMI ( - ch_presto, + ch_reads, ch_cprimers_fasta, - ch_vprimers_fasta + ch_vprimers_fasta, + ch_adapter_fasta ) ch_presto_fasta = PRESTO_SANS_UMI.out.fasta ch_presto_software = PRESTO_SANS_UMI.out.software + ch_fastp_reads_html = PRESTO_SANS_UMI.out.fastp_reads_html + ch_fastp_reads_json = PRESTO_SANS_UMI.out.fastp_reads_json ch_fastqc_postassembly_gz = PRESTO_SANS_UMI.out.fastqc_postassembly_gz ch_presto_assemblepairs_logs 
= PRESTO_SANS_UMI.out.presto_assemblepairs_logs ch_presto_filterseq_logs = PRESTO_SANS_UMI.out.presto_filterseq_logs @@ -228,12 +224,15 @@ workflow BCELLMAGIC { // SUBWORKFLOW: pRESTO with UMIs // PRESTO_UMI ( - ch_presto, + ch_reads, ch_cprimers_fasta, - ch_vprimers_fasta + ch_vprimers_fasta, + ch_adapter_fasta ) ch_presto_fasta = PRESTO_UMI.out.fasta ch_presto_software = PRESTO_UMI.out.software + ch_fastp_reads_html = PRESTO_UMI.out.fastp_reads_html + ch_fastp_reads_json = PRESTO_UMI.out.fastp_reads_json ch_fastqc_postassembly_gz = PRESTO_UMI.out.fastqc_postassembly_gz ch_presto_filterseq_logs = PRESTO_UMI.out.presto_filterseq_logs ch_presto_maskprimers_logs = PRESTO_UMI.out.presto_maskprimers_logs @@ -412,7 +411,8 @@ workflow BCELLMAGIC { ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_reads_json.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_reads_html.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc_postassembly_gz.collect{it[1]}.ifEmpty([])) MULTIQC ( diff --git a/workflows/reveal.nf b/workflows/reveal.nf index 61708573..7627e7fb 100644 --- a/workflows/reveal.nf +++ b/workflows/reveal.nf @@ -82,8 +82,8 @@ include { REVEAL_INPUT_CHECK } from '../subworkflows/local/reveal_input_check' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* 
========================================================================================