diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..ea27a584 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7fa5f24e..efebe9e0 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -101,3 +101,19 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/airrflow/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) +- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 9d440095..e8f6878b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 22.10.1)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2c484b6..ab6c66d0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,10 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: name: Run pipeline with test data @@ -20,11 +24,28 @@ jobs: strategy: matrix: NXF_VER: - - "21.10.3" + - "22.10.1" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v4 + with: + FILES: | + modules/local/airrflow_report/Dockerfile + modules/local/airrflow_report/environment.yml + + - name: Build new docker image + if: env.MATCHED_FILES + run: docker build --no-cache . -t nfcore/airrflowreport:dev + + - name: Pull docker image + if: ${{ !env.MATCHED_FILES }} + run: | + docker pull nfcore/airrflowreport:dev + docker tag nfcore/airrflowreport:dev nfcore/airrflowreport:dev - name: Install Nextflow uses: nf-core/setup-nextflow@v1 @@ -42,13 +63,83 @@ jobs: strategy: matrix: NXF_VER: - - "21.10.3" + - "22.10.1" + - "latest-everything" + profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled"] + fail-fast: false + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v4 + with: + FILES: | + modules/local/airrflow_report/Dockerfile + modules/local/airrflow_report/environment.yml + + - name: Build new docker image + if: env.MATCHED_FILES + run: docker build --no-cache . -t nfcore/airrflowreport:dev + + - name: Pull docker image + if: ${{ !env.MATCHED_FILES }} + run: | + docker pull nfcore/airrflowreport:dev + docker tag nfcore/airrflowreport:dev nfcore/airrflowreport:dev + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/.nextflow.log + /home/runner/work + !/home/runner/work/conda + !/home/runner/work/singularity + + test_container: + name: Run workflow with immcantation devel test_container + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/airrflow') }} + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "22.10.1" - "latest-everything" - profile: ["test_tcr", "test_no_umi", "test_tcr_thr", "test_nocluster", "test_fetchimgt"] + profile: ["test_assembled_immcantation_devel", "test_raw_immcantation_devel"] + fail-fast: false steps: - name: Check out pipeline code uses: actions/checkout@v2 + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v4 + with: + FILES: | + modules/local/airrflow_report/Dockerfile + modules/local/airrflow_report/environment.yml + + - name: Build new docker image + if: env.MATCHED_FILES + run: docker build --no-cache . 
-t nfcore/airrflowreport:dev + + - name: Pull docker image + if: ${{ !env.MATCHED_FILES }} + run: | + docker pull nfcore/airrflowreport:dev + docker tag nfcore/airrflowreport:dev nfcore/airrflowreport:dev + - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: @@ -57,3 +148,14 @@ jobs: - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/.nextflow.log + /home/runner/work + !/home/runner/work/conda + !/home/runner/work/singularity diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index e66d24be..515f7455 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "result=pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69b..858d622e 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -38,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check code lints with Black uses: psf/black@stable @@ -69,12 +71,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: python-version: "3.7" architecture: "x64" @@ -97,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..0bbcd30f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.github/workflows/push_dockerhub_dev.yml b/.github/workflows/push_dockerhub_dev.yml new file mode 100644 index 
00000000..0364b42b --- /dev/null +++ b/.github/workflows/push_dockerhub_dev.yml @@ -0,0 +1,28 @@ +name: nf-core Docker push (dev) +# This builds the docker image and pushes it to DockerHub +# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) +on: + push: + branches: + - dev + +jobs: + push_dockerhub: + name: Push new Docker image to Docker Hub (dev) + runs-on: ubuntu-latest + # Only run for the nf-core repo, for releases and merged PRs + if: ${{ github.repository == 'nf-core/airrflow' }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Build new docker image + run: docker build --no-cache ./modules/local/airrflow_report/ -t nfcore/airrflowreport:dev + + - name: Push Docker image to DockerHub (dev) + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker push nfcore/airrflowreport:dev diff --git a/.github/workflows/push_dockerhub_release.yml b/.github/workflows/push_dockerhub_release.yml new file mode 100644 index 00000000..eac03c54 --- /dev/null +++ b/.github/workflows/push_dockerhub_release.yml @@ -0,0 +1,29 @@ +name: nf-core Docker push (release) +# This builds the docker image and pushes it to DockerHub +# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) +on: + release: + types: [published] + +jobs: + push_dockerhub: + name: Push new Docker image to Docker Hub (release) + runs-on: ubuntu-latest + # Only run for the nf-core repo, for releases and merged PRs + if: ${{ github.repository == 'nf-core/airrflow' }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Build new docker image + run: docker build --no-cache ./modules/local/airrflow_report/ -t nfcore/airrflowreport:latest + + - name: Push Docker image to DockerHub (release) + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker push nfcore/airrflowreport:latest + docker tag nfcore/airrflowreport:latest nfcore/airrflowreport:${{ github.event.release.tag_name }} + docker push nfcore/airrflowreport:${{ github.event.release.tag_name }} diff --git a/.nf-core.yml b/.nf-core.yml index 40647053..2c0141ff 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1,4 @@ lint: files_exist: - conf/igenomes.config - - lib/WorkflowAirrflow.groovy repository_type: pipeline diff --git a/.prettierignore b/.prettierignore index eb74a574..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index f1710279..47cfef14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,38 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [3.0dev] - + +### `Added` + +- Combined old bcellmagic and reveal subworkflows for better pipeline integration. +- Added compulsory AIRR fields in input samplesheet. +- Added option to calculate clones per group `clone_by` and then create a report with the results altogether. +- Added pipeline overview diagram and metro map. +- Added full logs to `enchantr report filesize` process. 
+- Template update to nf-core tools v2.7.1 +- Template update to nf-core tools v2.7.2 + +### `Fixed` + +- Fixed bug arising when not providing `--index_file FALSE` for some input options not requiring index files. + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| multiqc | 1.13 | 1.14 | +| pandas | 1.1.5 | 1.5.3 | +| presto | 0.7.0 | 0.7.1 | +| changeo | 1.2.0 | 1.3.0 | +| igblast | 1.17.1 | 1.19.0 | +| r-enchantr | | 0.0.6 | +| r-plotly | | 4.10.1 | + +### `Deprecated` + +- Deprecated param `enable_conda` + ## [2.4.0] 2022-12-05 "Aparecium" ### `Added` @@ -21,6 +53,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#180](https://github.com/nf-core/airrflow/pull/180) Added possibility to provide custom Rmarkdown report. - [#183](https://github.com/nf-core/airrflow/pull/183) Update template to nf-core tools v2.5.1 - [#183](https://github.com/nf-core/airrflow/pull/183) Add option to provide another different threshold in splitseq process +- Rename reveal test to test_assembled, add separate test for immcantation devel container as another job so other tests are not cancelled if this test does not run through. ### `Fixed` diff --git a/LICENSE b/LICENSE index 7b294aa0..2a949fd4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Gisela Gabernet, Simon Heumos, Alexander Peltzer +Copyright (c) Gisela Gabernet, Susanna Marquez, Alexander Peltzer, Simon Heumos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index d5976da2..8b15959b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/airrflow/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/airrflow/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2642009-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2642009) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -14,7 +14,11 @@ ## Introduction -** nf-core/airrflow ** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell bulk repertoire sequencing data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset and requires as input targeted amplicon sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol. +** nf-core/airrflow ** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) +toolset. 
The input data can be (a) targeted amplicon bulk sequencing data of the V, D, J and C regions +of the B/T-cell receptor with multiplex PCR or 5' RACE protocol or (b) assembled reads (bulk or single cell). + +![nf-core/airrflow overview](docs/images/airrflow_workflow_overview.png) The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! @@ -22,31 +26,58 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Pipeline summary -By default, the pipeline currently performs the following steps: - -- Raw read quality control, adapter trimming and read clipping (`fastp`) -- Pre-processing (`pRESTO`) - - Filtering sequences by sequencing quality. - - Masking amplicon primers. - - Pairing read mates. - - Cluster sequences according to similarity, it helps identify if the UMI barcode diversity was not high enough. - - Building consensus of sequences with the same UMI barcode. - - Re-pairing read mates. - - Assembling R1 and R2 read mates. - - Removing and annotating read duplicates with different UMI barcodes. - - Filtering out sequences that do not have at least 2 duplicates. -- Post-assembly read quality control (`FastQC`s) -- Assigning gene segment alleles with `IgBlast` using the IMGT database (`Change-O`). -- Finding the Hamming distance threshold for clone definition (`SHazaM`). -- Clonal assignment: defining clonal lineages of the B-cell / T-cell populations (`Change-O`). -- Reconstructing gene calls of germline sequences (`Change-O`). -- Generating clonal trees (`Alakazam`). -- Repertoire analysis: calculation of clonal diversity and abundance (`Alakazam`). +nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single-cell targeted sequencing. Several protocols are supported; please see the [usage documentation](https://nf-co.re/airrflow/usage) for more details on the supported protocols. + +![nf-core/airrflow overview](docs/images/metro-map-airrflow.png) + +1. QC and sequence assembly (bulk only) + +- Raw read quality control, adapter trimming and clipping (`Fastp`) +- Filtering sequences by sequencing quality (`pRESTO FilterSeq`). +- Mask amplicon primers (`pRESTO MaskPrimers`). +- Pair read mates (`pRESTO PairSeq`). +- For UMI-based sequencing: + - Cluster sequences according to similarity (optional for insufficient UMI diversity) (`pRESTO ClusterSets`). + - Building consensus of sequences with the same UMI barcode (`pRESTO BuildConsensus`). +- Assembling R1 and R2 read mates (`pRESTO AssemblePairs`). +- Removing and annotating read duplicates (`pRESTO CollapseSeq`). +- Filtering out sequences that do not have at least 2 duplicates (`pRESTO SplitSeq`). + +2. V(D)J annotation and filtering (bulk and single-cell) + +- Assigning gene segment alleles with `IgBlast` using the IMGT database (`Change-O AssignGenes`).
+- Annotate alignments in AIRR format (`Change-O MakeDB`) +- Filter by alignment quality (locus matching v_call chain, min 200 informative positions, max 10% N nucleotides) +- Filter productive sequences (`Change-O ParseDB split`) +- Filter junction length multiple of 3 +- Annotate metadata (`EnchantR`) + +3. QC filtering (bulk and single-cell) + +- Bulk sequencing filtering: + - Remove chimeric sequences (optional) (`SHazaM`, `EnchantR`) + - Detect cross-contamination (optional) (`EnchantR`) + - Collapse duplicates (`Alakazam`, `EnchantR`) +- Single-cell QC filtering (`EnchantR`) + - Remove cells without heavy chains. + - Remove cells with multiple heavy chains. + - Remove sequences in different samples that share the same `cell_id` and nucleotide sequence. + - Modify `cell_id`s to ensure they are unique in the project. + +4. Clonal analysis (bulk and single-cell) + +- Find Hamming distance threshold for clone definition (`SHazaM`, `EnchantR`). +- Create germlines and define clones, repertoire analysis (`Change-O`, `EnchantR`). +- Build lineage trees (`SCOPer`, `IgphyML`, `EnchantR`). + +5. Repertoire analysis and reporting + +- Custom repertoire analysis pipeline report (`Alakazam`). - Aggregating QC reports (`MultiQC`). ## Quick Start -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. @@ -70,10 +101,12 @@ nextflow run nf-core/airrflow \ -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \ --input samplesheet.tsv \ --outdir ./results \ ---protocol pcr_umi \ +--library_generation_method specific_pcr_umi \ --cprimers CPrimers.fasta \ --vprimers VPrimers.fasta \ ---umi_length 12 +--umi_length 12 \ +--max_memory 8.GB \ +--max_cpus 8 ``` See [usage docs](https://nf-co.re/airrflow/usage) for all of the available options when running the pipeline. @@ -84,11 +118,10 @@ The nf-core/airrflow pipeline comes with documentation about the pipeline [usage ## Credits -nf-core/airrflow was originally written by Gisela Gabernet, Simon Heumos, Alexander Peltzer. +nf-core/airrflow was written by [Gisela Gabernet](https://github.com/ggabernet), [Susanna Marquez](https://github.com/ssnn-airr), [Alexander Peltzer](https://github.com/apeltzer) and [Simon Heumos](https://github.com/subwaystation).
Further contributors to the pipeline are: -- [@ssnn-airr](https://github.com/ssnn-airr) - [@dladd](https://github.com/dladd) ## Contributions and Support diff --git a/assets/nf-core-airrflow_logo_reports.png b/assets/nf-core-airrflow_logo_reports.png new file mode 100644 index 00000000..c1d72b29 Binary files /dev/null and b/assets/nf-core-airrflow_logo_reports.png differ diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 95c08634..934f1e27 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -27,6 +27,7 @@ output: html_document library(knitr) library(kableExtra) library(dplyr) +library(tidyr) library(alakazam) library(shazam) library(stringr) @@ -39,17 +40,39 @@ knitr::opts_chunk$set(echo = FALSE) # Airrflow analysis pipeline -## Pipeline overview - ## Number of sequences -Number of reads for each of the samples and number of sequences left after representative analysis steps. +Number of reads for each of the samples and number of sequences left after sequence assembly and mapping steps. + +```{r seq_numbers, echo=FALSE, warning=FALSE, results='asis'} +read_table <- function(tab_file){ + tab_seqs <- read.table(tab_file, header=TRUE, sep="\t", check.names = FALSE) + kable(tab_seqs) %>% + kable_styling("hover") %>% + scroll_box(width = "100%", height = "400px") + } +tryCatch( {read_table("./Table_sequences.tsv")} , + error=function(e){message("No sequence numbers are available if starting with assembled reads.")}) -```{r seq_numbers, echo=FALSE, results='asis'} -tab_seqs <- read.table("./Table_sequences.tsv", header=TRUE, sep="\t", check.names = FALSE) -kable(tab_seqs) %>% - kable_styling("hover") %>% - scroll_box(width = "100%", height = "400px") +``` + +Number of sequences for each of the samples at each downstream filtering and analysis step.
+ +```{r seq_assembled, echo=FALSE, warning=FALSE, results='asis'} +tab_seqs <- read.table("./Table_sequences_assembled.tsv", header=TRUE, sep="\t", check.names = FALSE) + +if (any(is.na(tab_seqs$sample_id))) { + tab_seqs$sample_id <- sapply(tab_seqs$file_0, function(x) unlist(strsplit(as.character(x), "_"))[1]) +} + +dat <- tidyr::pivot_wider(tab_seqs,id_cols=sample_id,names_from=task, values_from=to_num_seqs) +dat <- dat %>% dplyr::relocate(any_of(c("sample_id","ConvertDb-fasta", "AssignGenes-igblast", "MakeDB-igblast", "FilterQuality", + "ParseDb-split", "FilterJunctionMod3","AddMetadata","SingleCellQC","CreateGermlines", + "RemoveChimeric","CollapseDuplicates","ClonePass"))) + +kable(dat) %>% + kable_styling("hover") %>% + scroll_box(width = "100%", height = "400px") ``` @@ -62,7 +85,7 @@ outdir <- "repertoire_comparison" ### Read all the tables as produced by the pipeline in the current folder and joins them together in the df_all dataframe -all_files <- system(paste0("find '",datadir,"' -name '*germ-pass.tsv'"), intern=T) +all_files <- system(paste0("find '",datadir,"' -name '*clone-pass.tsv'"), intern=T) dir.create(outdir) diversity_dir <- paste(outdir, "Diversity", sep="/") @@ -73,18 +96,10 @@ dir.create(abundance_dir) dir.create(vfamily_dir) # Generate one big dataframe from all patient dataframes -df_all = data.frame() -for (file in all_files){ - fname = file - print(fname) - - df_pat <- read.csv(fname, sep="\t") - df_all <- rbind(df_all, df_pat) +df_list = lapply(all_files, read.csv, sep="\t") -} - -write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) +df_all <- dplyr::bind_rows(df_list) # Remove underscores in these columns df_all$subject_id <- sapply(df_all$subject_id, function(x) str_replace(as.character(x), "_", "")) @@ -93,6 +108,9 @@ df_all$sample_id <- sapply(df_all$sample_id, function(x) str_replace(as.characte # Annotate sample and samplepop (sample + population) by add ing all the conditions df_all$subj_locus <- as.factor(paste(df_all$sample_id, df_all$subject_id, df_all$pcr_target_locus, sep="_")) +# Write table to file +write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) + # Set number of bootrstraps nboot = 200 ``` diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..043d02f2 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/add_metadata.R b/bin/add_metadata.R index b850e5ea..54471f56 100755 --- a/bin/add_metadata.R +++ b/bin/add_metadata.R @@ -6,7 +6,7 @@ # --samplesheet Names of the metadata column to be used as node label on the tree plots # --outname Filename for the output repertoire # -h Display help. -# Example: ./lineage_reconstruction.R --repertoire igblast_germ-pass.tsv --nodelabel population +# Example: ./add_metadata.R --repertoire igblast_germ-pass.tsv --samplesheet samplesheet.tsv --outname my-repertoire # Libraries suppressPackageStartupMessages(library(dplyr)) @@ -38,7 +38,7 @@ parsed_fields <- samplesheet_colnames <- colnames(samplesheet) -# merge tables only in case the samplesheet contains more co lumns than the required ones +# merge tables only in case the samplesheet contains more columns than the required ones print( samplesheet_colnames[!(samplesheet_colnames %in% parsed_fields)]) if (length(samplesheet_colnames[!(samplesheet_colnames %in% parsed_fields)]) > 1 ) { diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 8c540fe1..b9ee0f53 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -41,7 +41,7 @@ def check_samplesheet(file_in): """ This function checks that the samplesheet: - - contains the compulsory fields: sample_id, filename_R1, filename_R2, subject_id, pcr_target_locus, species + - contains the compulsory fields: sample_id, filename_R1, filename_R2, subject_id, pcr_target_locus, species, single_cell - sample ids are unique - samples from the same subject come from the same species - pcr_target_locus is "IG" or "TR" @@ -52,7 +52,7 @@ def check_samplesheet(file_in): with open(file_in, "r") as fin: ## Check that required columns are present - MIN_COLS = 6 + MIN_COLS = 7 REQUIRED_COLUMNS = [ "sample_id", "filename_R1", @@ -60,6 +60,11 @@ def check_samplesheet(file_in): "subject_id", "species", "pcr_target_locus", + "single_cell", + "sex", + "tissue", + "biomaterial_provider", + "age", ] header = [x.strip('"') for x in fin.readline().strip().split("\t")] for col in REQUIRED_COLUMNS: diff --git a/bin/imgt2igblast.sh b/bin/imgt2igblast.sh index 08acbaee..cccec263 100755 --- a/bin/imgt2igblast.sh +++ b/bin/imgt2igblast.sh @@ -65,6 +65,10 @@ do cat ${GERMDIR}/${SPECIES}/vdj/imgt_${SPECIES}_${CHAIN}?${SEGMENT}.fasta > ${TMPDIR}/${F} done + # C nucleotides + F=$(echo imgt_${SPECIES}_${CHAIN}_c.fasta | tr '[:upper:]' '[:lower:]') + cat ${GERMDIR}/${SPECIES}/constant/imgt_${SPECIES}_${CHAIN}?C.fasta > ${TMPDIR}/${F} + # V amino acids F=$(echo imgt_aa_${SPECIES}_${CHAIN}_v.fasta | tr '[:upper:]' '[:lower:]') cat ${GERMDIR}/${SPECIES}/vdj_aa/imgt_aa_${SPECIES}_${CHAIN}?V.fasta > ${TMPDIR}/${F} diff --git a/bin/lineage_reconstruction.R b/bin/lineage_reconstruction.R deleted file mode 100755 index 4fef1ace..00000000 --- a/bin/lineage_reconstruction.R +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env Rscript -# -# Create lineage trees: -# Arguments: -# --repertoire Tabulated data in AIRR (TSV) format with clonal assignments and germline assignments. -# --node-label Names of the metadata column to be used as node label on the tree plots -# -h Display help. 
-# Example: ./lineage_reconstruction.R --repertoire igblast_germ-pass.tsv --nodelabel population - -# Libraries -suppressPackageStartupMessages(library(alakazam)) -suppressPackageStartupMessages(library(igraph)) -suppressPackageStartupMessages(library(dplyr)) -suppressPackageStartupMessages(library(optparse)) -options(error = function() traceback(3)) - -# Define commmandline arguments -opt_list <- list( - make_option(c("--repertoire"), default=NULL, - help="Input repertoire .tsv file after clone definition and germline definition."), - make_option(c("--node-label"), dest="node_text", - help="Text to be used as node label. Provide 'none' if no label is desired.") -) - -opt <- parse_args(OptionParser(option_list=opt_list)) - -theme_set(theme_bw(base_family = "ArialMT") + -theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), text = element_text(family="ArialMT"))) - -# Set output directories -patdir_lineage_trees <- "Clone_tree_plots" -dir.create(patdir_lineage_trees) -patdir_lineage_graphml <- "Graphml_trees" -dir.create(patdir_lineage_graphml) - -# Read patient table -df_pat <- read.csv(opt$repertoire, sep="\t") - -print(paste0("Node text request: ", opt$node_text)) - -avail_text = colnames(df_pat) - -if (opt$node_text %in% avail_text) { - print(paste0("Node string set to: ",opt$node_text)) -} else if (opt$node_text == "none") { - print("Node string set to: none.") -} else { - print("Available fields: ") - print(avail_text) - print("or 'none'.") - stop("Node string must be one of the above fields.") -} - -# save clonal table -countclones <- countClones(df_pat, clone="clone_id", copy="duplicate_count") -write.table(countclones, paste("Clones_table_patient_", df_pat$subject_id[1],"_",df_pat$pcr_target_locus[1],".tsv", sep=""), quote=F, sep="\t", row.names = F) - -# Get dnapars exec path -dnapars_exec_tab <- read.csv("dnapars_exec.txt", header=F) -dnapars_exec <- as.character(dnapars_exec_tab[1,1]) - -# Create clonal tree per clone -save_graph <- function(df_pat, clone_num){ - print(paste0("Started processing clone:",clone_num)) - sub_db_clone = dplyr::filter(df_pat, clone_id == clone_num) %>% - dplyr::mutate(across(everything(),as.character)) %>% - dplyr::mutate(across(c(junction_length,duplicate_count), as.numeric)) - - # Make changeo clone and Build Phylip Lineage - if ( opt$node_text == "none" ) { - clone <- makeChangeoClone(sub_db_clone, text_fields = c("c_primer", "subject_id", - "sample_id", "clone_id", "pcr_target_locus"), - num_fields = "duplicate_count") - graph <- buildPhylipLineage(clone, dnapars_exec, rm_temp = T, verbose = F) - V(graph)$label <- "" - } else { - clone <- makeChangeoClone(sub_db_clone, text_fields = append(c("c_primer", "subject_id", - "sample_id", "clone_id", "pcr_target_locus"), opt$node_text), - num_fields = "duplicate_count") - graph <- buildPhylipLineage(clone, dnapars_exec, rm_temp = T, verbose = F) - varname <- opt$node_text - V(graph)$label <- igraph::vertex_attr(graph, varname) - } - - #Modify graph and plot attributes - V(graph)$color <- "steelblue" - V(graph)$color[V(graph)$name == "Germline"] <- "black" - V(graph)$color[grepl("Inferred", V(graph)$name)] <- "white" - - # Remove large default margins - par(mar=c(0, 0, 0, 0) + 0.1) - vsize = V(graph)$duplicate_count - vsize[is.na(vsize)] <- 1 - - # Save graph in graphML format - write_graph(graph, file=paste(patdir_lineage_graphml, "/Graph_", clone@data$subject_id[1], "_", clone@data$pcr_target_locus[1], "_clone_id_", clone_num, ".graphml", sep=""), format = c("graphml")) - - # 
Plot tree - pdf(paste(patdir_lineage_trees,"/Clone_tree_", clone@data$subject_id[1], "_clone_id_", clone_num, ".pdf", sep="")) - plot(graph, layout=layout_as_tree, edge.arrow.mode=0, vertex.frame.color="black", - vertex.label.color="black", vertex.size=(vsize/20 + 6)) - legend("topleft", c("Germline", "Inferred", "Sample"), - fill=c("black", "white", "steelblue"), cex=0.75) - dev.off() - -} - -for (clone_num in countclones$clone_id){ - tryCatch(withCallingHandlers(save_graph(df_pat, clone_num), - error=function(e) {print(paste0("Skipping clone due to problem:", clone_num)) - print("Here is the original error message:") - print(e)}, - warning=function(w) {print(paste0("Warning for clone:", clone_num)) - invokeRestart("muffleWarning")}), - error = function(e) { print(paste0("Processed clone:", clone_num)) }) -} - diff --git a/bin/reveal_add_metadata.R b/bin/reveal_add_metadata.R index 83c027a3..b5b7cbd4 100755 --- a/bin/reveal_add_metadata.R +++ b/bin/reveal_add_metadata.R @@ -61,7 +61,7 @@ if (!("INPUTID" %in% names(opt))) { metadata <- read.csv(opt$METADATA, sep = "\t", header = TRUE, stringsAsFactors = F) metadata <- metadata %>% - filter(id == opt$INPUTID) + filter(sample_id == opt$INPUTID) if (nrow(metadata) != 1) { stop("Expecting nrow(metadata) == 1; nrow(metadata) == ", nrow(metadata), " found") @@ -77,13 +77,17 @@ internal_fields <- "valid_cloneby", # "cloneby_group", "cloneby_size", + "id", "filetype", "valid_single_cell", - "valid_pcr_target_locus" + "valid_pcr_target_locus", + "filename_R1", + "filename_R2", + "filename_I1" ) metadata <- metadata[, !colnames(metadata) %in% internal_fields] -db <- read_airr(opt$REPERTOIRE) +db <- read_rearrangement(opt$REPERTOIRE) db <- cbind(db, metadata) @@ -93,7 +97,7 @@ if (!is.null(opt$OUTNAME)) { output_fn <- sub(".tsv$", "_meta-pass.tsv", basename(opt$REPERTOIRE)) } -write_airr(db, file = output_fn) +write_rearrangement(db, file = output_fn) write("START> AddMetadata", stdout()) diff --git a/bin/reveal_collapseDuplicates.R b/bin/reveal_collapseDuplicates.R index cfce91c4..def52f3b 100755 --- a/bin/reveal_collapseDuplicates.R +++ b/bin/reveal_collapseDuplicates.R @@ -102,5 +102,5 @@ for (i in 1:length(repertoires)) { } else { output_fn <- paste0(ids[i],"_collapse-pass.tsv") } - write_airr(db %>% filter(id == ids[i]), file=output_fn) + write_rearrangement(db %>% filter(id == ids[i]), file=output_fn) } diff --git a/bin/shazam_threshold.R b/bin/shazam_threshold.R deleted file mode 100755 index f11472e7..00000000 --- a/bin/shazam_threshold.R +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env Rscript -invisible(lapply(c("ggplot2", - "stringi", - "alakazam", - "shazam", - "dplyr"), library, character.only=T)) - -# Set random seed for reproducibility -set.seed(12345) - -args = commandArgs(trailingOnly=TRUE) - -if (length(args)<2) { - stop("Two arguments must be supplied (input file tab and input file fasta).\n", call.=FALSE) -} - -# Get input from args -inputtable = args[1] -threshold_method = args[2] - - -output_folder = dirname(inputtable) - -db <- read.table(inputtable, header=TRUE, sep="\t") - -# Add label for source species -sourceLabel <- gsub(pattern = "\\.tsv$", "", inputtable) - -# Find the Hamming distance -dist_ham <- distToNearest(db, - vCallColumn="v_call", - jCallColumn="j_call", - sequenceColumn="junction", - model="ham", - normalize="len", - nproc=1, - first = FALSE) - - -num_dist <- length(unique(na.omit(dist_ham$dist_nearest))) - -if (num_dist > 3) { - # Find threshold using chosen method - if (threshold_method == "density") { - 
output <- findThreshold(dist_ham$dist_nearest, method="density") - threshold <- output@threshold - } else if (threshold_method == "gmm") { - output <- findThreshold(dist_ham$dist_nearest, method="gmm") - threshold <- output@threshold - } else { - stop("Threshold method is not available, please choose from: density, gmm") - } - - # Plot distance histogram, density estimate and optimum threshold - ggsave(paste(output_folder,paste0(sourceLabel, "_Hamming_distance_threshold.pdf"),sep="/"), plot(output), device="pdf") - -} else { - # Workaround for sources with too few nearest distance values to determine an effective threshold. - # Set threshold to 0 and print a warning - threshold <- 0.0 - warning(paste("Could not determine an effective Hamming distance threshold for source:", sourceLabel, ", which has", num_dist, "unique nearest distances. Threshold defaulting to 0.", sep=" ")) -} - -write.table(threshold, file= paste(output_folder,paste0(sourceLabel, "_threshold.txt"),sep="/"), quote=FALSE, sep="", row.names = FALSE, col.names = FALSE) diff --git a/conf/base.config b/conf/base.config index 0a25ad4a..f5124a93 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,6 +59,7 @@ process { withLabel:process_long_parallelized { time = { check_max( 30.h * task.attempt, 'time' ) } cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false diff --git a/conf/modules.config b/conf/modules.config index 731e203a..fc21a51c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,17 +26,16 @@ process { ] } - if (params.subworkflow == "bcellmagic") { - - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // Validate input raw + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'FASTP' { + withName: 'FASTP' { publishDir = [ [ path: { "${params.outdir}/fastp/${meta.id}" }, @@ -58,464 +57,403 @@ process { params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed params.trim_nextseq ? "--trim_poly_g" : "", // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails ].join(" ").trim() - } - - withName: 'GUNZIP_*' { - publishDir = [ - [ - enabled: false - ] - ] - } - - withName: FASTQC_POSTASSEMBLY { - publishDir = [ - path: { "${params.outdir}/fastqc/postassembly" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--quiet' - } - - withName: PRESTO_FILTERSEQ { - publishDir = [ - path: { "${params.outdir}/presto/01-filterseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/02-filterseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: PRESTO_MASKPRIMERS { - publishDir = [ - path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/03-maskprimers/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: PRESTO_PAIRSEQ { - publishDir = [ - path: { "${params.outdir}/presto/03-pairseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + } - withName: PRESTO_CLUSTERSETS { - publishDir = [ - path: { "${params.outdir}/presto/04-clustersets/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + withName: 'GUNZIP_*' { + publishDir = [ + [ + enabled: false ] - } + ] + } - withName: PRESTO_PARSE_CLUSTER { - publishDir = [ - path: { "${params.outdir}/presto/05-parse-clusters/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // Validate input assembled + withName: VALIDATE_INPUT { + publishDir = [ + path: { "${params.outdir}/validated_input" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_BUILDCONSENSUS { - publishDir = [ - path: { "${params.outdir}/presto/06-build-consensus/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: FASTQC { + ext.args = '--quiet' + } - withName: PRESTO_POSTCONSENSUS_PAIRSEQ { - publishDir = [ - path: { "${params.outdir}/presto/07-postconsensus-pairseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: FASTQC_POSTASSEMBLY { + publishDir = [ + path: { "${params.outdir}/fastqc/postassembly" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--quiet' + } - withName: PRESTO_ASSEMBLEPAIRS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/08-assemble-pairs/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' - ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' - } + // ----------------- + // sequence assembly + // ----------------- + withName: PRESTO_FILTERSEQ { + publishDir = [ + path: { "${params.outdir}/presto/01-filterseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/01-assemble-pairs/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - ext.args = '--rc tail' - ext.args2 = '-f ID SEQCOUNT PRIMER PRCOUNT PRFREQ LENGTH OVERLAP ERROR PVALUE' - } + withName: PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/02-filterseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_PARSEHEADERS_COLLAPSE_UMI { - publishDir = [ - enabled: false - ] - ext.subcommand = 'collapse' - ext.args = '-f CONSCOUNT --act min' - } + withName: PRESTO_MASKPRIMERS { + publishDir = [ + path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_PARSEHEADERS_PRIMERS_UMI { - publishDir = [ - enabled: false - ] - ext.args = 'PRCONS PRCONS' - } + withName: PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/03-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI { - publishDir = [ - enabled: false - ] - ext.args = 'PRIMER PRIMER' - } + withName: PRESTO_PAIRSEQ { + publishDir = [ + path: { "${params.outdir}/presto/03-pairseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_PARSEHEADERS_METADATA { - publishDir = [ - enabled: false - ] - ext.args = '-f sample_id subject_id species pcr_target_locus' - } + withName: PRESTO_CLUSTERSETS { + publishDir = [ + path: { "${params.outdir}/presto/04-clustersets/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_COLLAPSESEQ_UMI { - publishDir = [ - path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '-n 20 --inner --uf PRCONS --cf CONSCOUNT --act sum' - ext.args2 = '-f HEADER DUPCOUNT' - } + withName: PRESTO_PARSE_CLUSTER { + publishDir = [ + path: { "${params.outdir}/presto/05-parse-clusters/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_COLLAPSESEQ_SANS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/04-collapseseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '-n 20 --inner' - ext.args2 = '-f HEADER DUPCOUNT' - } + withName: PRESTO_BUILDCONSENSUS { + publishDir = [ + path: { "${params.outdir}/presto/06-build-consensus/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: PRESTO_SPLITSEQ_UMI { - publishDir = [ - path: { "${params.outdir}/presto/10-splitseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '-f CONSCOUNT --num 2' - } + withName: PRESTO_POSTCONSENSUS_PAIRSEQ { + publishDir = [ + path: { "${params.outdir}/presto/07-postconsensus-pairseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } - withName: PRESTO_SPLITSEQ_SANS_UMI { - publishDir = [ - path: { "${params.outdir}/presto/05-splitseq/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '-f DUPCOUNT --num 2' - } + withName: PRESTO_ASSEMBLEPAIRS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/08-assemble-pairs/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' + ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' + } - withName: CHANGEO_ASSIGNGENES { - publishDir = [ - path: { "${params.outdir}/changeo/01-assign-genes/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--format blast' - } + withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/01-assemble-pairs/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--rc tail' + ext.args2 = '-f ID SEQCOUNT PRIMER PRCOUNT PRFREQ LENGTH OVERLAP ERROR PVALUE' + } - withName: CHANGEO_MAKEDB { - publishDir = [ - path: { "${params.outdir}/changeo/02-make-db/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--regions default --format airr' - } + withName: PRESTO_PARSEHEADERS_COLLAPSE_UMI { + publishDir = [ + enabled: false + ] + ext.subcommand = 'collapse' + ext.args = '-f CONSCOUNT --act min' + } - withName: CHANGEO_PARSEDB_SPLIT { - publishDir = [ - path: { "${params.outdir}/changeo/03-parsedb-split/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PRESTO_PARSEHEADERS_PRIMERS_UMI { + publishDir = [ + enabled: false + ] + ext.args = 'PRCONS PRCONS' + } - withName: CHANGEO_PARSEDB_SELECT { - publishDir = [ - path: { "${params.outdir}/changeo/04-parsedb-select/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '-f v_call j_call -u "IG[HLK]" --regex --logic all' - ext.args2 = '-f v_call j_call -u "TR" --regex --logic all' - } + withName: PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI { + publishDir = [ + enabled: false + ] + ext.args = 'PRIMER PRIMER' + } - withName: CHANGEO_CONVERTDB_FASTA { - publishDir = [ - path: { "${params.outdir}/changeo/05-convertdb-fasta/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--if sequence_id --sf sequence_alignment --mf v_call dupcount' - } + withName: PRESTO_PARSEHEADERS_METADATA { + publishDir = [ + enabled: false + ] + ext.args = '-f sample_id subject_id species pcr_target_locus' + } - withName: MERGE_TABLES { - publishDir = [ - path: { "${params.outdir}/shazam/01-merged-tables/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PRESTO_COLLAPSESEQ_UMI { + publishDir = [ + path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '-n 20 --inner --uf PRCONS --cf CONSCOUNT --act sum' + ext.args2 = '-f HEADER DUPCOUNT' + } - withName: SHAZAM_THRESHOLD { - publishDir = [ - path: { "${params.outdir}/shazam/02-clonal-threshold/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PRESTO_COLLAPSESEQ_SANS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/04-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '-n 20 --inner' + ext.args2 = '-f HEADER DUPCOUNT' + } - withName: CHANGEO_DEFINECLONES { - publishDir = [ - path: { "${params.outdir}/changeo/06-define-clones/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PRESTO_SPLITSEQ_UMI { + publishDir = [ + path: { "${params.outdir}/presto/10-splitseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '-f CONSCOUNT --num 2' + } - withName: CHANGEO_CREATEGERMLINES { - publishDir = [ - path: { "${params.outdir}/changeo/07-create-germlines/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PRESTO_SPLITSEQ_SANS_UMI { + publishDir = [ + path: { "${params.outdir}/presto/05-splitseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '-f DUPCOUNT --num 2' + } - withName: CHANGEO_BUILDTREES { - publishDir = [ - path: { "${params.outdir}/changeo/08-build-trees/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--igphyml --collapse' - } + // ----------------- + // V(D)J annotation + // ----------------- - withName: ALAKAZAM_LINEAGE { - publishDir = [ - path: { "${params.outdir}/lineage-reconstruction/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--node-label none' - } + withName: FETCH_DATABASES { + publishDir = [ + path: { "${params.outdir}/databases" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_databases + ] + } - withName: ALAKAZAM_SHAZAM_REPERTOIRES { - publishDir = [ - path: { "${params.outdir}/repertoire_analysis" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = 'none' - } + withName: CHANGEO_CONVERTDB_FASTA_FROM_AIRR { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/convert-db/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count c_call c_cigar c_sequence_start c_sequence_end' + } - withName: FETCH_DATABASES { - publishDir = [ - path: { "${params.outdir}/databases" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_databases - ] - } - withName: PARSE_LOGS { - publishDir = [ - path: { "${params.outdir}/parsed-logs" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: CHANGEO_ASSIGNGENES { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/01-assign-genes/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--format blast' } -// REVEAL modules + withName: CHANGEO_MAKEDB { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/02-make-db/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--regions default --format airr --extended' + } - if ( params.subworkflow == "reveal" ) { - withName: IMMCANTATION { - publishDir = [ - path: { "${params.outdir}/immcantation_version" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: FILTER_QUALITY { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/03-quality-filter/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: FETCH_DATABASES { - publishDir = [ - path: { "${params.outdir}/databases" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_databases - ] - } + withName: CHANGEO_PARSEDB_SPLIT { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/04-select-productive/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: CHANGEO_ASSIGNGENES_REVEAL { - publishDir = [ - path: { "${params.outdir}/changeo/assign_genes/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--format blast' - } + withName: FILTER_JUNCTION_MOD3 { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/05-select-junction-mod3/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: CHANGEO_MAKEDB_REVEAL { - publishDir = [ - path: { "${params.outdir}/changeo/makedb-igblast/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: ADD_META_TO_TAB { + publishDir = [ + path: { "${params.outdir}/vdj_annotation/06-annotate-metadata/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: CHANGEO_CONVERTDB_FASTA_FROM_AIRR { - publishDir = [ - path: { "${params.outdir}/changeo/convert-db/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count c_call c_cigar c_sequence_start c_sequence_end' - } + // ------------------------------- + // QC filtering + // ------------------------------- - withName: FILTER_QUALITY { - publishDir = [ - path: { "${params.outdir}/changeo/quality-filter/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + withName: CHANGEO_CREATEGERMLINES { + publishDir = [ + path: { "${params.outdir}/qc-filtering/bulk-qc-filtering/01-create-germlines/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: FILTER_JUNCTION_MOD3 { - publishDir = [ - path: { "${params.outdir}/changeo/quality-filter/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: REMOVE_CHIMERIC { + publishDir = [ + path: { "${params.outdir}/qc-filtering/bulk-qc-filtering/02-chimera-filter/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: CHANGEO_CREATEGERMLINES_REVEAL { - publishDir = [ - path: { "${params.outdir}/changeo/chimera-filter/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: DETECT_CONTAMINATION { + publishDir = [ + path: { "${params.outdir}/qc-filtering/bulk-qc-filtering/03-detect_contamination" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: SINGLE_CELL_QC { - publishDir = [ - path: { "${params.outdir}/changeo/single-cell-qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: COLLAPSE_DUPLICATES { + publishDir = [ + path: { "${params.outdir}/qc-filtering/bulk-qc-filtering/04-collapse-duplicates/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: ADD_META_TO_TAB { - publishDir = [ - path: { "${params.outdir}/changeo/metadata-repertoire/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // ------------------------------ + // Single cell QC + // ------------------------------ + withName: SINGLE_CELL_QC { + publishDir = [ + path: { "${params.outdir}/qc-filtering/single-cell-qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = ['outname':'', 'outputby':'sample_id'] + } - withName: CHANGEO_PARSEDB_SPLIT_REVEAL { - publishDir = [ - path: { "${params.outdir}/changeo/parsedb_split/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: COLLAPSE_DUPLICATES { - publishDir = [ - path: { "${params.outdir}/collapse_duplicates/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + // ------------------------------- + // Clonal analysis + // ------------------------------- - withName: DEFINE_CLONES { - publishDir = [ - path: { "${params.outdir}/changeo/define_clones" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = ['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single'] - } + withName: FIND_THRESHOLD { + publishDir = [ + path: { "${params.outdir}/clonal_analysis/find_threshold" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } - withName: DETECT_CONTAMINATION { - publishDir = [ - path: { "${params.outdir}/changeo/detect_contamination/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: DEFINE_CLONES_COMPUTE { + publishDir = [ + path: { "${params.outdir}/clonal_analysis/define_clones" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'skip_overlap':true, + 'outputby':'sample_id', 'min_n':30] + } - withName: DOWSER_LINEAGES { - publishDir = [ - path: { "${params.outdir}/dowser/lineages" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - ext.args = ['build':'igphyml'] - } + withName: DEFINE_CLONES_REPORT { + publishDir = [ + path: { "${params.outdir}/clonal_analysis/define_clones" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'skip_overlap':true, + 'outputby':'sample_id', 'min_n':30] + } - withName: FIND_THRESHOLD { - publishDir = [ - path: { "${params.outdir}/changeo/find_threshold" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: DOWSER_LINEAGES { + publishDir = [ + path: { "${params.outdir}/clonal_analysis/dowser_lineages" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = ['build':'igphyml'] + } - withName: REMOVE_CHIMERIC { - publishDir = [ - path: { "${params.outdir}/changeo/chimera-filter/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } +// withName: CHANGEO_PARSEDB_SELECT { +// publishDir = [ +// path: { "${params.outdir}/changeo/04-parsedb-select/${meta.id}" }, +// mode: params.publish_dir_mode, +// saveAs: { filename -> filename.equals('versions.yml') ? null : filename } +// ] +// ext.args = '-f v_call j_call -u "IG[HLK]" --regex --logic all' +// ext.args2 = '-f v_call j_call -u "TR" --regex --logic all' +// } + + // ------------------------------- + // Reports + // ------------------------------- + + withName: AIRRFLOW_REPORT { + publishDir = [ + path: { "${params.outdir}/repertoire_analysis" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = 'none' + } - withName: REPORT_FILE_SIZE { - publishDir = [ - path: { "${params.outdir}/report_file_size" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: PARSE_LOGS { + publishDir = [ + path: { "${params.outdir}/parsed-logs" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: VALIDATE_INPUT { - publishDir = [ - path: { "${params.outdir}/validated_input" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: REPORT_FILE_SIZE { + publishDir = [ + path: { "${params.outdir}/report_file_size" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } } diff --git a/conf/test.config b/conf/test.config index 436e01bc..4ca62b41 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,18 +20,26 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - subworkflow = 'bcellmagic' + mode = 'fastq' library_generation_method = 'specific_pcr_umi' cprimer_position = 'R1' - index_file = true umi_length = 8 umi_start = 6 umi_position = 'R1' + index_file = true +} + +process{ + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } } diff --git a/conf/test_reveal_no_cc.config b/conf/test_assembled.config similarity index 74% rename from conf/test_reveal_no_cc.config rename to conf/test_assembled.config index c589801c..8d3e5e10 100644 --- a/conf/test_reveal_no_cc.config +++ b/conf/test_assembled.config @@ -8,8 +8,8 @@ */ params { - config_profile_name = 'Test Reveal profile without Immcantation custom_container' - config_profile_description = 'Minimal test dataset to check pipeline function withot Immcantation custom_container' + config_profile_name = 'Test assembled mode' + config_profile_description = 'Minimal test dataset to test assembled mode' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -17,10 +17,8 @@ params { max_time = 6.h // Input data - subworkflow = 'reveal' + mode = 'assembled' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv' - outdir = 'test-reveal-results-no-cc' - tracedir = 'test-reveal-results-no-cc/pipeline_info' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' @@ -30,3 +28,4 @@ params { cloneby = 'subject_id' remove_chimeric = true } + diff --git a/conf/test_reveal.config b/conf/test_assembled_immcantation_devel.config similarity index 74% rename from conf/test_reveal.config rename to conf/test_assembled_immcantation_devel.config index 2468a86c..49c6f5a4 100644 --- a/conf/test_reveal.config +++ b/conf/test_assembled_immcantation_devel.config @@ -4,12 +4,12 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/airrflow -profile test, + * nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel, */ params { - config_profile_name = 'Test Reveal profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test Reveal profile with Immcantation custom_container' + config_profile_description = 'Minimal test dataset to check pipeline function with Immcantation custom_container' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -17,12 +17,11 @@ params { max_time = 6.h // Input data - subworkflow = 'reveal' + mode = 'assembled' input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv' - outdir = 'test-reveal-results' - tracedir = 'test-reveal-results/pipeline_info' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + igphyml = '/usr/local/share/igphyml/src/igphyml' reassign = true productive_only = true @@ -31,8 +30,8 @@ params { remove_chimeric = true } -process { - withLabel:immcantation { +process{ + withLabel:immcantation{ container = 'immcantation/suite:devel' } } @@ -40,4 +39,3 @@ process { env { PYTHONNOUSERSITE = 0 } - diff --git a/conf/test_fetchimgt.config b/conf/test_fetchimgt.config index 0ea9089d..dadb46c7 100644 --- a/conf/test_fetchimgt.config +++ b/conf/test_fetchimgt.config @@ -20,16 +20,25 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' - subworkflow = 'bcellmagic' + mode = 'fastq' library_generation_method = 'specific_pcr_umi' cprimer_position = 'R1' - index_file = true umi_length = 8 umi_start = 6 umi_position = 'R1' + index_file = true } + +process{ + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } +} + diff --git a/conf/test_full.config b/conf/test_full.config index ec64d910..cc0cdecc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,7 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data - input = 's3://nf-core-awsmegatests/airrflow/input_data/pcr_umi/metadata_pcr_umi_airr.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/metadata_pcr_umi_airr_300.tsv' cprimers = 's3://nf-core-awsmegatests/airrflow/input_data/pcr_umi/cprimers.fasta' vprimers = 's3://nf-core-awsmegatests/airrflow/input_data/pcr_umi/vprimers.fasta' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' @@ -24,7 +24,6 @@ params { // Other params library_generation_method = 'specific_pcr_umi' cprimer_position = 'R1' - index_file = false umi_length = 15 umi_start = 0 umi_position = 'R1' diff --git a/conf/test_no_umi.config b/conf/test_no_umi.config index fe2b18ad..f2952cb3 100644 --- a/conf/test_no_umi.config +++ b/conf/test_no_umi.config @@ -16,16 +16,17 @@ 
params { max_memory = 6.GB max_time = 6.h + mode = 'fastq' + cprimer_position = 'R1' library_generation_method = 'specific_pcr' - index_file = false cprimer_start = 4 vprimer_start = 4 primer_revpr = true umi_length = 0 // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Metadata_test-no-umi.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Metadata_test-no-umi_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Greiff2014_CPrimers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Greiff2014_VPrimers.fasta' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' @@ -37,6 +38,11 @@ process { // When not using UMIs, set the coord parameter based on source (e.g., sra or illumina) withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { ext.args = '--rc tail --coord sra --maxerror 0.3' - } - + } + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } } + diff --git a/conf/test_nocluster.config b/conf/test_nocluster.config index 53edd963..88b45765 100644 --- a/conf/test_nocluster.config +++ b/conf/test_nocluster.config @@ -20,19 +20,27 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - subworkflow = 'bcellmagic' + mode = 'fastq' library_generation_method = 'specific_pcr_umi' cprimer_position = 'R1' - index_file = true umi_length = 8 umi_start = 6 umi_position = 'R1' cluster_sets = false + index_file = true +} + +process{ + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } } diff --git a/conf/test_raw_immcantation_devel.config b/conf/test_raw_immcantation_devel.config new file mode 100644 index 00000000..7eff6f53 --- /dev/null +++ b/conf/test_raw_immcantation_devel.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/airrflow -profile test_raw_immcantation_devel, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' + cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' + vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' + + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + igphyml = '/usr/local/share/igphyml/src/igphyml' + + mode = 'fastq' + + library_generation_method = 'specific_pcr_umi' + cprimer_position = 'R1' + umi_length = 8 + umi_start = 6 + umi_position = 'R1' + index_file = true +} + +process{ + withLabel:immcantation{ + container = 'immcantation/suite:devel' + } + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } +} + +env { + PYTHONNOUSERSITE = 0 +} diff --git a/conf/test_tcr.config b/conf/test_tcr.config index e1b419a2..e8eb1ce2 100644 --- a/conf/test_tcr.config +++ b/conf/test_tcr.config @@ -8,24 +8,27 @@ */ params { - config_profile_name = 'Test profile' + config_profile_name = 'Test TCR' config_profile_description = 'Minimal test dataset to check pipeline function' + // Limit resources so that this can run on GitHub Actions max_cpus = 2 max_memory = 6.GB max_time = 48.h - index_file = false + + // params + mode = 'fastq' umi_length = 12 umi_position = 'R2' cprimer_start = 5 skip_report = false - threshold_method = 'gmm' library_generation_method = 'dt_5p_race_umi' cprimer_position = 'R1' + clonal_threshold = 0 // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/TCR_metadata.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/TCR_metadata_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/cprimers.fasta' race_linker = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/linker.fasta' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' @@ -33,3 +36,10 @@ params { } +process{ + withName:"DEFINE_CLONES*"{ + ext.args = ['outname':'', 'model':'hierarchical', + 'method':'nt', 'linkage':'single', + 'outputby':'sample_id', 'min_n':10] + } +} diff --git a/conf/test_tcr_thr.config b/conf/test_tcr_thr.config deleted file mode 100644 index 57ae1a03..00000000 --- a/conf/test_tcr_thr.config +++ /dev/null @@ -1,35 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/airrflow -profile test_tcr, - */ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - index_file = false - umi_length = 12 - umi_position = 'R2' - cprimer_start = 5 - skip_report = false - set_cluster_threshold = true - cluster_threshold = 0 - library_generation_method = 'dt_5p_race_umi' - cprimer_position = 'R1' - - - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/TCR_metadata.tsv' - cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/cprimers.fasta' - race_linker = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-tcr/linker.fasta' - imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' - igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' - -}
diff --git a/docs/images/airrflow_workflow_overview.png b/docs/images/airrflow_workflow_overview.png new file mode 100644 index 00000000..17b8eac2 Binary files /dev/null and b/docs/images/airrflow_workflow_overview.png differ
diff --git a/docs/images/airrflow_workflow_overview.svg b/docs/images/airrflow_workflow_overview.svg new file mode 100644 index 00000000..5a208af6 --- /dev/null +++ b/docs/images/airrflow_workflow_overview.svg @@ -0,0 +1,2743 @@ [2,743 lines of SVG markup omitted; recoverable text: "CC-BY 4.0. Design originally by Zandra Fagernäs"]
diff --git a/docs/images/metro-map-airrflow.png b/docs/images/metro-map-airrflow.png new file mode 100644 index 00000000..9230e3ec Binary files /dev/null and b/docs/images/metro-map-airrflow.png differ
diff --git a/docs/images/metro-map-airrflow.svg b/docs/images/metro-map-airrflow.svg new file mode 100644 index 00000000..1117253c --- /dev/null +++ b/docs/images/metro-map-airrflow.svg @@ -0,0 +1,5312 @@ [5,312 lines of SVG markup omitted]
diff --git a/docs/output.md b/docs/output.md index bd8c88f7..29567be9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,6 +10,8 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +TODO: update this to add/remove lines + - [FastP](#fastp) - read quality control, adapter trimming and read clipping - [pRESTO](#presto) - read pre-processing - [Filter by sequence quality](#filter-by-sequence-quality) - filter sequences by quality @@ -70,7 +72,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d -Filters reads that are below a quality threshold by using the tool [FilterSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/FilterSeq.html) from the pRESTO Immcantation toolset. The default quality threshold is 20. +Filters reads that are below a quality threshold by using the tool [FilterSeq](https://presto.readthedocs.io/en/stable/tools/FilterSeq.html) from the pRESTO Immcantation toolset.
### Mask primers @@ -83,7 +85,7 @@ Filters reads that are below a quality threshold by using the tool [FilterSeq](h -Masks primers that are provided in the C-primers and V-primers input files. It uses the tool [MaskPrimers](https://presto.readthedocs.io/en/version-0.5.11/tools/MaskPrimers.html) of the pRESTO Immcantation toolset. +Masks primers that are provided in the C-primers and V-primers input files. It uses the tool [MaskPrimers](https://presto.readthedocs.io/en/stable/tools/MaskPrimers.html) of the pRESTO Immcantation toolset. ### Pair mates @@ -95,7 +97,7 @@ Masks primers that are provided in the C-primers and V-primers input files. It u -Pair read mates using [PairSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/PairSeq.html) from the pRESTO Immcantation toolset. +Pair read mates using [PairSeq](https://presto.readthedocs.io/en/stable/tools/PairSeq.html) from the pRESTO Immcantation toolset. ### Cluster sets @@ -108,7 +110,7 @@ Pair read mates using [PairSeq](https://presto.readthedocs.io/en/version-0.5.11/ -Cluster sequences according to similarity, using [ClusterSets set](https://presto.readthedocs.io/en/version-0.5.11/tools/ClusterSets.html#clustersets-set). This step is introduced to deal with too low UMI diversity. +Cluster sequences according to similarity, using [ClusterSets set](https://presto.readthedocs.io/en/stable/tools/ClusterSets.html#clustersets-set). This step is introduced to deal with too low UMI diversity. ### Parse clusters @@ -133,7 +135,7 @@ Annotate cluster ID as part of the barcode, using [Parseheaders copy](https://pr -Build sequence consensus from all sequences that were annotated to have the same UMI. Uses [BuildConsensus](https://presto.readthedocs.io/en/version-0.5.11/tools/BuildConsensus.html) from the pRESTO Immcantation toolset. +Build sequence consensus from all sequences that were annotated to have the same UMI. Uses [BuildConsensus](https://presto.readthedocs.io/en/stable/tools/BuildConsensus.html) from the pRESTO Immcantation toolset. ### Re-pair mates @@ -145,7 +147,7 @@ Build sequence consensus from all sequences that were annotated to have the same -Re-pair read mates using [PairSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/PairSeq.html) from the pRESTO Immcantation toolset. +Re-pair read mates using [PairSeq](https://presto.readthedocs.io/en/stable/tools/PairSeq.html) from the pRESTO Immcantation toolset. ### Assemble mates @@ -158,7 +160,7 @@ Re-pair read mates using [PairSeq](https://presto.readthedocs.io/en/version-0.5. -Assemble read mates using [AssemblePairs](https://presto.readthedocs.io/en/version-0.5.11/tools/AssemblePairs.html) from the pRESTO Immcantation toolset. +Assemble read mates using [AssemblePairs](https://presto.readthedocs.io/en/stable/tools/AssemblePairs.html) from the pRESTO Immcantation toolset. ### Remove duplicates @@ -171,7 +173,7 @@ Assemble read mates using [AssemblePairs](https://presto.readthedocs.io/en/versi -Remove duplicates using [CollapseSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/CollapseSeq.html) from the pRESTO Immcantation toolset. +Remove duplicates using [CollapseSeq](https://presto.readthedocs.io/en/stable/tools/CollapseSeq.html) from the pRESTO Immcantation toolset. 
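Most of these pRESTO pre-processing steps take their command-line options from `ext.args` values defined in `conf/modules.config` (shown at the top of this diff), so they can be tuned per run through a small custom config passed with `-c custom.config`. A sketch only, mirroring the override that the `test_no_umi` profile in this diff applies to the mate-assembly step (check `conf/modules.config` for the exact process selector in your pipeline version):

```nextflow
// custom.config (sketch): pass extra arguments to pRESTO AssemblePairs when no UMIs are used.
// '--coord' should match the read header format of your data (e.g. 'sra' or 'illumina').
process {
    withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI {
        ext.args = '--rc tail --coord sra --maxerror 0.3'
    }
}
```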
### Filter sequences for at least 2 representatives @@ -183,7 +185,7 @@ Remove duplicates using [CollapseSeq](https://presto.readthedocs.io/en/version-0 -Remove sequences which do not have 2 representative using [SplitSeq](https://presto.readthedocs.io/en/version-0.5.11/tools/SplitSeq.html) from the pRESTO Immcantation toolset. +Remove sequences which do not have 2 representative using [SplitSeq](https://presto.readthedocs.io/en/stable/tools/SplitSeq.html) from the pRESTO Immcantation toolset. ## FastQC @@ -209,155 +211,255 @@ Remove sequences which do not have 2 representative using [SplitSeq](https://pre ## Change-O +### Convert input to fasta, if needed + +
+Output files. Optional. + +- `vdj_annotation/convert-db/` + - `*.fasta`: The sequences in fasta format. + - `*log.txt`: Log of the process that will be parsed to generate a report. + +
+ +This folder is generated when the input data are AIRR-C formatted rearrangement tables that need to +be reprocessed (`--reassign true`). For example, 10x Genomics' `airr_rearrangement.tsv` files. [ConvertDb fasta](https://changeo.readthedocs.io/en/stable/tools/ConvertDb.html#convertdb-py-fasta) is used to +generate a `.fasta` file from the rearrangement table. + ### Assign genes with Igblast
Output files -- `changeo/01-assigngenes/` - - `fasta/*.fasta`: Igblast results converted to fasta format with genotype V-call annotated in the header. +- `vdj_annotation/01-assign-genes/` + - `*.fmt7`: Igblast results. + - `*.fasta`: Igblast results converted to fasta. + - `*log.txt`: Log of the process that will be parsed to generate a report.
-Assign genes with Igblast, using the IMGT database is performed by the [AssignGenes](https://changeo.readthedocs.io/en/version-0.4.5/examples/igblast.html#running-igblast) command of the Change-O tool from the Immcantation Framework. +Assign genes with Igblast, using the IMGT database is performed by the [AssignGenes](https://changeo.readthedocs.io/en/stable/examples/igblast.html#running-igblast) command of the Change-O tool from the Immcantation Framework. ### Make database from assigned genes
Output files -- `changeo/02-makedb/` - - `logs`: Log of the process that will be parsed to generate a report. - - `tab`: Table in AIRR format containing the assigned gene information and metadata provided in the starting metadata sheet. +- `vdj_annotation/02-makedb/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*db-pass.tsv`: Rearrangement table in AIRR-C format containing the assigned gene information.
-A table is generated with [MakeDB](https://changeo.readthedocs.io/en/version-0.4.5/examples/igblast.html#processing-the-output-of-igblast) following the [AIRR standards](https://docs.airr-community.org/en/stable/datarep/rearrangements.html). +IgBLAST's results are parsed and standardized with [MakeDB](https://changeo.readthedocs.io/en/stable/examples/igblast.html#processing-the-output-of-igblast) to follow the [AIRR Community standards](https://docs.airr-community.org/en/stable/datarep/rearrangements.html) for rearrangement data. -### Removal of non-productive sequences +### Quality filter sequences
Output files -- `changeo/03-parsedb_split/` - - `logs`: Log of the process that will be parsed to generate a report. - - `tab`: Table in AIRR format containing the assigned gene information, with only productive sequences and metadata provided in the starting metadata sheet. +- `vdj_annotation/03-quality-filter/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*quality-pass.tsv*`: Rearrangement table in AIRR-C format containing the sequences that passed the quality filtering steps.
-Non-functional sequences are removed with [ParseDb](https://changeo.readthedocs.io/en/version-0.4.5/tools/ParseDb.html). +A table is generated that retains sequences with concordant locus in the `v_call` and `locus` fields, with a `sequence_alignment` with a maximum of 10% of Ns and a length of at least 200 informative nucleotides (not `-`, `.` or `N`). -### Selection of IGH / TR sequences +### Removal of non-productive sequences
Output files -- `changeo/04-parsedb_select/` - - `logs`: Log of the process that will be parsed to generate a report. - - `tab`: Table in AIRR format containing the assigned gene information, with only productive sequences and IGH/TR sequences, and metadata provided in the starting metadata sheet. +- `vdj_annotation/04-select-productive/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*productive-T.tsv*`: Rearrangement table in AIRR-C format, with only productive sequences.
-Heavy chain sequences (IGH) are selected if 'ig' locus is selected, TR sequences are selected if 'tr' locus is selected. The tool [ParseDb](https://changeo.readthedocs.io/en/version-0.4.5/tools/ParseDb.html) is employed. +Non-functional sequences identified with IgBLAST are removed with [ParseDb](https://changeo.readthedocs.io/en/stable/tools/ParseDb.html). -### Convert database to fasta +### Removal of sequences with junction length not multiple of 3
Output files -- `changeo/05-convertdb-fasta/` - - `fasta`: Fasta file containing the processed sequences with the barcode ID and allele annotation in the header. +- `vdj_annotation/05-select-junction-mod3/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*junction-pass.tsv*`: Rearrangement table in AIRR-C format, with only sequences that have a nucleotide junction length multiple of 3.
-Sequences in are additionally converted to a fasta file with the [ConvertDb](https://changeo.readthedocs.io/en/version-0.4.5/tools/ConvertDb.html?highlight=convertdb) tool. +### Add metadata + +
+Output files + +- `vdj_annotation/06-annotate-metadata/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*meta-pass.tsv*`: Rearrangement table in AIRR-C format annotated with metadata provided in the starting metadata sheet. + +
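The defaults for the V(D)J annotation steps above come from the `conf/modules.config` entries earlier in this diff (for example `--regions default --format airr --extended` for the make-db step). If different Change-O arguments are needed, the same `ext.args` mechanism can be overridden from a custom config; a minimal sketch, assuming the selector is still named `CHANGEO_MAKEDB` in your pipeline version:

```nextflow
// custom.config (sketch): change the MakeDb.py arguments used in the 02-make-db step.
// The value shown is the pipeline default taken from conf/modules.config in this diff.
process {
    withName: CHANGEO_MAKEDB {
        ext.args = '--regions default --format airr --extended'
    }
}
```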
## Shazam -### Merging tables per subject +### Reconstruct germlines
Output files -- `shazam/01-merged-tables/` - - `tab`: Table in AIRR format containing the assigned gene information. +- `qc-filtering/bulk-qc-filtering/01-create-germlines/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*germ-pass.tsv`: Rearrangement table in AIRR-C format with an additional + field with the reconstructed germline sequence for each sequence.
-AIRR tables for each subject are merged to be able to determine the subject genotype and full clonal analysis. +Reconstructing the germline sequences with the [CreateGermlines](https://changeo.readthedocs.io/en/stable/tools/CreateGermlines.html#creategermlines) Immcantation tool. -### Determining hamming distance threshold +### Chimera filter
Output files -- `shazam/02-clonal-threshold/` - - `threshold`: Hamming distance threshold of the Junction regions as determined by Shazam. - - `plots`: Plot of the Hamming distance distribution between junction regions displaying the threshold for clonal assignment as determined by Shazam. +- `qc-filtering/bulk-qc-filtering/02-chimera-filter/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*chimera-pass.tsv`: Rearrangement table in AIRR-C format with sequences that + passed the chimera removal filter. + - `_chimera_report`: Report with plots showing the mutation patterns.
-Determining the hamming distance threshold of the junction regions for clonal determination using [Shazam](https://shazam.readthedocs.io/en/version-0.1.11_a/). +Mutations patterns in different window sizes are analyzed with functions from +the Immcantation R package [SHazaM](https://shazam.readthedocs.io/en/stable/). -## Change-O define clones +### Detect contamination -### Define clones +
+Output files. Optional. + +- `qc-filtering/bulk-qc-filtering/03-detect_contamination` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*cont-flag.tsv`: Rearrangement table in AIRR-C format with sequences annotated + with a contamination flag. + - `all_reps_cont_report`: Report. + +
+ +This folder is generated when `detect_contamination` is set to `true`. + +### Collapse duplicates + +
+Output files. + +- `qc-filtering/bulk-qc-filtering/04-collapse-duplicates/` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*collapse_report/`: Report. + - `repertoires/*collapse-pass.tsv`: Rearrangement table in AIRR-C format with duplicated + sequences removed. + +
+ +### Single cell QC + +
+Output files. + +- `qc-filtering/single-cell-qc/all_reps_scqc_report` + - `*log.txt`: Log of the process that will be parsed to generate a report. + - `*all_reps_scqc_report/`: Report. + - `*scqc-pass.tsv`: Rearrangement table in AIRR-C format with sequences that + passed the quality filtering. + +
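The clonal analysis steps described in the next sections are driven by the `clonal_threshold` parameter: automatic threshold detection only runs when it is set to `auto`, while a fixed value skips it (the TCR test profile in this diff pins it to 0, for instance). A minimal params sketch based on those two usages:

```nextflow
// params sketch: choose how the clonal Hamming distance threshold is obtained.
params {
    clonal_threshold = 'auto'   // let SHazaM determine it; or set a number, e.g. 0 as in conf/test_tcr.config
}
```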
+ +### Determining hamming distance threshold
Output files -- `changeo/06-define_clones/` - - `tab`: Table in AIRR format containing the assigned gene information and an additional field with the clone id. +- `clonal_analysis/find_threshold/` + - `*log`: Log of the process that will be parsed to generate a report. + - `all_reps_threshold-mean.tsv`: Mean of all hamming distance thresholds of the + Junction regions as determined by Shazam. + - `all_reps_threshold-summary.tsv`: Thresholds for each group of `--cloneby` samples. + - `all_reps_dist_report`: Report.
-Assigning clones to the sequences obtained from IgBlast with the [DefineClones](https://changeo.readthedocs.io/en/version-0.4.5/tools/DefineClones.html?highlight=DefineClones) Immcantation tool. +Determining the hamming distance threshold of the junction regions for clonal determination using [Shazam](https://shazam.readthedocs.io) when `clonal_threshold` is set to `auto`. -### Reconstruct germlines +## SCOPer define clones + +### Define clones
Output files -- `changeo/07-create_germlines/` - - `tab`: Table in AIRR format contaning the assigned gene information and an additional field with the germline reconstructed gene calls. +- `clonal_analysis/define_clones/` + - `*log`: Log of the process that will be parsed to generate a report. + - `repertoires/_clone-pass.tsv`: Rearrangement tables in AIRR-C format with sequences that + passed the clonal assignment step. The field `clone_id` contains the clonal clusters identifiers. + - `tables/`: Table in AIRR format containing the assigned gene information and an additional field with the clone id. + - `clonal_abundance.tsv` + - `clonal_diversity.tsv` + - `clone_sizes_table.tsv` + - `num_clones_table_nosingle.tsv` + - `num_clones_table.tsv` + - `ggplots/`: Diversity and abundance plots as `ggplot` objects. + - `figures/`: Clone size, diversity and abundance `png` plots. + +A similar output folder `clonal_analysis/define_clones/all_reps_clone_report` is generated for all data.
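The clonal assignment arguments (clustering model, distance method, linkage, minimum sequences per group) are exposed through `ext.args` as a Groovy map; the test profiles in this diff override them per run, as in the sketch below (values copied from those profiles, adjust `min_n` and the grouping to your data):

```nextflow
// custom.config (sketch): override the DEFINE_CLONES arguments, mirroring the test profiles in this diff.
process {
    withName: 'DEFINE_CLONES*' {
        ext.args = ['outname'  : '',
                    'model'    : 'hierarchical',
                    'method'   : 'nt',
                    'linkage'  : 'single',
                    'outputby' : 'sample_id',
                    'min_n'    : 10]
    }
}
```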
-Reconstructing the germline sequences with the [CreateGermlines](https://changeo.readthedocs.io/en/version-0.4.5/tools/CreateGermlines.html#creategermlines) Immcantation tool. +Assigning clones to the sequences obtained from IgBlast with the [scoper::hierarchicalClones](https://scoper.readthedocs.io/en/stable/topics/hierarchicalClones/) Immcantation tool. + ## Lineage reconstruction
Output files -- `lineage_reconstruction/` - - `tab` - - `Clones_table_patient.tsv`: contains a summary of the clones found for the patient, and the number of unique and total sequences identified in each clone. - - `Clones_table_patient_filtered_between_3_and_1000.tsv`: contains a summary of the clones found for the patient, and the number of unique and total sequences identified in each clone, filtered by clones of size between 3 and 1000, for which the lineages were reconstructed and the trees plotted. - - `xxx_germ-pass.tsv`: AIRR format table with all the sequences from a patient after the germline annotation step. - - `Clone_tree_plots`: Contains a rooted graphical representation of each of the clones, saved in pdf format. - - `Graphml_trees`: All lineage trees for the patient exported in a GraphML format: `All_graphs_patient.graphml`. +- `clonal_analysis/dowser_lineages/` + - `*log`: Log of the process that will be parsed to generate a report. + - `_dowser_report`: Report
-Reconstructing clonal linage with the [Alakazam R package](https://alakazam.readthedocs.io/en/stable/) from the Immcantation toolset. +Reconstructing clonal lineage with [IgPhyML](https://igphyml.readthedocs.io/en/stable/) and +[dowser](https://dowser.readthedocs.io/en/stable/topics/getTrees/) from the Immcantation toolset. ## Repertoire comparison
Output files -- `repertoire_comparison/` +- `repertoire_analysis/repertoire_comparison/` - `all_data.tsv`: AIRR format table containing the processed sequence information for all subjects. - `Abundance`: contains clonal abundance calculation plots and tables. - `Diversity`: contains diversity calculation plots and tables. - `V_family`: contains V gene and family distribution calculation plots and tables. -- `Bcellmagic_report.html`: Contains the repertoire comparison results in an html report form: Abundance, Diversity, V gene usage tables and plots. Comparison between treatments and subjects. +- `Airrflow_report.html`: Contains the repertoire comparison results in an html report form: Abundance, Diversity, V gene usage tables and plots. Comparison between treatments and subjects.
Calculation of several repertoire characteristics (diversity, abundance, V gene usage) for comparison between subjects, time points and cell populations. An Rmarkdown report is generated with the [Alakazam R package](https://alakazam.readthedocs.io/en/stable/). +## Tracking number of reads + +
+Output files + +- `report_file_size/file_size_report`: Report summarizing the number of sequences after the most important pipeline steps. + - `tables/*tsv`: Tables with the number of sequences at each processing step. + +
+ +Parsing the logs from the previous processes. Summary of the number of sequences left after each of the most important pipeline steps. + ## Log parsing
diff --git a/docs/usage.md b/docs/usage.md index f9d8ca4b..194c9f7a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,9 @@ ## Introduction -The airrflow pipeline allows processing bulk targeted BCR and TCR sequencing data from multiplex or RACE PCR protocols. It performs V(D)J assignment, clonotyping, lineage reconsctruction and repertoire analysis using the [Immcantation](https://immcantation.readthedocs.io/en/stable/) framework. +The airrflow pipeline allows processing BCR and TCR targeted sequencing data from bulk and single-cell sequencing protocols. It performs V(D)J assignment, clonotyping, lineage reconstruction and repertoire analysis using the [Immcantation](https://immcantation.readthedocs.io/en/stable/) framework. + +![nf-core/airrflow overview](images/airrflow_workflow_overview.png) ## Running the pipeline @@ -26,7 +28,7 @@ nextflow run nf-core/airrflow \ ``` For more information about the parameters, please refer to the [parameters documentation](https://nf-co.re/airrflow/parameters). -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +The command above will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -37,28 +39,32 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -## AIRR fields support +## Input metadata ### Supported AIRR fields -nf-core/airrflow offers full support for the AIRR metadata fields. The minimum metadata fields that are needed by the pipeline are listed in the table below. Other metadata fields can be provided in the input samplesheet, which will be available for reporting and introducing comparisons among repertoires. +nf-core/airrflow offers full support for the [AIRR standards 1.4](https://docs.airr-community.org/en/stable/datarep/metadata.html) metadata annotation. The minimum metadata fields that are needed by the pipeline are listed in the table below. Other non-mandatory AIRR fields can be provided in the input samplesheet, which will be available for reporting and introducing comparisons among repertoires. 
-| AIRR field | Type | Name | Description | +| AIRR field | Type | Parameter Name | Description | | ------------------------- | ------------------ | ----------------------------- | ----------------------------------------------------- | -| sample_id | Samplesheet column | sample_id | Sample ID assigned by submitter, unique within study | -| subject_id | Samplesheet column | subject_id | Subject ID assigned by submitter, unique within study | -| species | Samplesheet column | species | Subject species | -| pcr_target_locus | Samplesheet column | pcr_target_locus | Designation of the target locus (IG or TR) | +| sample_id | Samplesheet column | | Sample ID assigned by submitter, unique within study | +| subject_id | Samplesheet column | | Subject ID assigned by submitter, unique within study | +| species | Samplesheet column | | Subject species | +| tissue | Samplesheet column | | Sample tissue | +| pcr_target_locus | Samplesheet column | | Designation of the target locus (IG or TR) | +| sex | Samplesheet column | | Subject sex | +| age | Samplesheet column | | Subject age | +| biomaterial_provider | Samplesheet column | | Name of sample biomaterial provider | | library_generation_method | Parameter | `--library_generation_method` | Generic type of library generation | -### Fastq input samplesheet +### Fastq input samplesheet (bulk) -The required input file is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species` and `pcr_target_locus` are required. An example samplesheet is: +The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. 
An example samplesheet is: -| sample_id | filename_R1 | filename_R2 | filename_I1 | subject_id | species | pcr_target_locus | intervention | collection_time_point_relative | cell_subset | -| --------- | ------------------------------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | -------------- | ------------------------------ | ------------ | -| sample01 | sample1_S8_L001_R1_001.fastq.gz | sample1_S8_L001_R2_001.fastq.gz | sample1_S8_L001_I1_001.fastq.gz | Subject02 | human | IG | Drug_treatment | Baseline | plasmablasts | -| sample02 | sample2_S8_L001_R1_001.fastq.gz | sample2_S8_L001_R2_001.fastq.gz | sample2_S8_L001_I1_001.fastq.gz | Subject02 | human | TR | Drug_treatment | Baseline | plasmablasts | +| sample_id | filename_R1 | filename_R2 | filename_I1 | subject_id | species | pcr_target_locus | tissue | sex | age | biomaterial_provider | single_cell | intervention | collection_time_point_relative | cell_subset | +| --------- | ------------------------------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- | -------------- | ------------------------------ | ------------ | +| sample01 | sample1_S8_L001_R1_001.fastq.gz | sample1_S8_L001_R2_001.fastq.gz | sample1_S8_L001_I1_001.fastq.gz | Subject02 | human | IG | blood | NA | 53 | sequencing_facility | FALSE | Drug_treatment | Baseline | plasmablasts | +| sample02 | sample2_S8_L001_R1_001.fastq.gz | sample2_S8_L001_R2_001.fastq.gz | sample2_S8_L001_I1_001.fastq.gz | Subject02 | human | TR | blood | female | 78 | sequencing_facility | FALSE | Drug_treatment | Baseline | plasmablasts | - sample_id: Sample ID assigned by submitter, unique within study. - filename_R1: path to fastq file with first mates of paired-end sequencing. @@ -66,7 +72,11 @@ The required input file is a sample sheet in TSV format (tab separated). The col - filename_I1 (optional): path to fastq with illumina index and UMI (unique molecular identifier) barcode. - subject_id: Subject ID assigned by submitter, unique within study. - species: species from which the sample was taken. Supported species are `human` and `mouse`. +- tissue: tissue from which the sample was taken. E.g. `blood`, `PBMC`, `brain`. - pcr_target_locus: Designation of the target locus (`IG` or `TR`). +- sex: Subject biological sex (`female`, `male`, etc.). +- age: Subject biological age. +- single_cell: TRUE or FALSE. Fastq input samplesheet only supports a FALSE value. Other optional columns can be added. These columns will be available when building the contrasts for the repertoire comparison report. It is recommended that these columns also follow the AIRR nomenclature. Examples are: @@ -78,6 +88,18 @@ Other optional columns can be added. These columns will be available when buildi The metadata specified in the input file will then be automatically annotated in a column with the same header in the tables generated by the pipeline. +### Assembled input samplesheet (bulk or single-cell) + +The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. 
+ +An example samplesheet is + +| filename | species | subject_id | sample_id | tissue | sex | age | biomaterial_provider | pcr_target_locus | single_cell | +| -------------------------------------------------------- | ------- | ---------- | --------------------------------- | ---------- | ---- | --- | -------------------- | ---------------- | ----------- | +| sc5p_v2_hs_PBMC_1k_b_airr_rearrangement.tsv | human | subject_x | sc5p_v2_hs_PBMC_1k_5fb | PBMC | NA | NA | 10x Genomics | ig | TRUE | +| sc5p_v2_mm_c57bl6_splenocyte_1k_b_airr_rearrangement.tsv | mouse | mouse_x | sc5p_v2_mm_c57bl6_splenocyte_1k_b | splenocyte | NA | NA | 10x Genomics | ig | TRUE | +| bulk-Laserson-2014.fasta | human | PGP1 | PGP1 | PBMC | male | NA | Laserson-2014 | ig | FALSE | + ## Supported library generation methods (protocols) | Library generation methods (AIRR) | Description | Name in pipeline | Commercial protocols | @@ -187,7 +209,7 @@ nextflow run nf-core/airrflow -profile docker \ #### UMI barcode is provided in the index file -If the UMI barcodes are provided in an additional index file, set the `--index_file` parameter. Specify the UMI barcode length with the `--umi_length` parameter. You can optionally specify the UMI start position in the index sequence with the `--umi_start` parameter (the default is 0). +If the UMI barcodes are provided in an additional index file, please provide it in the column `filename_I1` in the input samplesheet and additionally set the `--index_file` parameter. Specify the UMI barcode length with the `--umi_length` parameter. You can optionally specify the UMI start position in the index sequence with the `--umi_start` parameter (the default is 0). For example: @@ -312,9 +334,9 @@ nextflow pull nf-core/airrflow It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/airrflow releases page](https://github.com/nf-core/airrflow/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/airrflow releases page](https://github.com/nf-core/airrflow/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. ## Core Nextflow arguments @@ -324,7 +346,7 @@ This version number will be logged in reports when you run the pipeline, so that Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. 
When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
@@ -333,8 +355,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles.
-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the compute environment.
+- `test`
+  - A profile with a complete configuration for automated testing
+  - Includes links to test data so needs no other parameters
- `docker`
  - A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
@@ -347,9 +372,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `conda`
  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
-- `test`
-  - A profile with a complete configuration for automated testing
-  - Includes links to test data so needs no other parameters
### `-resume`
@@ -398,8 +420,14 @@ Work dir:
Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
```
+#### For beginners
+
+As a first step to bypass this error, you can try to increase the amount of CPUs, memory, and time for the whole pipeline by raising the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you would have to increase the amount of memory. To find the default value of `--max_memory`, go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button; in this case the default is 128GB. You can then try to run your pipeline again with `--max_memory 200GB -resume` to skip all processes that were already completed successfully. If you cannot increase the resources for the whole pipeline, you can try to adapt the resources for a single process as described below.
+
+#### Advanced option on process level
+
To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process.
The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. @@ -418,7 +446,7 @@ process { > > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. -### Updating containers +### Updating containers (advanced users) The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index b3d092f8..33cd4f6e 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -46,7 +46,6 @@ class NfcoreSchema { 'quiet', 'syslog', 'v', - 'version', // Options for `nextflow run` command 'ansi', diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 27feb009..25a0a74a 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -32,6 +32,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +80,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -146,10 +165,10 @@ class NfcoreTemplate { } // - // Construct and send adaptive card - // https://adaptivecards.io + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + public static void IM_notification(workflow, params, summary_params, projectDir, log) { def hook_url = params.hook_url def summary = [:] @@ -170,7 +189,7 @@ class NfcoreTemplate { misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version + msg_fields['version'] = NfcoreTemplate.version(workflow) msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success msg_fields['dateComplete'] = workflow.complete @@ -178,13 +197,16 @@ class NfcoreTemplate { msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") msg_fields['projectDir'] = workflow.projectDir msg_fields['summary'] = summary << misc_fields // Render the JSON template def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) def json_message = json_template.toString() @@ -209,7 +231,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -297,6 +319,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -305,7 +328,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/WorkflowBcellmagic.groovy b/lib/WorkflowAirrflow.groovy similarity index 99% rename from lib/WorkflowBcellmagic.groovy rename to lib/WorkflowAirrflow.groovy index cab982f8..e66386bf 100755 --- a/lib/WorkflowBcellmagic.groovy +++ b/lib/WorkflowAirrflow.groovy @@ -4,7 +4,7 @@ import groovy.text.SimpleTemplateEngine -class WorkflowBcellmagic { +class WorkflowAirrflow { // // Check and validate parameters diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 71381438..e74305a7 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -18,7 +18,7 @@ class WorkflowMain { } // - // Print help to screen if required + // Generate help string // public static String help(workflow, params, log) { def command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.tsv --cprimers CPrimers.fasta --vprimers VPrimers.fasta --umi_length 12 --loci ig" @@ -31,7 +31,7 @@ class WorkflowMain { } // - // Print parameter summary log to screen + // Generate parameter summary log string // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' @@ -52,20 +52,26 @@ class WorkflowMain { System.exit(0) } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + 
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } diff --git a/main.nf b/main.nf index f93651f6..2c213e4b 100644 --- a/main.nf +++ b/main.nf @@ -25,20 +25,10 @@ WorkflowMain.initialise(workflow, params, log) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -if (params.subworkflow == 'bcellmagic') { - include { BCELLMAGIC } from './workflows/bcellmagic' -} else if (params.subworkflow == 'reveal') { - include { REVEAL } from './workflows/reveal' -} +include { AIRRFLOW } from './workflows/airrflow' workflow NFCORE_AIRRFLOW { - if (params.subworkflow == "bcellmagic") { - BCELLMAGIC() - } else if (params.subworkflow == "reveal") { - REVEAL() - } else { - exit 1 - } + AIRRFLOW() } workflow { diff --git a/modules.json b/modules.json index 54043170..58105545 100644 --- a/modules.json +++ b/modules.json @@ -7,19 +7,23 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0" + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e" + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba", + "installed_by": ["modules"] } } } diff --git a/modules/local/airrflow_report/Dockerfile b/modules/local/airrflow_report/Dockerfile new file mode 100644 index 00000000..d648de92 --- /dev/null +++ b/modules/local/airrflow_report/Dockerfile @@ -0,0 +1,21 @@ +FROM condaforge/mambaforge:22.9.0-3 +LABEL authors="gisela.gabernet@gmail.com" \ + description="Docker image containing base requirements for the nfcore/airrflowreport" + +# Install procps so that Nextflow can poll CPU usage and +# deep clean the apt cache to reduce image/layer size +RUN apt-get install -y procps + +# Instruct R processes to use these empty files instead of clashing with a local version +RUN touch .Rprofile +RUN touch .Renviron + +# Install the conda environment +COPY environment.yml / +RUN mamba env create -f /environment.yml && conda clean -a + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-airrflow-report-3.0dev/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN mamba env export --name nf-core-airrflow-report-3.0dev > nf-core-airrflow-report-3.0dev.yml diff --git a/modules/local/alakazam/alakazam_shazam_repertoires.nf b/modules/local/airrflow_report/airrflow_report.nf similarity index 64% rename from modules/local/alakazam/alakazam_shazam_repertoires.nf rename to modules/local/airrflow_report/airrflow_report.nf index b062f0ce..dcb330e0 100644 --- a/modules/local/alakazam/alakazam_shazam_repertoires.nf +++ b/modules/local/airrflow_report/airrflow_report.nf @@ -1,15 +1,16 @@ -process ALAKAZAM_SHAZAM_REPERTOIRES { - tag "report" +process AIRRFLOW_REPORT { + tag "${meta.id}" label 'process_high' - conda (params.enable_conda ? 
"conda-forge::r-base=4.1.2 bioconda::r-alakazam=1.2.0 bioconda::r-shazam=1.1.0 conda-forge::r-kableextra=1.3.4 conda-forge::r-knitr=1.33 conda-forge::r-stringr=1.4.0 conda-forge::r-dplyr=1.0.6 conda-forge::r-optparse=1.7.1" : null) + conda "bioconda::r-enchantr=0.0.6 conda-forge::plotly=4.10.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7da73314bcc47157b442d16c3dcfbe81e75a404f:9bb35f8114dffcd97b3afb5de8587355aca16b66-0' : - 'quay.io/biocontainers/mulled-v2-7da73314bcc47157b442d16c3dcfbe81e75a404f:9bb35f8114dffcd97b3afb5de8587355aca16b66-0' }" + 'nfcore/airrflowreport:dev' : + 'nfcore/airrflowreport:dev' }" input: - path(tab) // sequence tsv table in AIRR format + tuple val(meta), path(tab) // sequence tsv table in AIRR format path("Table_sequences.tsv") + path("Table_sequences_assembled.tsv") path(repertoire_report) path(css) path(logo) diff --git a/modules/local/airrflow_report/environment.yml b/modules/local/airrflow_report/environment.yml new file mode 100644 index 00000000..441b7a8b --- /dev/null +++ b/modules/local/airrflow_report/environment.yml @@ -0,0 +1,9 @@ +name: nf-core-airrflow-report-3.0dev +channels: + - bioconda + - conda-forge + - defaults +dependencies: + - r-enchantr=0.0.6 + - r-kableextra + - r-plotly diff --git a/modules/local/alakazam/alakazam_lineage.nf b/modules/local/alakazam/alakazam_lineage.nf deleted file mode 100644 index a12605d6..00000000 --- a/modules/local/alakazam/alakazam_lineage.nf +++ /dev/null @@ -1,36 +0,0 @@ -process ALAKAZAM_LINEAGE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "conda-forge::r-base=4.1.2 bioconda::r-alakazam=1.2.0 bioconda::changeo=1.2.0 bioconda::phylip=3.697 conda-forge::r-optparse=1.7.1" : null) // Please also update the phylip version manually in the script section below as phylip does not print the version - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-adaea5efbfa2a35669a6db7ddb1e1c8d5e60ef6e:031d6ccffe3e78cd908a83c2387b67eb856da7dd-0' : - 'quay.io/biocontainers/mulled-v2-adaea5efbfa2a35669a6db7ddb1e1c8d5e60ef6e:031d6ccffe3e78cd908a83c2387b67eb856da7dd-0' }" - - input: - tuple val(meta), path(tab) // sequence tsv table in AIRR format - - output: - tuple val(meta), path("${tab}"), emit: tab - path "versions.yml" , emit: versions - path("*.tsv") - path("Clone_tree_plots/*.pdf"), emit: graph_plots optional true - path("Graphml_trees/*.graphml"), emit: graph_export optional true - - script: - def args = task.ext.args ?: '' - """ - which dnapars > dnapars_exec.txt - lineage_reconstruction.R --repertoire ${tab} $args - merge_graphs.sh - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(echo \$(R --version 2>&1) | awk -F' ' '{print \$3}') - alakazam: \$(Rscript -e "library(alakazam); cat(paste(packageVersion('alakazam'), collapse='.'))") - changeo: \$( AssignGenes.py --version | awk -F' ' '{print \$2}' ) - pyhlip: 3.697 - END_VERSIONS - """ - -} diff --git a/modules/local/changeo/changeo_assigngenes.nf b/modules/local/changeo/changeo_assigngenes.nf index 31e93ee8..5b5b412e 100644 --- a/modules/local/changeo/changeo_assigngenes.nf +++ b/modules/local/changeo/changeo_assigngenes.nf @@ -1,11 +1,12 @@ process CHANGEO_ASSIGNGENES { tag "$meta.id" label 'process_low' + label 'immcantation' - conda (params.enable_conda ? 
"bioconda::changeo=1.2.0 bioconda::igblast=1.17.1 conda-forge::wget=1.20.1" : null) // Conda package + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:e7f88c6f7da46a5407f261ca406c050d5bd12dea-0' : - 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:e7f88c6f7da46a5407f261ca406c050d5bd12dea-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(reads) // reads in fasta format @@ -15,11 +16,12 @@ process CHANGEO_ASSIGNGENES { path("*igblast.fmt7"), emit: blast tuple val(meta), path("$reads"), emit: fasta path "versions.yml" , emit: versions + path("*_command_log.txt"), emit: logs //process logs script: def args = task.ext.args ?: '' """ - AssignGenes.py igblast -s $reads -b $igblast --organism $meta.species --loci ${meta.locus.toLowerCase()} $args --nproc $task.cpus --outname "$meta.id" + AssignGenes.py igblast -s $reads -b $igblast --organism $meta.species --loci ${meta.locus.toLowerCase()} $args --nproc $task.cpus --outname "$meta.id" > "$meta.id"_changeo_assigngenes_command_log.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/changeo/changeo_buildtrees.nf b/modules/local/changeo/changeo_buildtrees.nf deleted file mode 100644 index 597cea84..00000000 --- a/modules/local/changeo/changeo_buildtrees.nf +++ /dev/null @@ -1,30 +0,0 @@ -process CHANGEO_BUILDTREES { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "conda-forge::r-base=4.1.2 bioconda:r-alakazam=1.2.0 bioconda::changeo=1.2.0 bioconda::igphyml=1.1.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-d432bd3f78aaba1be2f7eb105c18998acb64d739:2c83ca89e577c8839f746f0fe4a6c63ef5984b99-0' : - 'quay.io/biocontainers/mulled-v2-d432bd3f78aaba1be2f7eb105c18998acb64d739:2c83ca89e577c8839f746f0fe4a6c63ef5984b99-0' }" - - input: - tuple val(meta), path(tab) // sequence tsv table in AIRR format - - output: - tuple val(meta), path("*_lineages.tsv") - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - """ - BuildTrees.py -d ${tab} --outname ${meta.id} --log ${meta.id}.log --nproc $task.cpus $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(echo \$(R --version 2>&1) | awk -F' ' '{print \$3}') - alakazam: \$(Rscript -e "library(alakazam); cat(paste(packageVersion('alakazam'), collapse='.'))") - changeo: \$(AssignGenes.py --version | awk -F' ' '{print \$2}') - igphyml: \$(igphyml --version | grep -o "IgPhyML [0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+") - END_VERSIONS - """ -} diff --git a/modules/local/changeo/changeo_convertdb_fasta.nf b/modules/local/changeo/changeo_convertdb_fasta.nf index c1d94965..c5954a23 100644 --- a/modules/local/changeo/changeo_convertdb_fasta.nf +++ b/modules/local/changeo/changeo_convertdb_fasta.nf @@ -3,10 +3,11 @@ process CHANGEO_CONVERTDB_FASTA { label 'process_low' label 'immcantation' - conda (params.enable_conda ? 
"bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) + + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format @@ -19,7 +20,7 @@ process CHANGEO_CONVERTDB_FASTA { script: def args = task.ext.args ?: '' """ - ConvertDb.py fasta -d $tab $args > "${meta.id}_command_log.txt" + ConvertDb.py fasta -d $tab $args > "${meta.id}"_convertdb_command_log.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/changeo/changeo_creategermlines.nf b/modules/local/changeo/changeo_creategermlines.nf index f00f5bf6..31814a92 100644 --- a/modules/local/changeo/changeo_creategermlines.nf +++ b/modules/local/changeo/changeo_creategermlines.nf @@ -1,11 +1,13 @@ process CHANGEO_CREATEGERMLINES { tag "$meta.id" label 'process_low' + label 'immcantation' - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) + + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(tab) // sequence tsv table in AIRR format @@ -17,11 +19,12 @@ process CHANGEO_CREATEGERMLINES { path "versions.yml" , emit: versions script: + def args = task.ext.args ?: '' """ - CreateGermlines.py -d ${tab} -g dmask --cloned \\ + CreateGermlines.py -d ${tab} \\ -r ${imgt_base}/${meta.species}/vdj/ \\ - --format airr \\ - --log ${meta.id}.log --outname ${meta.id} > ${meta.id}_command_log.txt + -g dmask --format airr \\ + --log ${meta.id}.log --outname ${meta.id} $args > ${meta.id}_create-germlines_command_log.txt ParseLog.py -l ${meta.id}.log -f ID V_CALL D_CALL J_CALL cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/changeo/changeo_defineclones.nf b/modules/local/changeo/changeo_defineclones.nf deleted file mode 100644 index 39e1cf3e..00000000 --- a/modules/local/changeo/changeo_defineclones.nf +++ /dev/null @@ -1,36 +0,0 @@ -process CHANGEO_DEFINECLONES { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" - - input: - tuple val(meta), path(tab) // sequence tsv table in AIRR format - val(threshold) // threshold file - - output: - tuple val(meta), path("*clone-pass.tsv"), emit: tab // sequence tsv table in AIRR format - path "*_command_log.txt" , emit: logs - path "versions.yml" , emit: versions - - script: - if (params.set_cluster_threshold) { - thr = params.cluster_threshold - } else { - thr = file(threshold).text - thr = thr.trim() - } - """ - DefineClones.py -d $tab --act set --model ham --norm len --nproc $task.cpus --dist $thr --outname ${meta.id} --log ${meta.id}.log > "${meta.id}_command_log.txt" - ParseLog.py -l "${meta.id}.log" -f id v_call j_call junction_length cloned filtered clones - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" ) - changeo: \$( DefineClones.py --version | awk -F' ' '{print \$2}' ) - END_VERSIONS - """ -} diff --git a/modules/local/changeo/changeo_makedb.nf b/modules/local/changeo/changeo_makedb.nf index dab4bc18..b4b016ad 100644 --- a/modules/local/changeo/changeo_makedb.nf +++ b/modules/local/changeo/changeo_makedb.nf @@ -1,12 +1,13 @@ process CHANGEO_MAKEDB { tag "$meta.id" label 'process_low' + label 'immcantation' - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(reads) // reads in fasta format @@ -24,7 +25,7 @@ process CHANGEO_MAKEDB { MakeDb.py igblast -i $igblast -s $reads -r \\ ${imgt_base}/${meta.species.toLowerCase()}/vdj/ \\ $args \\ - --outname "${meta.id}" > "${meta.id}_command_log.txt" + --outname "${meta.id}" > "${meta.id}_makedb_command_log.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/changeo/changeo_parsedb_select.nf b/modules/local/changeo/changeo_parsedb_select.nf index 42606fc6..4241d043 100644 --- a/modules/local/changeo/changeo_parsedb_select.nf +++ b/modules/local/changeo/changeo_parsedb_select.nf @@ -1,11 +1,13 @@ process CHANGEO_PARSEDB_SELECT { tag "$meta.id" label 'process_low' + label 'immcantation' - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) + + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format @@ -20,7 +22,7 @@ process CHANGEO_PARSEDB_SELECT { def args2 = task.ext.args2 ?: '' if (meta.locus == 'IG'){ """ - ParseDb.py select -d $tab $args --outname ${meta.id} > "${meta.id}_command_log.txt" + ParseDb.py select -d $tab $args --outname ${meta.id} > "${meta.id}_select_command_log.txt" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/changeo/changeo_parsedb_split.nf b/modules/local/changeo/changeo_parsedb_split.nf index 7b0a75a9..ff63d02f 100644 --- a/modules/local/changeo/changeo_parsedb_split.nf +++ b/modules/local/changeo/changeo_parsedb_split.nf @@ -3,10 +3,11 @@ process CHANGEO_PARSEDB_SPLIT { label 'process_low' label 'immcantation' - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1" : null) + + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' : - 'quay.io/biocontainers/mulled-v2-2665a8a48fa054ad1fcccf53e711669939b3eac1:f479475bceae84156e57e303cfe804ab5629d62b-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/enchantr/collapse_duplicates.nf b/modules/local/enchantr/collapse_duplicates.nf index b2f552d2..9ce827b8 100644 --- a/modules/local/enchantr/collapse_duplicates.nf +++ b/modules/local/enchantr/collapse_duplicates.nf @@ -1,31 +1,37 @@ process COLLAPSE_DUPLICATES { - tag "all_bulk_reps" - label 'immcantation' - label 'enchantr' - label 'process_long' + tag "$meta.id" - cache 'lenient' + label 'process_long_parallelized' + label 'immcantation' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: - path(tabs) // tuple val(meta) // sequence tsv in AIRR format - val(collapseby) + tuple val(meta), path(tabs) // tuple [val(meta), sequence tsv in AIRR format ] output: - tuple val(meta), path("*collapse-pass.tsv"), emit: tab // sequence tsv in AIRR format + tuple val(meta), path("*/*/*collapse-pass.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs - path "*_report" // , emit: duplicates_report + path "*_report" path "versions.yml" , emit: versions script: - meta=[] """ echo "${tabs.join('\n')}" > tabs.txt - Rscript -e "enchantr::enchantr_report('collapse_duplicates', report_params=list('input'='tabs.txt','collapseby'='${collapseby}','outdir'=getwd(), 'nproc'=${task.cpus},'outname'='all_reps', 'log'='all_reps_collapse_command_log'))" + Rscript -e "enchantr::enchantr_report('collapse_duplicates', \\ + report_params=list('input'='tabs.txt',\\ + 'collapseby'='${params.collapseby}',\\ + 'outdir'=getwd(),\\ + 'nproc'=${task.cpus},\\ + 'outname'='${meta.id}',\\ + 'log'='${meta.id}_collapse_command_log'))" + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - mv enchantr all_reps_collapse_report + + mv enchantr ${meta.id}_collapse_report """ } diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index 85d7e138..fa34fefb 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -1,9 +1,14 @@ def asString (args) { - s = "" + def s = "" + def value = "" if (args.size()>0) { if (args[0] != 'none') { for (param in args.keySet().sort()){ - s = s + ",'"+param+"'='"+args[param]+"'" + value = args[param].toString() + if (!value.isNumber()) { + value = "'"+value+"'" + } + s = s + ",'"+param+"'="+value } } } @@ -11,41 +16,45 @@ def asString (args) { } process DEFINE_CLONES { - tag 'all_reps' + tag "${meta.id}" + + label 'process_long_parallelized' label 'immcantation' - label 'enchantr' - label 'process_long' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: - //tuple val(meta), path(tabs) // sequence tsv in AIRR format - path(tabs) - val cloneby - val singlecell + tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format val threshold path imgt_base output: - tuple val(meta), path("*clone-pass.tsv"), emit: tab, optional: true // sequence tsv in AIRR format - path("*_command_log.txt"), emit: logs //process logs + path("*/*/*clone-pass.tsv"), emit: tab // sequence tsv in AIRR format + path("*/*_command_log.txt"), emit: logs //process logs path "*_report" + path "versions.yml", emit: versions + script: - meta=[] - def outname = '' - if (task.ext.args.containsKey('outname')) { outname = task.ext.args['outname'] } + def args = asString(task.ext.args) ?: '' + def thr = threshold.join("") """ Rscript -e "enchantr::enchantr_report('define_clones', \\ report_params=list('input'='${tabs.join(',')}', \\ 'imgt_db'='${imgt_base}', \\ - 'cloneby'='${cloneby}','threshold'=${threshold}, \\ - 'outputby'='id', \\ - 'outname'='${outname}', \\ - 'singlecell'='${singlecell}','outdir'=getwd(), \\ + 'cloneby'='${params.cloneby}', \\ + 'force'=FALSE, \\ + 'threshold'=${thr}, \\ + 'singlecell'='${params.singlecell}','outdir'=getwd(), \\ 'nproc'=${task.cpus},\\ - 'log'='all_reps_clone_command_log' ${args}))" - mv enchantr 'all_reps_clone_report' + 'log'='${meta.id}_clone_command_log' ${args}))" + + echo "${task.process}": > versions.yml + Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml + + mv enchantr '${meta.id}_clone_report' """ } diff --git a/modules/local/enchantr/detect_contamination.nf b/modules/local/enchantr/detect_contamination.nf index cc73a7f7..1ff546b9 100644 --- a/modules/local/enchantr/detect_contamination.nf +++ b/modules/local/enchantr/detect_contamination.nf @@ -1,30 +1,35 @@ process DETECT_CONTAMINATION { tag "multi_repertoire" + + label 'process_long_parallelized' label 'immcantation' - label 'enchantr' - cache 'lenient' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: path(tabs) - val(input_id) output: - tuple val(meta), path("*cont-flag.tsv"), emit: tab // sequence tsv in AIRR format + path("*cont-flag.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs - path "*_report" //, emit: contamination_report + path "*_report" path "versions.yml" , emit: versions script: - meta=[] """ echo "${tabs.join('\n')}" > tabs.txt - Rscript -e "enchantr::enchantr_report('contamination', report_params=list('input'='tabs.txt','input_id'='${input_id}','outdir'=getwd(), 'outname'='cont-flag', 'log'='all_reps_contamination_command_log'))" + Rscript -e "enchantr::enchantr_report('contamination', \\ + report_params=list('input'='tabs.txt',\\ + 'input_id'='id','outdir'=getwd(), \\ + 'outname'='cont-flag', \\ + 'log'='all_reps_contamination_command_log'))" + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - mv enchantr al_reps_cont_report + mv enchantr all_reps_cont_report """ } diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index 39a4fca1..f1924055 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -1,9 +1,14 @@ def asString (args) { - s = "" + def s = "" + def value = "" if (args.size()>0) { if (args[0] != 'none') { for (param in args.keySet().sort()){ - s = s + ",'"+param+"'='"+args[param]+"'" + value = args[param].toString() + if (!value.isNumber()) { + value = "'"+value+"'" + } + s = s + ",'"+param+"'="+value } } } @@ -11,33 +16,41 @@ def asString (args) { } process DOWSER_LINEAGES { - tag "$tabs" + tag "${meta.id}" + + label 'process_long_parallelized' + label 'error_ignore' label 'immcantation' - label 'enchantr' - label 'process_long' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: //tuple val(meta), path(tabs) // sequence tsv in AIRR format - path(tabs) + tuple val(meta), path(tabs) output: path("*_command_log.txt"), emit: logs //process logs path "*_report" + path "versions.yml", emit: versions script: - meta=[] - def args = asString(task.ext.args) + def args = asString(task.ext.args) ?: '' def id_name = "$tabs".replaceFirst('__.*','') // TODO use nice outname, not tabs """ Rscript -e "enchantr::enchantr_report('dowser_lineage', \\ report_params=list('input'='${tabs}', \\ + 'exec'='${params.igphyml}', \\ 'outdir'=getwd(), \\ 'nproc'=${task.cpus},\\ 'log'='${id_name}_dowser_command_log' ${args}))" + + echo "${task.process}": > versions.yml + Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml + mv enchantr '${id_name}_dowser_report' """ } diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index 3fd4d9e5..8c1584e4 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -1,31 +1,41 @@ process FIND_THRESHOLD { tag "all_reps" - label 'immcantation' - label 'enchantr' - label 'process_long' - cache 'lenient' + label 'process_long_parallelized' + label 'immcantation' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: - path tab // tuple val(meta) // sequence tsv in AIRR format - val(cloneby) - val(singlecell) + path tab // sequence tsv in AIRR format + path logo output: // tuple val(meta), path("*threshold-pass.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path "*_report" - path "*_threshold-summary.tsv" + path "*_threshold-summary.tsv", emit: threshold_summary path "*_threshold-mean.tsv", emit: mean_threshold + path "versions.yml", emit: versions script: - meta=[] """ - Rscript -e "enchantr::enchantr_report('find_threshold', report_params=list('input'='${tab.join(',')}','cloneby'='${cloneby}','singlecell'='${singlecell}','outdir'=getwd(), 'nproc'=${task.cpus},'outname'='all_reps', 'log'='all_reps_clone_command_log'))" + Rscript -e "enchantr::enchantr_report('find_threshold', \\ + report_params=list('input'='${tab.join(',')}',\\ + 'cloneby'='${params.cloneby}',\\ + 'singlecell'='${params.singlecell}',\\ + 'outdir'=getwd(),\\ + 'nproc'=${task.cpus},\\ + 'outname'='all_reps',\\ + 'log'='all_reps_threshold_command_log',\\ + 'logo'='${logo}'))" + + echo "${task.process}": > versions.yml + Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml mv enchantr all_reps_dist_report """ } diff --git a/modules/local/enchantr/remove_chimeric.nf b/modules/local/enchantr/remove_chimeric.nf index 408101ce..b95129f5 100644 --- a/modules/local/enchantr/remove_chimeric.nf +++ b/modules/local/enchantr/remove_chimeric.nf @@ -1,12 +1,14 @@ process REMOVE_CHIMERIC { tag "$meta.id" + + label 'process_long_parallelized' label 'immcantation' - label 'enchantr' - label 'process_high' - label 'process_long' - // TODO: update container - container "immcantation/suite:devel" + + conda "bioconda::r-enchantr=0.0.6" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: @@ -21,9 +23,16 @@ process REMOVE_CHIMERIC { script: """ - Rscript -e "enchantr:::enchantr_report('chimera_analysis', report_params=list('input'='${tab}','outdir'=getwd(), 'nproc'=${task.cpus},'outname'='${meta.id}', 'log'='${meta.id}_chimeric_command_log'))" + Rscript -e "enchantr:::enchantr_report('chimera_analysis', \\ + report_params=list('input'='${tab}',\\ + 'outdir'=getwd(), \\ + 'nproc'=${task.cpus},\\ + 'outname'='${meta.id}', \\ + 'log'='${meta.id}_chimeric_command_log'))" + echo "\"${task.process}\":" > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml + mv enchantr ${meta.id}_chimera_report """ } diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf index 299eebf9..fb252314 100644 --- a/modules/local/enchantr/report_file_size.nf +++ b/modules/local/enchantr/report_file_size.nf @@ -4,22 +4,32 @@ process REPORT_FILE_SIZE { tag "file_size" label 'immcantation' - label 'enchantr' label 'single_cpu' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: path logs + path metadata output: path "*_report", emit: file_size + path "versions.yml", emit: versions + path "file_size_report/tables/log_data.tsv", emit: table script: """ echo "${logs.join('\n')}" > logs.txt - Rscript -e "enchantr::enchantr_report('file_size', report_params=list('input'='logs.txt','outdir'=getwd()))" + Rscript -e "enchantr::enchantr_report('file_size', \\ + report_params=list('input'='logs.txt', 'metadata'='${metadata}',\\ + 'outdir'=getwd()))" + + echo "\"${task.process}\":" > versions.yml + Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml + mv enchantr file_size_report """ } diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf index dc0988f7..1707ea9f 100644 --- a/modules/local/enchantr/single_cell_qc.nf +++ b/modules/local/enchantr/single_cell_qc.nf @@ -1,27 +1,51 @@ +def asString (args) { + def s = "" + def value = "" + if (args.size()>0) { + if (args[0] != 'none') { + for (param in args.keySet().sort()){ + value = args[param].toString() + if (!value.isNumber()) { + value = "'"+value+"'" + } + s = s + ",'"+param+"'="+value + } + } + } + return s +} + process SINGLE_CELL_QC { - tag "multi_repertoire" + tag 'all_single_cell' label 'immcantation' - label 'enchantr' + label 'process_medium' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: - path tabs + path(tabs) output: - tuple val(meta), path("*scqc-pass.tsv"), emit: tab // sequence tsv in AIRR format + path("*/*scqc-pass.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path("*_report"), emit: report path("versions.yml"), emit: versions script: - meta=[] + def args = asString(task.ext.args) ?: '' """ echo "${tabs.join('\n')}" > tabs.txt - Rscript -e "enchantr::enchantr_report('single_cell_qc', report_params=list('input'='tabs.txt','outdir'=getwd(), 'outname'='all_reps', 'log'='all_reps_scqc_command_log'))" + Rscript -e "enchantr::enchantr_report('single_cell_qc', \\ + report_params=list('input'='tabs.txt',\\ + 'outdir'=getwd(), \\ + 'log'='all_reps_scqc_command_log' ${args} ))" + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml + mv enchantr all_reps_scqc_report """ } diff --git a/modules/local/enchantr/validate_input.nf b/modules/local/enchantr/validate_input.nf index a1245956..e16df988 100644 --- a/modules/local/enchantr/validate_input.nf +++ b/modules/local/enchantr/validate_input.nf @@ -4,24 +4,30 @@ process VALIDATE_INPUT { tag "$samplesheet" label 'immcantation' - label 'enchantr' + label 'single_cpu' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: file samplesheet path miairr val collapseby val cloneby - val reassign + //val reassign output: - path "validated_input.tsv", emit: validated_input - path "validated_input_not-valid.tsv", emit: not_valid_input, optional: true + path "*/validated_input.tsv", emit: validated_input + path "*/validated_input_not-valid.tsv", emit: not_valid_input, optional: true + path "versions.yml", emit: versions script: """ - Rscript -e "enchantr:::enchantr_report('validate_input', report_params=list('input'='${samplesheet}','collapseby'='${collapseby}','cloneby'='${cloneby}','reassign'='${reassign}','miairr'='${miairr}','outdir'=getwd()))" + Rscript -e "enchantr:::enchantr_report('validate_input', report_params=list('input'='${samplesheet}','collapseby'='${collapseby}','cloneby'='${cloneby}','reassign'='${params.reassign}','miairr'='${miairr}','outdir'=getwd()))" + + echo "\"${task.process}\":" > versions.yml + Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml """ } diff --git a/modules/local/fastqc_postassembly.nf b/modules/local/fastqc_postassembly.nf index a32522de..84f3b1ba 100644 --- a/modules/local/fastqc_postassembly.nf +++ b/modules/local/fastqc_postassembly.nf @@ -3,7 +3,7 @@ process FASTQC_POSTASSEMBLY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : 'quay.io/biocontainers/fastqc:0.11.9--0' }" diff --git a/modules/local/fetch_databases.nf b/modules/local/fetch_databases.nf index 9762404e..8c110b0c 100644 --- a/modules/local/fetch_databases.nf +++ b/modules/local/fetch_databases.nf @@ -3,10 +3,10 @@ process FETCH_DATABASES { label 'process_low' label 'immcantation' - conda (params.enable_conda ? "bioconda::changeo=1.2.0 bioconda::igblast=1.17.1 conda-forge::wget=1.20.1" : null) // Conda package + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:e7f88c6f7da46a5407f261ca406c050d5bd12dea-0' : - 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:e7f88c6f7da46a5407f261ca406c050d5bd12dea-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : + 'quay.io/biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" output: path("igblast_base"), emit: igblast diff --git a/modules/local/gunzip.nf b/modules/local/gunzip.nf index f3b8155c..2a929579 100644 --- a/modules/local/gunzip.nf +++ b/modules/local/gunzip.nf @@ -2,7 +2,7 @@ process GUNZIP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : 'biocontainers/biocontainers:v1.2.0_cv1' }" diff --git a/modules/local/merge_UMI.nf b/modules/local/merge_UMI.nf index 8849f189..dbdceef9 100644 --- a/modules/local/merge_UMI.nf +++ b/modules/local/merge_UMI.nf @@ -3,7 +3,7 @@ process MERGE_UMI { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.0 conda-forge::biopython=1.74" : null) + conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' : 'quay.io/biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }" diff --git a/modules/local/merge_tables.nf b/modules/local/merge_tables.nf deleted file mode 100644 index b6f3c02a..00000000 --- a/modules/local/merge_tables.nf +++ /dev/null @@ -1,32 +0,0 @@ -process MERGE_TABLES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::r-base=4.1.2 bioconda::r-alakazam=1.2.0 bioconda::r-shazam=1.1.0 conda-forge::r-kableextra=1.3.4 conda-forge::r-knitr=1.33 conda-forge::r-stringr=1.4.0 conda-forge::r-dplyr=1.0.6 conda-forge::r-optparse=1.7.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-7da73314bcc47157b442d16c3dcfbe81e75a404f:9bb35f8114dffcd97b3afb5de8587355aca16b66-0' : - 'quay.io/biocontainers/mulled-v2-7da73314bcc47157b442d16c3dcfbe81e75a404f:9bb35f8114dffcd97b3afb5de8587355aca16b66-0' }" - - input: - tuple val(meta), path(tab) // sequence tsv in AIRR format - path samplesheet - - output: - tuple val(meta), path("${meta.id}.tsv"), emit: tab // sequence tsv with metadata annotation in AIRR format - - script: - """ - echo "${meta.id}" - echo "${meta.samples}" - echo "${tab}" - echo "${tab.join('\n')}" > tab.list - - head -n 1 ${tab[0]} > ${meta.id}_preannotation.tsv - tail -n +2 ${tab} >> ${meta.id}_preannotation.tsv - - # Remove line introduced by tail when merging multiple samples - sed -i '/==>/d' ${meta.id}_preannotation.tsv - - add_metadata.R --repertoire ${meta.id}_preannotation.tsv --samplesheet ${samplesheet} --outname "${meta.id}.tsv" - """ -} diff --git a/modules/local/parse_logs.nf b/modules/local/parse_logs.nf index ba5e538d..c2a77fd9 100644 --- a/modules/local/parse_logs.nf +++ b/modules/local/parse_logs.nf @@ -3,7 +3,7 @@ process PARSE_LOGS { tag "logs" label 'process_low' - conda (params.enable_conda ? "bioconda::pandas=1.1.5" : null) + conda "bioconda::pandas=1.1.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pandas:1.1.5' : 'quay.io/biocontainers/pandas:1.1.5' }" diff --git a/modules/local/presto/presto_assemblepairs.nf b/modules/local/presto/presto_assemblepairs.nf index 48113afd..5f02cc84 100644 --- a/modules/local/presto/presto_assemblepairs.nf +++ b/modules/local/presto/presto_assemblepairs.nf @@ -1,11 +1,12 @@ process PRESTO_ASSEMBLEPAIRS { tag "$meta.id" label 'process_long_parallelized' + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_buildconsensus.nf b/modules/local/presto/presto_buildconsensus.nf index 21b0cc8f..88c3b932 100644 --- a/modules/local/presto/presto_buildconsensus.nf +++ b/modules/local/presto/presto_buildconsensus.nf @@ -1,11 +1,12 @@ process PRESTO_BUILDCONSENSUS { tag "$meta.id" label "process_long_parallelized" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_clustersets.nf b/modules/local/presto/presto_clustersets.nf index b08faeed..691c256d 100644 --- a/modules/local/presto/presto_clustersets.nf +++ b/modules/local/presto/presto_clustersets.nf @@ -1,11 +1,12 @@ process PRESTO_CLUSTERSETS { tag "$meta.id" label "process_long_parallelized" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_collapseseq.nf b/modules/local/presto/presto_collapseseq.nf index 5d04afb8..0fabc851 100644 --- a/modules/local/presto/presto_collapseseq.nf +++ b/modules/local/presto/presto_collapseseq.nf @@ -1,11 +1,12 @@ process PRESTO_COLLAPSESEQ { tag "$meta.id" label "process_medium" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/presto/presto_filterseq.nf b/modules/local/presto/presto_filterseq.nf index 9f6704dc..3bcea7be 100644 --- a/modules/local/presto/presto_filterseq.nf +++ b/modules/local/presto/presto_filterseq.nf @@ -1,11 +1,12 @@ process PRESTO_FILTERSEQ { tag "$meta.id" label "process_medium" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_filterseq_postassembly.nf b/modules/local/presto/presto_filterseq_postassembly.nf index fc2c5fc6..b7e1543d 100644 --- a/modules/local/presto/presto_filterseq_postassembly.nf +++ b/modules/local/presto/presto_filterseq_postassembly.nf @@ -1,11 +1,12 @@ process PRESTO_FILTERSEQ_POSTASSEMBLY { tag "$meta.id" label "process_medium" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/presto/presto_maskprimers.nf b/modules/local/presto/presto_maskprimers.nf index 6e2f3df8..ad8894e6 100644 --- a/modules/local/presto/presto_maskprimers.nf +++ b/modules/local/presto/presto_maskprimers.nf @@ -1,11 +1,12 @@ process PRESTO_MASKPRIMERS { tag "$meta.id" label "process_high" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_maskprimers_postassembly.nf b/modules/local/presto/presto_maskprimers_postassembly.nf index e90942cf..84d034d2 100644 --- a/modules/local/presto/presto_maskprimers_postassembly.nf +++ b/modules/local/presto/presto_maskprimers_postassembly.nf @@ -1,11 +1,12 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { tag "$meta.id" label "process_high" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/presto/presto_pairseq.nf b/modules/local/presto/presto_pairseq.nf index f0532e8e..4d221bce 100644 --- a/modules/local/presto/presto_pairseq.nf +++ b/modules/local/presto/presto_pairseq.nf @@ -1,11 +1,12 @@ process PRESTO_PAIRSEQ { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path("${meta.id}_R1.fastq"), path("${meta.id}_R2.fastq") diff --git a/modules/local/presto/presto_parse_cluster.nf b/modules/local/presto/presto_parse_cluster.nf index dbb23457..8ef48b18 100644 --- a/modules/local/presto/presto_parse_cluster.nf +++ b/modules/local/presto/presto_parse_cluster.nf @@ -1,11 +1,12 @@ process PRESTO_PARSE_CLUSTER { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(R1), path(R2) diff --git a/modules/local/presto/presto_parseheaders.nf b/modules/local/presto/presto_parseheaders.nf index ad9d59ab..188ba5bc 100644 --- a/modules/local/presto/presto_parseheaders.nf +++ b/modules/local/presto/presto_parseheaders.nf @@ -1,11 +1,12 @@ process PRESTO_PARSEHEADERS { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/presto/presto_parseheaders_metadata.nf b/modules/local/presto/presto_parseheaders_metadata.nf index 717fdcc8..11ab1778 100644 --- a/modules/local/presto/presto_parseheaders_metadata.nf +++ b/modules/local/presto/presto_parseheaders_metadata.nf @@ -1,11 +1,12 @@ process PRESTO_PARSEHEADERS_METADATA { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) @@ -17,7 +18,7 @@ process PRESTO_PARSEHEADERS_METADATA { script: def args = task.ext.args ?: '' """ - ParseHeaders.py add -s $reads -o "${reads.baseName}_reheader-pass.fastq" $args -u ${meta.id} ${meta.subject} ${meta.species} ${meta.locus} + ParseHeaders.py add -s $reads -o "${reads.baseName}_reheader-pass.fastq" $args -u ${meta.id} ${meta.subject_id} ${meta.species} ${meta.locus} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/presto/presto_parseheaders_primers.nf b/modules/local/presto/presto_parseheaders_primers.nf index 49de701f..207cae8a 100644 --- a/modules/local/presto/presto_parseheaders_primers.nf +++ b/modules/local/presto/presto_parseheaders_primers.nf @@ -1,11 +1,12 @@ process PRESTO_PARSEHEADERS_PRIMERS { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/presto/presto_postconsensus_pairseq.nf b/modules/local/presto/presto_postconsensus_pairseq.nf index 868e50b9..31701183 100644 --- a/modules/local/presto/presto_postconsensus_pairseq.nf +++ b/modules/local/presto/presto_postconsensus_pairseq.nf @@ -1,11 +1,12 @@ process PRESTO_POSTCONSENSUS_PAIRSEQ { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path("${meta.id}_R1.fastq"), path("${meta.id}_R2.fastq") diff --git a/modules/local/presto/presto_splitseq.nf b/modules/local/presto/presto_splitseq.nf index bc7f6d74..de66d52e 100644 --- a/modules/local/presto/presto_splitseq.nf +++ b/modules/local/presto/presto_splitseq.nf @@ -1,11 +1,12 @@ process PRESTO_SPLITSEQ { tag "$meta.id" label "process_low" + label 'immcantation' - conda (params.enable_conda ? "bioconda::presto=0.7.0" : null) + conda "bioconda::presto=0.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/presto:0.7.0--pyhdfd78af_0' : - 'quay.io/biocontainers/presto:0.7.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'quay.io/biocontainers/presto:0.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/rename_fastq.nf b/modules/local/rename_fastq.nf index c1e768df..de46bab3 100644 --- a/modules/local/rename_fastq.nf +++ b/modules/local/rename_fastq.nf @@ -3,7 +3,7 @@ process RENAME_FASTQ { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.0 conda-forge::biopython=1.74" : null) + conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' : 'quay.io/biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }" diff --git a/modules/local/reveal/add_meta_to_tab.nf b/modules/local/reveal/add_meta_to_tab.nf index f37340f1..5666b145 100644 --- a/modules/local/reveal/add_meta_to_tab.nf +++ b/modules/local/reveal/add_meta_to_tab.nf @@ -3,8 +3,10 @@ process ADD_META_TO_TAB { label 'immcantation' label 'single_cpu' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
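Across these pRESTO modules the patch follows one pattern: the conditional `conda (params.enable_conda ? ...)` directive becomes a plain `conda` string, presto is bumped from 0.7.0 to 0.7.1, and each process additionally declares `label 'immcantation'`. A practical side effect of the shared label (an inference, not something the diff states) is that all Immcantation tooling can be retargeted at once from a user configuration, for instance to try the `immcantation/suite:devel` image that the new `*_immcantation_devel` test profiles exercise. A minimal sketch, assuming it is supplied with `-c custom.config`; the file name and image tag are illustrative:

```nextflow
// custom.config -- hypothetical user override, not shipped with the pipeline
process {
    // matches every process that declares `label 'immcantation'`
    withLabel: 'immcantation' {
        container = 'immcantation/suite:devel' // single image for all Immcantation steps
    }
}
```

Because each module still pins its own Bioconda package in the `conda` directive, an override like this only affects container-based profiles.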
+ 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" cache 'deep' // Without 'deep' this process would run when using -resume diff --git a/modules/local/reveal/changeo_assigngenes_reveal.nf b/modules/local/reveal/changeo_assigngenes_reveal.nf deleted file mode 100644 index 34c2572f..00000000 --- a/modules/local/reveal/changeo_assigngenes_reveal.nf +++ /dev/null @@ -1,27 +0,0 @@ -process CHANGEO_ASSIGNGENES_REVEAL { - tag "$meta.id" - label 'process_high' - label 'immcantation' - - // TODO: update container - container "immcantation/suite:devel" - - input: - tuple val(meta), path(reads) // reads in fasta format - path(igblast) // igblast fasta - - output: - path("*igblast.fmt7"), emit: blast - tuple val(meta), path("$reads"), emit: fasta - path "versions.yml" , emit: versions - path "*_command_log.txt" , emit: logs - - script: - """ - AssignGenes.py igblast -s $reads -b $igblast --organism "$meta.species" --loci "$meta.locus" --format blast --nproc $task.cpus --outname "$meta.id" > "$meta.id"_changeo_assigngenes_command_log.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" ) - changeo: \$( AssignGenes.py --version | awk -F' ' '{print \$2}' ) - END_VERSIONS""" -} diff --git a/modules/local/reveal/changeo_creategermlines_reveal.nf b/modules/local/reveal/changeo_creategermlines_reveal.nf deleted file mode 100644 index a1a3396c..00000000 --- a/modules/local/reveal/changeo_creategermlines_reveal.nf +++ /dev/null @@ -1,35 +0,0 @@ -process CHANGEO_CREATEGERMLINES_REVEAL { - tag "$meta.id" - label 'process_low' - label 'immcantation' - - // TODO: update container - container "immcantation/suite:devel" - - input: - tuple val(meta), path(tab) // sequence tsv in AIRR format - path(imgt_base) // imgt db - - output: - tuple val(meta), path("*germ-pass.tsv"), emit: tab - path("*_command_log.txt"), emit: logs - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - """ - CreateGermlines.py -d ${tab} -g dmask \\ - -r ${imgt_base}/${meta.species}/vdj/ --format airr --outdir . 
\\ - --log ${meta.id}.log --outname ${meta.id} $args > "${meta.id}_create-germlines_command_log.txt" - ParseLog.py -l ${meta.id}.log -f ID V_CALL D_CALL J_CALL - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - changeo: \$( CreateGermlines.py --version | awk -F' ' '{print \$2}' ) - presto: \$( ParseLog.py --version | awk -F' ' '{print \$2}' ) - END_VERSIONS - """ -} - - - diff --git a/modules/local/reveal/changeo_makedb_reveal.nf b/modules/local/reveal/changeo_makedb_reveal.nf deleted file mode 100644 index 72a23d8f..00000000 --- a/modules/local/reveal/changeo_makedb_reveal.nf +++ /dev/null @@ -1,30 +0,0 @@ -process CHANGEO_MAKEDB_REVEAL { - tag "$meta.id" - label 'process_low' - label 'immcantation' - - // TODO: update container - container "immcantation/suite:devel" - - input: - tuple val(meta), path(reads) // reads in fasta format - path(igblast) // igblast fasta from ch_igblast_db_for_process_igblast.mix(ch_igblast_db_for_process_igblast_mix).collect() - path(imgt_base) - - output: - tuple val(meta), path("*db-pass.tsv"), emit: tab //sequence table in AIRR format - path("*_command_log.txt"), emit: logs //process logs - path "versions.yml" , emit: versions - - script: - """ - MakeDb.py igblast -i $igblast -s $reads -r \\ - ${imgt_base}/${meta.species}/vdj/ \\ - $task.ext.args \\ - --outname "${meta.id}" > "${meta.id}_mdb_command_log.txt" - cat <<-END_VERSIONS > versions.yml - "${task.process}": - changeo: \$( MakeDb.py --version | awk -F' ' '{print \$2}' ) - END_VERSIONS - """ -} diff --git a/modules/local/reveal/filter_junction_mod3.nf b/modules/local/reveal/filter_junction_mod3.nf index 93e5142f..c2aa3274 100644 --- a/modules/local/reveal/filter_junction_mod3.nf +++ b/modules/local/reveal/filter_junction_mod3.nf @@ -3,8 +3,10 @@ process FILTER_JUNCTION_MOD3 { label 'immcantation' label 'single_cpu' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/reveal/filter_quality.nf b/modules/local/reveal/filter_quality.nf index b5e5f9d3..5ea3542f 100644 --- a/modules/local/reveal/filter_quality.nf +++ b/modules/local/reveal/filter_quality.nf @@ -3,8 +3,10 @@ process FILTER_QUALITY { label 'immcantation' label 'single_cpu' - // TODO: update container - container "immcantation/suite:devel" + conda "bioconda::r-enchantr=0.0.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.0.6--r42hdfd78af_0': + 'quay.io/biocontainers/r-enchantr:0.0.6--r42hdfd78af_0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/reveal/immcantation_container_version.nf b/modules/local/reveal/immcantation_container_version.nf deleted file mode 100644 index 7fdd5e43..00000000 --- a/modules/local/reveal/immcantation_container_version.nf +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Immcantation version - */ -process IMMCANTATION { - label 'immcantation' - label 'single_cpu' - - output: - path "versions.yml", emit: versions - - script: - """ - if ! 
command -v versions report &> /dev/null - then - cat <<-END_VERSIONS > versions.yml - "${task.process}_CONTAINER": - immcantation: none - END_VERSIONS - else - echo "${task.process}_CONTAINER:" > versions.yml && \ - cat /Version.yaml | grep "^ " | grep -v "date:" | sed s/version/immcantation/g >> versions.yml - fi - """ -} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 1ab5d3f6..d94c81de 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -2,7 +2,7 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" label 'process_single' - conda (params.enable_conda ? "conda-forge::pandas=1.1.5" : null) + conda "conda-forge::pandas=1.5.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pandas:1.1.5' : 'quay.io/biocontainers/pandas:1.1.5' }" @@ -14,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.tsv', emit: tsv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/airrflow/bin/ """ check_samplesheet.py $samplesheet diff --git a/modules/local/shazam/shazam_threshold.nf b/modules/local/shazam/shazam_threshold.nf deleted file mode 100644 index 0aaa6d30..00000000 --- a/modules/local/shazam/shazam_threshold.nf +++ /dev/null @@ -1,30 +0,0 @@ -process SHAZAM_THRESHOLD { - tag "$meta.id" - - conda (params.enable_conda ? "conda-forge::r-base=4.1.2 bioconda::r-shazam=1.1.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-db80433cc6df75bc43a5fd7bfa7529a7df8cfe15:f0e1329252bbc0f36a8656cfa655cf205da30e5b-0' : - 'quay.io/biocontainers/mulled-v2-db80433cc6df75bc43a5fd7bfa7529a7df8cfe15:f0e1329252bbc0f36a8656cfa655cf205da30e5b-0' }" - - input: - tuple val(meta), path(tab) // tsv tab in AIRR format - path(imgt_base) // igblast fasta - - output: - tuple val(meta), path("${tab}"), emit: tab - path("*threshold.txt"), emit: threshold - path("versions.yml") , emit: versions - path("*Hamming_distance_threshold.pdf") optional true - - script: - def args = task.ext.args ?: '' - """ - shazam_threshold.R $tab $params.threshold_method - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - shazam: \$(Rscript -e "library(shazam); cat(paste(packageVersion('shazam'), collapse='.'))") - R: \$(echo \$(R --version 2>&1) | awk -F' ' '{print \$3}') - END_VERSIONS - """ -} diff --git a/modules/local/unzip_db.nf b/modules/local/unzip_db.nf index 2291d7bb..10f49619 100644 --- a/modules/local/unzip_db.nf +++ b/modules/local/unzip_db.nf @@ -2,7 +2,7 @@ process UNZIP_DB { tag "unzip_db" label 'process_medium' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
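SAMPLESHEET_CHECK also gains the nf-core `when: task.ext.when == null || task.ext.when` guard. The guard does nothing until `ext.when` is set in configuration, at which point the task can be switched on or off per run. Purely to illustrate the mechanism (disabling the samplesheet check is not something the pipeline suggests), a sketch of such a toggle:

```nextflow
// custom.config -- illustrative only
process {
    withName: 'SAMPLESHEET_CHECK' {
        // with ext.when set to false the `when:` guard evaluates to false and the task is skipped
        ext.when = false
    }
}
```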
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : 'biocontainers/biocontainers:v1.2.0_cv1' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e05..3df21765 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.13" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100644 new mode 100755 index d1390392..da033408 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,11 +1,16 @@ #!/usr/bin/env python + +"""Provide functions to merge multiple versions.yml files.""" + + import yaml import platform from textwrap import dedent def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" html = [ dedent( """\\ @@ -44,46 +49,53 @@ def _make_versions_html(versions): return "\\n".join(html) -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. 
" - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 207258ad..e1ed9288 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + conda "bioconda::fastp=0.23.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index a8159a57..4b604749 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda (params.enable_conda ? 
'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 4a65c188..38f64d3e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,25 +9,19 @@ // Global default params, used in configs params { - // Subworflow selection - subworkflow = 'bcellmagic' - // Input parameters input = null + mode = "fastq" + miairr="$projectDir/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv" + index_file = false - // Databases options - igblast_base = null - imgtdb_base = null - save_databases = false - - // Bcellmagic specific options --------- - - // protocol options + // ---------------------------- + // sequencing protocol options + // ---------------------------- library_generation_method = null race_linker = null // Primer and UMI inputs - index_file = null cprimers = null vprimers = null vprimer_start = 0 @@ -50,7 +44,9 @@ params { trim_nextseq = false save_trimmed = false - // pRESTO options + // -------------------------- + // sequence assembly options + // -------------------------- filterseq_q = 20 primer_maxerror = 0.2 primer_mask_mode = 'cut' @@ -59,31 +55,52 @@ params { buildconsensus_maxgap = 0.5 cluster_sets = true - // Clustering parameters - set_cluster_threshold = false - cluster_threshold = 0.14 - threshold_method = 'density' + // ----------------------- + // vdj annotation options + // ----------------------- + productive_only = true + reassign = true + igblast_base = null + imgtdb_base = null + save_databases = true - // Downstream analysis - skip_report = false + + // ----------------------- + // bulk filtering options + // ----------------------- + remove_chimeric = true + detect_contamination = null + collapseby = 'sample_id' + + // ----------------------- + // clonal analysis options + // ----------------------- + cloneby = 'subject_id' + singlecell = 'single_cell' + clonal_threshold = 'auto' + skip_all_clones_report = false + + // tree lineage options + igphyml="/usr/local/bin/igphyml" skip_lineage = false - // Report + + // old bcellmagic options ---------------------- + // Clustering parameters + // threshold_method = 'density' + + // ----------------------- + // reporting options + // ----------------------- + skip_report = false report_rmd = "$projectDir/assets/repertoire_comparison.Rmd" report_css = "$projectDir/assets/nf-core_style.css" report_logo = "$projectDir/assets/nf-core-airrflow_logo_light.png" + report_logo_img = "$projectDir/assets/nf-core-airrflow_logo_reports.png" - // ----------------------------------- - - // Reveal specific options ----------- - collapseby = 'input_id' - cloneby = 'subject_id' - singlecell='single_cell' - reassign = true - productive_only = true - remove_chimeric = true - threshold = 'auto' - miairr="$projectDir/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv" + // ----------------------- + // generic nf-core options + // ----------------------- // References igenomes_base = 's3://ngi-igenomes/igenomes' @@ -107,10 +124,10 @@ params { monochrome_logs = false hook_url = null help = false + version = false validate_params = true show_hidden_params = false schema_ignore_params = 
'genomes' - enable_conda = false // Config options custom_config_version = 'master' @@ -150,7 +167,7 @@ try { profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { - params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false @@ -158,7 +175,7 @@ profiles { charliecloud.enabled = false } mamba { - params.enable_conda = true + conda.enabled = true conda.useMamba = true docker.enabled = false singularity.enabled = false @@ -167,6 +184,7 @@ profiles { charliecloud.enabled = false } docker { + conda.enabled = false docker.enabled = true singularity.enabled = false podman.enabled = false @@ -174,7 +192,11 @@ profiles { charliecloud.enabled = false docker.runOptions = '-u $(id -u):$(id -g)' } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } singularity { + conda.enabled = false singularity.enabled = true singularity.autoMounts = true docker.enabled = false @@ -183,6 +205,7 @@ profiles { charliecloud.enabled = false } podman { + conda.enabled = false podman.enabled = true singularity.enabled = false docker.enabled = false @@ -190,6 +213,7 @@ profiles { charliecloud.enabled = false } shifter { + conda.enabled = false shifter.enabled = true singularity.enabled = false docker.enabled = false @@ -197,6 +221,7 @@ profiles { charliecloud.enabled = false } charliecloud { + conda.enabled = false charliecloud.enabled = true docker.enabled = false singularity.enabled = false @@ -211,10 +236,10 @@ profiles { test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_tcr { includeConfig 'conf/test_tcr.config' } - test_tcr_thr { includeConfig 'conf/test_tcr_thr.config' } test_no_umi { includeConfig 'conf/test_no_umi.config' } - test_reveal { includeConfig 'conf/test_reveal.config' } - test_reveal_no_cc { includeConfig 'conf/test_reveal_no_cc.config' } + test_assembled { includeConfig 'conf/test_assembled.config' } + test_raw_immcantation_devel { includeConfig 'conf/test_raw_immcantation_devel.config' } + test_assembled_immcantation_devel { includeConfig 'conf/test_assembled_immcantation_devel.config' } test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } } @@ -261,12 +286,12 @@ dag { manifest { name = 'nf-core/airrflow' - author = 'Gisela Gabernet, Simon Heumos, Alexander Peltzer' + author = """Gisela Gabernet, Susanna Marquez, Alexander Peltzer, Simon Heumos""" homePage = 'https://github.com/nf-core/airrflow' - description = 'B and T cell repertoire analysis pipeline with the Immcantation framework.' 
+ description = """B and T cell repertoire analysis pipeline with the Immcantation framework.""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '2.4.0' + nextflowVersion = '!>=22.10.1' + version = '3.0dev' doi = '10.5281/zenodo.2642009' } diff --git a/nextflow_schema.json b/nextflow_schema.json index dbfaa6df..7ab3dabb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -12,21 +12,20 @@ "description": "Define where the pipeline should find input data and save output data.", "required": ["input", "outdir"], "properties": { - "subworkflow": { - "type": "string", - "default": "bcellmagic", - "fa_icon": "fas fa-project-diagram", - "hidden": true, - "description": "Specify the subworkflow to be executed.", - "enum": ["bcellmagic", "reveal"] - }, "input": { "type": "string", "mimetype": "text/tsv", - "fa_icon": "fas fa-dna", + "fa_icon": "fas fa-table", "description": "Path to a tsv file providing paths to the fastq files for each sample and the necessary metadata for the analysis.", "help_text": "The input file includes important sample metadata and the path to the R1 and R2 fastq files, and index read file (I), if available. The file should include the following columns, separated with tabs, with exactly these header names:\n\n```bash\nID Source Treatment Extraction_time Population R1 R2 I1\nQMKMK072AD Patient_2 Drug_treatment baseline p sample_S8_L001_R1_001.fastq.gz sample_S8_L001_R2_001.fastq.gz sample_S8_L001_I1_001.fastq.gz\n```\n\nThis metadata will then be automatically annotated in a column with the same header in the tables outputed by the pipeline. Where:\n\n* ID: sample ID.\n* Source: patient or organism code.\n* Treatment: treatment condition applied to the sample.\n* Extraction_time: time of cell extraction for the sample.\n* Population: B-cell population (e.g. naive, double-negative, memory, plasmablast).\n* R1: path to fastq file with first mates of paired-end sequencing.\n* R2: path to fastq file with second mates of paired-end sequencing.\n* I1: path to fastq with illumina index and UMI (unique molecular identifier) barcode (optional column)\n\nSpecify the path for your input file like this:\n\n```bash\n--input 'path/to/metadata/metadata_sheet.tsv'\n```" }, + "mode": { + "type": "string", + "default": "fastq", + "description": "Specify the processing mode for the pipeline. Available options are \"fastq\" and \"assembled\".ptions are: 'raw'", + "enum": ["fastq", "assembled"], + "fa_icon": "fas fa-terminal" + }, "outdir": { "type": "string", "format": "directory-path", @@ -39,6 +38,12 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "miairr": { + "type": "string", + "default": "bcellmagic/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", + "description": "Path to MiAIRR-BioSample mapping", + "fa_icon": "fas fa-table" } } }, @@ -63,33 +68,6 @@ }, "fa_icon": "fas fa-flask" }, - "databases_cache": { - "title": "Databases cache", - "type": "object", - "description": "Define the paths to the igblast and IMGT databases if you have them cached.", - "default": "", - "properties": { - "igblast_base": { - "type": "string", - "description": "Path to the cached igblast database.", - "help_text": "If it is not provided, the database will be newly downloaded.", - "fa_icon": "fas fa-database" - }, - "imgtdb_base": { - "type": "string", - "description": "Path to the cached igblast database.", - "help_text": "If it is not provided, the database will be newly downloaded.", - "fa_icon": "fas fa-database" - }, - "save_databases": { - "type": "boolean", - "description": "Save databases so you can use the cache in future runs.", - "fa_icon": "fas fa-file-download" - } - }, - "help_text": "", - "fa_icon": "fas fa-database" - }, "primer_handling": { "title": "Primer handling", "type": "object", @@ -128,7 +106,7 @@ "primer_revpr": { "type": "boolean", "description": "Specify to match the tail-end of the sequence against the reverse complement of the primers. This also reverses the behavior of the --start argument, such that start position is relative to the tail-end of the sequence. (default: False)Maximum scoring error for the Presto MaxPrimer process for the C and/or V region primers identification.", - "fa_icon": "fas fa-barcode" + "fa_icon": "fas fa-dna" } }, "fa_icon": "fas fa-dna" @@ -139,12 +117,6 @@ "description": "Define how UMI barcodes should be treated.", "default": "", "properties": { - "index_file": { - "type": "boolean", - "description": "Indicate if UMI indices are recorded in a separate index file.", - "help_text": "Set to `true` if UMI barcodes are to be read from a separate illumina index fastq file. If Illumina indices and UMI barcodes are already integrated into the R1 reads, leave the default `--index_file false`.\n\nThe pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter.", - "fa_icon": "fas fa-barcode" - }, "umi_position": { "type": "string", "default": "R1", @@ -162,6 +134,13 @@ "umi_start": { "type": "integer", "description": "UMI barcode start position in the index read.", + "fa_icon": "fas fa-barcode", + "default": 0 + }, + "index_file": { + "type": "boolean", + "description": "Indicate if UMI indices are recorded in a separate index file.", + "help_text": "Set to `true` if UMI barcodes are to be read from a separate illumina index fastq file. If Illumina indices and UMI barcodes are already integrated into the R1 reads, leave the default `--index_file false`.\n\nThe pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. 
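The UMI options keep `umi_position`, `umi_start` and `index_file`; they are only regrouped in the schema. For a library whose UMI sits in a separate Illumina index read, a hedged sketch of the relevant parameters (values are placeholders for a hypothetical protocol, not recommendations):

```nextflow
// umi_index.config -- illustrative only
params {
    index_file = true  // UMIs are read from the index fastq listed in the samplesheet (filename_I1)
    umi_start  = 0     // UMI starts at the first base of the index read
}
```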
If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter.", "fa_icon": "fas fa-barcode" } }, @@ -184,8 +163,9 @@ }, "adapter_fasta": { "type": "string", - "default": null, - "fa_icon": "fas fa-file" + "default": "None", + "fa_icon": "fas fa-file", + "description": "Fasta file with adapter sequences to be trimmed." }, "clip_r1": { "type": "integer", @@ -223,34 +203,34 @@ } } }, - "presto_options": { - "title": "pRESTO options", + "sequence_assembly_options": { + "title": "sequence assembly options", "type": "object", - "description": "Options for the presto tools", + "description": "Options for the pRESTO sequence assembly processes", "default": "", "properties": { "filterseq_q": { "type": "integer", "default": 20, - "description": "Quality threshold for Presto FilterSeq sequence filtering.", + "description": "Quality threshold for pRESTO FilterSeq sequence filtering.", "fa_icon": "fas fa-filter" }, "primer_maxerror": { "type": "number", "default": 0.2, - "description": "Maximum primer scoring error in the Presto MaskPrimer step for the C and/or V region primers identification.", + "description": "Maximum primer scoring error in the pRESTO MaskPrimer step for the C and/or V region primers identification.", "fa_icon": "fas fa-align-center" }, "primer_consensus": { "type": "number", "default": 0.6, - "description": "Maximum error for building the primer consensus in the Presto Buildconsensus step.", + "description": "Maximum error for building the primer consensus in the pRESTO Buildconsensus step.", "fa_icon": "fas fa-align-center" }, "primer_mask_mode": { "type": "string", "default": "cut", - "description": "Masking mode for the Presto MaskPrimer step. Available: cut, mask, trim, tag.", + "description": "Masking mode for the pRESTO MaskPrimer step. Available: cut, mask, trim, tag.", "enum": ["cut", "mask", "trim", "tag"], "help_text": "The primer masking modes will perform the following actions:\n\n* `cut`: remove both the primer region and the preceding sequence.\n* `mask`: replace the primer region with Ns and remove the preceding sequence.\n* `trim`: remove the region preceding the primer, but leave the primer region intact.\n* `tag`: leave the input sequence unmodified.", "fa_icon": "fas fa-mask" @@ -259,77 +239,130 @@ "type": "number", "default": 0.1, "fa_icon": "fas fa-align-center", - "description": "Maximum error for building the sequence consensus in the Presto BuildConsensus step." + "description": "Maximum error for building the sequence consensus in the pRESTO BuildConsensus step." }, "buildconsensus_maxgap": { "type": "number", "default": 0.5, "fa_icon": "fas fa-align-center", - "description": "Maximum gap for building the sequence consensus in the Presto BuildConsensus step." + "description": "Maximum gap for building the sequence consensus in the pRESTO BuildConsensus step." }, "cluster_sets": { "type": "boolean", "default": true, "fa_icon": "fas fa-layer-group", - "description": "Cluster sequences by similarity regardless of any annotation with Presto ClusterSets and annotate the cluster ID additionally to the UMI barcode." + "description": "Cluster sequences by similarity regardless of any annotation with pRESTO ClusterSets and annotate the cluster ID additionally to the UMI barcode." 
} }, "fa_icon": "fas fa-align-center" }, - "clustering_threshold_definition": { - "title": "Clustering threshold definition", + "vdj_annotation_options": { + "title": "VDJ annotation options", "type": "object", - "description": "Define how the B-cell clonal trees should be calculated.", + "description": "Options for the VDJ annotation processes.", "default": "", "properties": { - "set_cluster_threshold": { + "reassign": { "type": "boolean", - "description": "Set to true if to manually adjust the clustering threshold for cell clones.", - "help_text": "Set the `--set_cluster_threshold` parameter to allow manual cluster hamming distance threshold definition. Then specify the value in the `--cluster_threshold` parameter.\n\nBy default, the pipeline will define clones for each of the samples, as two sequences having the same V gene assignment, C gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the junction region sequences will be assessed by hamming distances. A distance threshold for determining if two sequences come from the same clone is automatically determined by the process shazam. Alternatively, a hamming distance threshold can be manually set by setting the `--set_cluster_threshold` and `--cluster_threshold` parameters.", - "fa_icon": "fab fa-pagelines" + "default": true, + "description": "Whether to reassign genes if the input file is an AIRR formatted tabulated file.", + "fa_icon": "fas fa-redo" }, - "cluster_threshold": { - "type": "number", - "default": 0.14, - "description": "Set the clustering threshold Hamming distance value.", - "help_text": "To have any effect, the `--set_cluster_threshold` parameter needs to be set to `true`.\n\nBy default, the pipeline will define clones for each of the samples, as two sequences having the same V gene assignment, C gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the junction region sequences will be assessed by hamming distances. A distance threshold for determining if two sequences come from the same clone is automatically determined by the process shazam. Alternatively, a hamming distance threshold can be manually set by setting the `--set_cluster_threshold` and `--cluster_threshold` parameters.", - "fa_icon": "fab fa-pagelines" + "productive_only": { + "type": "boolean", + "default": true, + "description": "Subset to productive sequences.", + "fa_icon": "fab fa-product-hunt" + }, + "save_databases": { + "type": "boolean", + "description": "Save databases so you can use the cache in future runs.", + "fa_icon": "fas fa-file-download" }, - "threshold_method": { + "imgtdb_base": { "type": "string", - "default": "density", - "description": "Set the method for finding the clustering threshold.", - "help_text": "This method will be used to find the Hamming nearest neighbor distances threshold for determining if a sequence belongs to the same B/T-cell clone or not. 
Available methods are \"gmm\" for a maximum-likelihood Gamma or Gaussian mixture fitting, and \"density\" for fitting a binned approximation to the ordinary kernel density estimate to the nearest neighbor distances.", - "fa_icon": "fab fa-pagelines" + "description": "Path to the cached IMGT database.", + "help_text": "If it is not provided, the database will be newly downloaded.", + "fa_icon": "fas fa-database" + }, + "igblast_base": { + "type": "string", + "description": "Path to the cached igblast database.", + "help_text": "If it is not provided, the database will be newly downloaded.", + "fa_icon": "fas fa-database" } }, - "help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V gene assignment, C gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process shazam. Alternatively, a hamming distance threshold can be manually set by setting the `--set_cluster_threshold` and `--cluster_threshold` parameters.", - "fa_icon": "fab fa-pagelines" + "fa_icon": "fas fa-edit" }, - "downstream_options": { - "title": "Downstream options", + "bulk_filtering_options": { + "title": "Bulk filtering options", "type": "object", - "description": "Define downstream analysis options.", + "description": "Options for bulk sequence filtering after VDJ assignment.", "default": "", "properties": { - "skip_report": { + "collapseby": { + "type": "string", + "default": "filename,cell_id", + "description": "Name of the field used to collapse duplicated sequences.", + "fa_icon": "fas fa-compress-alt" + }, + "detect_contamination": { "type": "boolean", - "description": "Skip repertoire analysis and report generation", - "fa_icon": "fas fa-angle-double-right" + "fa_icon": "fas fa-search", + "description": "Whether to run the process to detect contamination." + }, + "remove_chimeric": { + "type": "boolean", + "default": true, + "description": "Whether to apply the chimera removal filter.", + "fa_icon": "fas fa-minus-square" + } + }, + "fa_icon": "fas fa-filter" + }, + "clonal_analysis_options": { + "title": "Clonal analysis options", + "type": "object", + "description": "Define how the B-cell clonal trees should be calculated.", + "default": "", + "properties": { + "clonal_threshold": { + "type": ["string", "number"], + "default": "auto", + "fa_icon": "fab fa-pagelines", + "description": "Set the clustering threshold Hamming distance value. 
Default: 'auto'" }, "skip_lineage": { "type": "boolean", "description": "Skip clonal lineage analysis and lineage tree plotting.", "fa_icon": "fas fa-angle-double-right" }, - "skip_multiqc": { + "cloneby": { + "type": "string", + "default": "subject_id", + "description": "Name of the field used to group data files to identify clones.", + "fa_icon": "fab fa-pagelines" + }, + "igphyml": { + "type": "string", + "default": "/usr/local/share/igphyml/src/igphyml", + "description": "Path to IgPhyml executable.", + "fa_icon": "fas fa-file" + }, + "singlecell": { + "type": "string", + "default": "single_cell", + "description": "Name of the field used to determine if a sample is single cell sequencing or not.", + "fa_icon": "fas fa-border-all" + }, + "skip_all_clones_report": { "type": "boolean", - "description": "Skip multiqc report", + "description": "Skip report of EnchantR DefineClones for all samples together.", "fa_icon": "fas fa-angle-double-right" } }, - "help_text": "Downstream analyses include a series of R scripts based on the Immcantation Alakazam, ChangeO and Shazam packages to calculate:\n- Clonal abundance and diversity\n- Clonal lineage tree export in graphML\n- Clonal overlap and statistics\n- Mutational load\n- Isotype and V-family distribution", - "fa_icon": "fas fa-angle-double-right" + "help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V gene assignment, C gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process shazam. Alternatively, a hamming distance threshold can be manually set by setting the `--set_cluster_threshold` and `--cluster_threshold` parameters.", + "fa_icon": "fab fa-pagelines" }, "report_options": { "title": "Report options", @@ -340,34 +373,39 @@ "report_rmd": { "type": "string", "default": "${projectDir}/assets/repertoire_comparison.Rmd", - "description": "Custom report Rmarkdown file." + "description": "Custom report Rmarkdown file.", + "fa_icon": "far fa-file-code" }, "report_css": { "type": "string", "default": "${projectDir}/assets/nf-core_style.css", - "description": "Custom report style file in css format." + "description": "Custom report style file in css format.", + "fa_icon": "far fa-file-code" }, "report_logo": { "type": "string", "default": "${projectDir}/assets/nf-core-airrflow_logo_light.png", - "description": "Custom logo for the report." - } - } - }, - "software_packaging_options": { - "title": "Software packaging options", - "type": "object", - "description": "Options for software packaging", - "default": "", - "properties": { - "enable_conda": { + "description": "Custom logo for the report.", + "fa_icon": "far fa-file-code" + }, + "report_logo_img": { + "type": "string", + "default": "${projectDir}/assets/nf-core-airrflow_logo_reports.png", + "description": "Custom logo for the EnchantR reports.", + "fa_icon": "far fa-file-code" + }, + "skip_report": { + "type": "boolean", + "description": "Skip repertoire analysis and report generation.", + "fa_icon": "fas fa-angle-double-right" + }, + "skip_multiqc": { "type": "boolean", - "fa_icon": "fas fa-box", - "description": "Enable conda to run pipeline with conda environment." 
+ "description": "Skip multiqc report.", + "fa_icon": "fas fa-angle-double-right" } }, - "help_text": "Software packaging options", - "fa_icon": "fas fa-angle-double-right" + "fa_icon": "far fa-file-code" }, "reference_genome_options": { "title": "Reference genome options", @@ -445,7 +483,7 @@ "max_job_request_options": { "title": "Max job request options", "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", + "fa_icon": "fas fa-microchip", "description": "Set the top limit for requested resources for any single job.", "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { @@ -490,6 +528,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -536,7 +580,7 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { @@ -578,54 +622,6 @@ "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." } } - }, - "subworkflow_reveal": { - "title": "Subworkflow Reveal", - "type": "object", - "description": "Arguments for this subworkflow", - "default": "", - "properties": { - "collapseby": { - "type": "string", - "default": "filename,cell_id", - "description": "Name of the field used to collapse duplicated sequences" - }, - "cloneby": { - "type": "string", - "default": "subject_id", - "description": "Name of the field used to group data files to identify clones" - }, - "reassign": { - "type": "boolean", - "default": true, - "description": "Whether to reassign genes if the input file is an AIRR formatted tabulated file" - }, - "productive_only": { - "type": "boolean", - "default": true, - "description": "Subset to productive sequences" - }, - "remove_chimeric": { - "type": "boolean", - "default": true, - "description": "Whether to apply the chimera removal filter" - }, - "threshold": { - "type": ["string", "number"], - "default": "auto", - "description": "Use `auto` to automatically set a threshold to identify clonally related sequences. 
Set" - }, - "miairr": { - "type": "string", - "default": "bcellmagic/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", - "description": "Path to MiAIRR-BioSample mapping" - }, - "singlecell": { - "type": "string", - "default": "single_cell", - "description": "Whether input samples include single cell sequencing samples" - } - } } }, "allOf": [ @@ -635,9 +631,6 @@ { "$ref": "#/definitions/protocol" }, - { - "$ref": "#/definitions/databases_cache" - }, { "$ref": "#/definitions/primer_handling" }, @@ -648,19 +641,19 @@ "$ref": "#/definitions/adapter_trimming" }, { - "$ref": "#/definitions/presto_options" + "$ref": "#/definitions/sequence_assembly_options" }, { - "$ref": "#/definitions/clustering_threshold_definition" + "$ref": "#/definitions/vdj_annotation_options" }, { - "$ref": "#/definitions/downstream_options" + "$ref": "#/definitions/bulk_filtering_options" }, { - "$ref": "#/definitions/report_options" + "$ref": "#/definitions/clonal_analysis_options" }, { - "$ref": "#/definitions/software_packaging_options" + "$ref": "#/definitions/report_options" }, { "$ref": "#/definitions/reference_genome_options" @@ -673,9 +666,6 @@ }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/subworkflow_reveal" } ] } diff --git a/subworkflows/local/reveal_input_check.nf b/subworkflows/local/assembled_input_check.nf similarity index 69% rename from subworkflows/local/reveal_input_check.nf rename to subworkflows/local/assembled_input_check.nf index 4dd08032..5bab29b7 100644 --- a/subworkflows/local/reveal_input_check.nf +++ b/subworkflows/local/assembled_input_check.nf @@ -2,34 +2,35 @@ * Check input samplesheet and get channels */ -include { - VALIDATE_INPUT - } from '../../modules/local/enchantr/validate_input' +include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input' -workflow REVEAL_INPUT_CHECK { +workflow ASSEMBLED_INPUT_CHECK { take: samplesheet // file: /path/to/samplesheet.csv miairr collapseby cloneby - reassign + //reassign main: - VALIDATE_INPUT ( samplesheet, miairr, collapseby, cloneby, reassign) - validated_input = VALIDATE_INPUT.out.validated_input - validated_input + // TODO: validate input should check that sample_ids are unique + + VALIDATE_INPUT ( samplesheet, miairr, collapseby, cloneby ) //removed reassign + ch_validated_input = VALIDATE_INPUT.out.validated_input + ch_validated_input .splitCsv(header: true, sep:'\t') .map { get_meta(it) } .branch { it -> fasta: it[0].filename =~ /[fasta|fa]$/ tsv: it[0].filename =~ /tsv$/ } - .set{ch_metadata} + .set{ ch_metadata } emit: ch_fasta = ch_metadata.fasta ch_tsv = ch_metadata.tsv - validated_input = validated_input + validated_input = ch_validated_input + versions = VALIDATE_INPUT.out.versions } // Function to map @@ -37,7 +38,7 @@ def get_meta (LinkedHashMap col) { def meta = [:] - meta.id = col.id + meta.id = col.sample_id meta.filename = col.filename meta.subject_id = col.subject_id meta.species = col.species diff --git a/subworkflows/local/bulk_qc_and_filter.nf b/subworkflows/local/bulk_qc_and_filter.nf new file mode 100644 index 00000000..247a0341 --- /dev/null +++ b/subworkflows/local/bulk_qc_and_filter.nf @@ -0,0 +1,69 @@ +include { CHANGEO_CREATEGERMLINES } from '../../modules/local/changeo/changeo_creategermlines' +include { REMOVE_CHIMERIC } from '../../modules/local/enchantr/remove_chimeric' +include { DETECT_CONTAMINATION } from '../../modules/local/enchantr/detect_contamination' +include { COLLAPSE_DUPLICATES } from '../../modules/local/enchantr/collapse_duplicates' + +workflow 
BULK_QC_AND_FILTER { + + take: + ch_repertoire // tuple [meta, repertoire_tab] + ch_imgt + + main: + + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + // Remove chimeric sequences if requested + if (params.remove_chimeric) { + + // Create germlines (not --cloned) + CHANGEO_CREATEGERMLINES( + ch_repertoire, + ch_imgt.collect() + ) + ch_logs = ch_logs.mix(CHANGEO_CREATEGERMLINES.out.logs) + ch_versions = ch_versions.mix(CHANGEO_CREATEGERMLINES.out.versions.ifEmpty(null)) + + // Remove chimera + REMOVE_CHIMERIC( + CHANGEO_CREATEGERMLINES.out.tab, + ch_imgt.collect() + ) + ch_logs = ch_logs.mix(REMOVE_CHIMERIC.out.logs) + ch_versions = ch_versions.mix(REMOVE_CHIMERIC.out.versions.ifEmpty(null)) + ch_bulk_chimeric_pass = REMOVE_CHIMERIC.out.tab + + + } else { + ch_bulk_chimeric_pass = ch_repertoire + } + + // For Bulk data, detect cross-contamination + // This is only informative at this time + // TODO: add a flag to specify remove suspicious sequences + // and update file size log accordingly + + if (params.detect_contamination) { + DETECT_CONTAMINATION( + ch_bulk_chimeric_pass + .map{ it -> [ it[1] ] } + .collect() + ) + ch_logs = ch_logs.mix(DETECT_CONTAMINATION.out.logs) + ch_versions = ch_versions.mix(DETECT_CONTAMINATION.out.versions.ifEmpty(null)) + } + + COLLAPSE_DUPLICATES( + ch_bulk_chimeric_pass + ) + + ch_versions = ch_versions.mix(COLLAPSE_DUPLICATES.out.versions.ifEmpty(null)) + ch_logs = ch_logs.mix(COLLAPSE_DUPLICATES.out.logs) + + emit: + versions = ch_versions + repertoires = COLLAPSE_DUPLICATES.out.tab + logs = ch_logs + +} diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf new file mode 100644 index 00000000..7ec74a2c --- /dev/null +++ b/subworkflows/local/clonal_analysis.nf @@ -0,0 +1,108 @@ +include { FIND_THRESHOLD } from '../../modules/local/enchantr/find_threshold' +include { DEFINE_CLONES as DEFINE_CLONES_COMPUTE } from '../../modules/local/enchantr/define_clones' +include { DEFINE_CLONES as DEFINE_CLONES_REPORT } from '../../modules/local/enchantr/define_clones' +include { DOWSER_LINEAGES } from '../../modules/local/enchantr/dowser_lineages' + +workflow CLONAL_ANALYSIS { + take: + ch_repertoire + ch_imgt + ch_logo + + main: + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + + if (params.clonal_threshold == "auto") { + + ch_find_threshold = ch_repertoire.map{ it -> it[1] } + .collect() + + FIND_THRESHOLD ( + ch_find_threshold, + ch_logo + ) + ch_threshold = FIND_THRESHOLD.out.mean_threshold + ch_versions = ch_versions.mix(FIND_THRESHOLD.out.versions) + + clone_threshold = ch_threshold + .splitText( limit:1 ) { it.trim().toString() } + .dump(tag: 'clone_threshold') + .filter { it != 'NA'} + .filter { it != 'NaN' } + .ifEmpty { exit 1, "Automatic clone_threshold is 'NA'. 
Consider setting params.clonal_threshold manually."} + + } else { + clone_threshold = params.clonal_threshold + } + + // prepare ch for define clones + ch_repertoire.map{ it -> [ it[0]."${params.cloneby}", + it[0].id, + it[0].subject_id, + it[0].species, + it[0].single_cell, + it[0].locus, + it[1] ] } + .groupTuple() + .map{ get_meta_tabs(it) } + .set{ ch_define_clones } + + DEFINE_CLONES_COMPUTE( + ch_define_clones, + clone_threshold.collect(), + ch_imgt.collect() + ) + ch_versions = ch_versions.mix(DEFINE_CLONES_COMPUTE.out.versions) + ch_logs = ch_logs.mix(DEFINE_CLONES_COMPUTE.out.logs) + + // prepare ch for define clones all samples report + DEFINE_CLONES_COMPUTE.out.tab + .collect() + .map { it -> [ [id:'all_reps'], it ] } + .set{ch_all_repertoires_cloned} + + if (!params.skip_all_clones_report){ + DEFINE_CLONES_REPORT( + ch_all_repertoires_cloned, + clone_threshold.collect(), + ch_imgt.collect() + ) + } + + // prepare ch for dowser lineages + DEFINE_CLONES_COMPUTE.out.tab + .flatten() + .map { it -> [ [id: "${it.baseName}".replaceFirst("__clone-pass", "")], it ] } + .set{ch_repertoires_cloned} + + if (!params.skip_lineage){ + DOWSER_LINEAGES( + ch_repertoires_cloned + ) + ch_versions = ch_versions.mix(DOWSER_LINEAGES.out.versions) + } + + emit: + repertoire = ch_all_repertoires_cloned + versions = ch_versions + logs = ch_logs +} + +// Function to map +def get_meta_tabs(arr) { + def meta = [:] + meta.id = [arr[0]].unique().join("") + meta.sample_ids = arr[1] + meta.subject_id = arr[2] + meta.species = arr[3] + meta.single_cell = arr[4].unique().join("") + meta.locus = arr[5].unique().join("") + + def array = [] + + array = [ meta, arr[6].flatten() ] + + return array +} diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf new file mode 100644 index 00000000..c5206916 --- /dev/null +++ b/subworkflows/local/fastq_input_check.nf @@ -0,0 +1,74 @@ +/* + * Check input samplesheet and get read channels + */ + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' +//TODO: when enchantr supports input samplesheet from raw sequencing, update the code here to the commented one below.
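// Illustrative sketch only (hypothetical samplesheet path; column names follow the
// mapping in create_fastq_channels below): the splitCsv + map pattern used in this
// subworkflow turns each tab-separated samplesheet row into a [ meta, reads ] tuple:
//
//   workflow {
//       Channel.fromPath('samplesheet.tsv')
//           .splitCsv(header: true, sep: '\t')
//           .map { row -> [ [ id: row.sample_id, locus: row.pcr_target_locus ],
//                           [ file(row.filename_R1), file(row.filename_R2) ] ] }
//           .view()
//   }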
+//include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input' + +workflow FASTQ_INPUT_CHECK { + take: + samplesheet // file: /path/to/samplesheet.tsv + + main: + SAMPLESHEET_CHECK ( samplesheet ) + .tsv + .splitCsv ( header:true, sep:'\t' ) + .map { create_fastq_channels(it) } + .set { ch_reads } + // VALIDATE_INPUT( + // samplesheet, + // params.miairr, + // params.collapseby, + // params.cloneby + // ) + + // VALIDATE_INPUT.out.validated_input + // .splitCsv(header: true, sep:'\t') + // .map { get_meta(it) } + // .set{ ch_reads } + + emit: + reads = ch_reads // channel: [ val(meta), [ reads ] ] + versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] + samplesheet = SAMPLESHEET_CHECK.out.tsv // tsv metadata file +} + +// Function to map +def create_fastq_channels(LinkedHashMap col) { + + def meta = [:] + + meta.id = col.sample_id + meta.subject_id = col.subject_id + meta.species = col.species + meta.collapseby_group = col."${params.collapseby}" + meta.cloneby_group = col."${params.cloneby}" + meta.filetype = "fastq" + meta.single_cell = col.single_cell.toLowerCase() + meta.locus = col.pcr_target_locus + + def array = [] + if (!file(col.filename_R1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${col.filename_R1}" + } + if (!file(col.filename_R2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${col.filename_R2}" + } + if (col.filename_I1) { + if (!params.index_file){ + exit 1, "ERROR: --index_file was not provided but the index file path is specified in the samplesheet!" + } + if (!file(col.filename_I1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Index read FastQ file does not exist!\n${col.filename_I1}" + } + array = [ meta, [ file(col.filename_R1), file(col.filename_R2), file(col.filename_I1) ] ] + } else { + + array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ] + if (params.index_file) { + exit 1, "ERROR: --index_file was provided but the index file path is not specified in the samplesheet!" 
+ } + } + return array +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index ac0b456f..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Check input samplesheet and get read channels - */ - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.tsv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .tsv - .splitCsv ( header:true, sep:'\t' ) - .map { create_fastq_channels(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to map -def create_fastq_channels(LinkedHashMap col) { - def meta = [:] - meta.id = col.sample_id - meta.subject = col.subject_id - meta.locus = col.pcr_target_locus - meta.species = col.species - - def array = [] - if (!file(col.filename_R1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${col.filename_R1}" - } - if (!file(col.filename_R2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${col.filename_R2}" - } - if (col.filename_I1) { - if (!file(col.filename_I1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Index read FastQ file does not exist!\n${col.filename_I1}" - } - array = [ meta, [ file(col.filename_R1), file(col.filename_R2), file(col.filename_I1) ] ] - } else { - - array = [ meta, [ file(col.filename_R1), file(col.filename_R2) ] ] - } - return array -} diff --git a/subworkflows/local/merge_tables_wf.nf b/subworkflows/local/merge_tables_wf.nf deleted file mode 100644 index e0acfd1b..00000000 --- a/subworkflows/local/merge_tables_wf.nf +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Get tables and group together the ones from the same subject - */ - -include { MERGE_TABLES } from '../../modules/local/merge_tables' - -workflow MERGE_TABLES_WF { - take: - ch_tables - ch_samplesheet - - main: - ch_tables - .dump() - .map{it -> [ it[0].subject+'_'+it[0].locus, it[0].id, it[0].locus, it[0].subject, it[0].species, it[1] ]} - .groupTuple() - .dump() - .map{ get_meta_tabs(it) } - .dump() - .set{ch_merge_tables} - - MERGE_TABLES( - ch_merge_tables, - ch_samplesheet.collect() - ) - - emit: - tab = MERGE_TABLES.out.tab // channel: [ val(meta), tab ] -} - -// Function to map -def get_meta_tabs(arr) { - def meta = [:] - meta.id = arr[0] - meta.samples = arr[1] - meta.locus = arr[2].unique().join("") - meta.subject = arr[3].unique().join("") - meta.species = arr[4].unique().join("") - - def array = [] - - array = [ meta, arr[5].flatten() ] - - return array -} diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 5bcd972d..0c5b92aa 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -33,15 +33,11 @@ workflow PRESTO_UMI { ch_versions = Channel.empty() - // prepare reads for fastp - ch_reads.dump(tag:'presto umi reads') - // Merge UMI from index file to R1 if provided if (params.index_file) { // ch for fastp reads R1 R2 ch_reads.map{ meta, reads -> [meta, [reads[0], reads[1]]] } - .dump(tag: 'presto_umi_R1_R2_reads') .set{ ch_reads_R1_R2 } // Fastp reads R1 R2 @@ -61,7 +57,6 @@ workflow PRESTO_UMI { .map{ meta, reads -> [meta.id, meta, reads[2]] } ch_meta_R1_R2_index = ch_meta_R1_R2.join( ch_meta_index ) .map{ id, meta1, R1, R2, meta2, index -> [ meta1, R1, R2, index ] } - .dump(tag: 
'ch_merge_umi') MERGE_UMI ( ch_meta_R1_R2_index ) ch_gunzip = MERGE_UMI.out.reads diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf new file mode 100644 index 00000000..3a4152b7 --- /dev/null +++ b/subworkflows/local/repertoire_analysis_reporting.nf @@ -0,0 +1,76 @@ +include { PARSE_LOGS } from '../../modules/local/parse_logs.nf' +include { REPORT_FILE_SIZE } from '../../modules/local/enchantr/report_file_size.nf' +include { AIRRFLOW_REPORT } from '../../modules/local/airrflow_report/airrflow_report' + +workflow REPERTOIRE_ANALYSIS_REPORTING { + + take: + ch_presto_filterseq_logs + ch_presto_maskprimers_logs + ch_presto_pairseq_logs + ch_presto_clustersets_logs + ch_presto_buildconsensus_logs + ch_presto_postconsensus_pairseq_logs + ch_presto_assemblepairs_logs + ch_presto_collapseseq_logs + ch_presto_splitseq_logs + ch_reassign_logs + ch_changeo_makedb_logs + ch_vdj_annotation_logs + ch_bulk_qc_and_filter_logs + ch_sc_qc_and_filter_logs + ch_clonal_analysis_logs + ch_repertoires + ch_input + ch_report_rmd + ch_report_css + ch_report_logo + ch_metadata + + main: + ch_versions = Channel.empty() + + if (params.mode == "fastq") { + PARSE_LOGS( + ch_presto_filterseq_logs, + ch_presto_maskprimers_logs, + ch_presto_pairseq_logs, + ch_presto_clustersets_logs, + ch_presto_buildconsensus_logs, + ch_presto_postconsensus_pairseq_logs, + ch_presto_assemblepairs_logs, + ch_presto_collapseseq_logs, + ch_presto_splitseq_logs, + ch_changeo_makedb_logs, + ch_input + ) + ch_versions = ch_versions.mix(PARSE_LOGS.out.versions) + ch_parsed_logs = PARSE_LOGS.out.logs + + } else { + ch_parsed_logs = Channel.empty() + } + + ch_logs = ch_vdj_annotation_logs.mix(ch_bulk_qc_and_filter_logs, + ch_reassign_logs, + ch_sc_qc_and_filter_logs, + ch_clonal_analysis_logs) + REPORT_FILE_SIZE( + ch_logs.collect().ifEmpty([]), + ch_metadata + ) + ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions) + + AIRRFLOW_REPORT( + ch_repertoires, + ch_parsed_logs.collect().ifEmpty([]), + REPORT_FILE_SIZE.out.table.ifEmpty([]), + ch_report_rmd, + ch_report_css, + ch_report_logo + ) + ch_versions = ch_versions.mix(AIRRFLOW_REPORT.out.versions) + + emit: + versions = ch_versions +} diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf new file mode 100644 index 00000000..59a89ee0 --- /dev/null +++ b/subworkflows/local/sequence_assembly.nf @@ -0,0 +1,234 @@ +#!/usr/bin/env nextflow +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + + +// Rmarkdown report file +ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true) +ch_report_css = Channel.fromPath(params.report_css, checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Local: Sub-workflows +include { FASTQ_INPUT_CHECK } from '../../subworkflows/local/fastq_input_check' +include { PRESTO_UMI } from '../../subworkflows/local/presto_umi' +include { PRESTO_SANS_UMI } from 
'../../subworkflows/local/presto_sans_umi' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC } from '../../modules/nf-core/fastqc/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + +workflow SEQUENCE_ASSEMBLY { + + take: + ch_input // channel: + + main: + + // Validate params + if (!params.library_generation_method) { + exit 1, "Please specify a library generation method with the `--library_generation_method` option." + } + + if (params.adapter_fasta) { + ch_adapter_fasta = Channel.fromPath(params.adapter_fasta, checkIfExists: true) + } else { + ch_adapter_fasta = [] + } + + + // Validate library generation method parameter + if (params.library_generation_method == 'specific_pcr_umi'){ + if (params.vprimers) { + ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) + } else { + exit 1, "Please provide a V-region primers fasta file with the '--vprimers' option when using the 'specific_pcr_umi' library generation method." + } + if (params.cprimers) { + ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) + } else { + exit 1, "Please provide a C-region primers fasta file with the '--cprimers' option when using the 'specific_pcr_umi' library generation method." + } + if (params.race_linker) { + exit 1, "Please do not set '--race_linker' when using the 'specific_pcr_umi' library generation method." + } + if (params.umi_length < 2) { + exit 1, "The 'specific_pcr_umi' library generation method requires setting the '--umi_length' to a value greater than 1." + } + } else if (params.library_generation_method == 'specific_pcr') { + if (params.vprimers) { + ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) + } else { + exit 1, "Please provide a V-region primers fasta file with the '--vprimers' option when using the 'specific_pcr' library generation method." + } + if (params.cprimers) { + ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) + } else { + exit 1, "Please provide a C-region primers fasta file with the '--cprimers' option when using the 'specific_pcr' library generation method." + } + if (params.race_linker) { + exit 1, "Please do not set '--race_linker' when using the 'specific_pcr' library generation method." + } + if (params.umi_length > 0) { + exit 1, "Please do not set a UMI length with the library preparation method 'specific_pcr'. Please specify instead a method that supports UMIs." + } else { + params.umi_length = 0 + } + } else if (params.library_generation_method == 'dt_5p_race_umi') { + if (params.vprimers) { + exit 1, "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." + } else if (params.race_linker) { + ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else { + exit 1, "The oligo-dT 5'-RACE UMI library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'."
+ } + if (params.cprimers) { + ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) + } else { + exit 1, "The oligo-dT 5'-RACE UMI library generation method requires the C-region primer sequences, please provide a fasta file with the '--cprimers' option." + } + if (params.umi_length < 2) { + exit 1, "The oligo-dT 5'-RACE UMI 'dt_5p_race_umi' library generation method requires setting the '--umi_length' to a value greater than 1." + } + } else if (params.library_generation_method == 'dt_5p_race') { + if (params.vprimers) { + exit 1, "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." + } else if (params.race_linker) { + ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else { + exit 1, "The oligo-dT 5'-RACE library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." + } + if (params.cprimers) { + ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) + } else { + exit 1, "The oligo-dT 5'-RACE library generation method requires the C-region primer sequences, please provide a fasta file with the '--cprimers' option." + } + if (params.umi_length > 0) { + exit 1, "Please do not set a UMI length with the library preparation method oligo-dT 5'-RACE 'dt_5p_race'. Please specify instead a method that supports UMIs (e.g. 'dt_5p_race_umi')." + } else { + params.umi_length = 0 + } + } else { + exit 1, "The provided library generation method is not supported. Please check the docs for `--library_generation_method`." + } + + // Validate UMI position + if (params.index_file & params.umi_position == 'R2') {exit 1, "Please do not set `--umi_position` option if index file with UMIs is provided."} + if (params.umi_length < 0) {exit 1, "Please provide the UMI barcode length in the option `--umi_length`. To run without UMIs, set umi_length to 0."} + if (!params.index_file & params.umi_start != 0) {exit 1, "Setting a UMI start position is only allowed when providing the UMIs in a separate index read file.
If so, please provide the `--index_file` flag as well."} + + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + ch_versions = Channel.empty() + + FASTQ_INPUT_CHECK(ch_input) + ch_versions = ch_versions.mix(FASTQ_INPUT_CHECK.out.versions) + + ch_reads = FASTQ_INPUT_CHECK.out.reads + + if (params.umi_length == 0) { + // + // SUBWORKFLOW: pRESTO without UMIs + // + PRESTO_SANS_UMI ( + ch_reads, + ch_cprimers_fasta, + ch_vprimers_fasta, + ch_adapter_fasta + ) + ch_presto_fasta = PRESTO_SANS_UMI.out.fasta + ch_presto_software = PRESTO_SANS_UMI.out.software + ch_fastp_reads_html = PRESTO_SANS_UMI.out.fastp_reads_html + ch_fastp_reads_json = PRESTO_SANS_UMI.out.fastp_reads_json + ch_fastqc_postassembly = PRESTO_SANS_UMI.out.fastqc_postassembly_gz + ch_presto_assemblepairs_logs = PRESTO_SANS_UMI.out.presto_assemblepairs_logs + ch_presto_filterseq_logs = PRESTO_SANS_UMI.out.presto_filterseq_logs + ch_presto_maskprimers_logs = PRESTO_SANS_UMI.out.presto_maskprimers_logs + ch_presto_collapseseq_logs = PRESTO_SANS_UMI.out.presto_collapseseq_logs + ch_presto_splitseq_logs = PRESTO_SANS_UMI.out.presto_splitseq_logs + ch_presto_pairseq_logs = Channel.empty() + ch_presto_clustersets_logs = Channel.empty() + ch_presto_buildconsensus_logs = Channel.empty() + ch_presto_postconsensus_pairseq_logs = Channel.empty() + + } else { + // + // SUBWORKFLOW: pRESTO with UMIs + // + PRESTO_UMI ( + ch_reads, + ch_cprimers_fasta, + ch_vprimers_fasta, + ch_adapter_fasta + ) + ch_presto_fasta = PRESTO_UMI.out.fasta + ch_presto_software = PRESTO_UMI.out.software + ch_fastp_reads_html = PRESTO_UMI.out.fastp_reads_html + ch_fastp_reads_json = PRESTO_UMI.out.fastp_reads_json + ch_fastqc_postassembly = PRESTO_UMI.out.fastqc_postassembly_gz + ch_presto_filterseq_logs = PRESTO_UMI.out.presto_filterseq_logs + ch_presto_maskprimers_logs = PRESTO_UMI.out.presto_maskprimers_logs + ch_presto_pairseq_logs = PRESTO_UMI.out.presto_pairseq_logs + ch_presto_clustersets_logs = PRESTO_UMI.out.presto_clustersets_logs + ch_presto_buildconsensus_logs = PRESTO_UMI.out.presto_buildconsensus_logs + ch_presto_postconsensus_pairseq_logs = PRESTO_UMI.out.presto_postconsensus_pairseq_logs + ch_presto_assemblepairs_logs = PRESTO_UMI.out.presto_assemblepairs_logs + ch_presto_collapseseq_logs = PRESTO_UMI.out.presto_collapseseq_logs + ch_presto_splitseq_logs = PRESTO_UMI.out.presto_splitseq_logs + } + + ch_versions = ch_versions.mix(ch_presto_software) + + emit: + versions = ch_versions + // assembled sequences in fasta format + fasta = ch_presto_fasta + // validated metadata + samplesheet = FASTQ_INPUT_CHECK.out.samplesheet + //fastp + fastp_reads_html = ch_fastp_reads_html + fastp_reads_json = ch_fastp_reads_json + // fastqc files for multiQC report + fastqc_postassembly = ch_fastqc_postassembly + // presto logs for html report + presto_filterseq_logs = ch_presto_filterseq_logs + presto_maskprimers_logs = ch_presto_maskprimers_logs + presto_pairseq_logs = ch_presto_pairseq_logs + presto_clustersets_logs = ch_presto_clustersets_logs + presto_buildconsensus_logs = ch_presto_buildconsensus_logs + presto_postconsensus_pairseq_logs = ch_presto_postconsensus_pairseq_logs + presto_assemblepairs_logs = ch_presto_assemblepairs_logs + presto_collapseseq_logs = ch_presto_collapseseq_logs + presto_splitseq_logs = ch_presto_splitseq_logs +} diff --git a/subworkflows/local/single_cell_qc_and_filtering.nf b/subworkflows/local/single_cell_qc_and_filtering.nf new file mode 100644 index 00000000..9de2701e --- /dev/null +++ 
b/subworkflows/local/single_cell_qc_and_filtering.nf @@ -0,0 +1,40 @@ +include { SINGLE_CELL_QC } from '../../modules/local/enchantr/single_cell_qc' + +workflow SINGLE_CELL_QC_AND_FILTERING { + take: + repertoires // tuple [meta, repertoire_tab] + + main: + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + repertoires + .map{ it -> [ it[0].id, + it[0] ] } + .set{ch_onlymeta} + + repertoires + .map { it -> it[1]} + .collect() + .set{ch_repertoire_allsc} + + SINGLE_CELL_QC( + ch_repertoire_allsc + ) + + SINGLE_CELL_QC.out.tab + .flatten() + .map { it -> [ "${it.baseName}".replaceFirst("__scqc-pass", ""), it ] } + .set{ch_repertoire_after_scqc_with_sampleid} + + ch_logs = ch_logs.mix(SINGLE_CELL_QC.out.logs) + ch_versions = ch_versions.mix(SINGLE_CELL_QC.out.versions.ifEmpty(null)) + + ch_repertoire_after_scqc_withmeta = ch_onlymeta.join(ch_repertoire_after_scqc_with_sampleid) + .map{ it -> [ it[1], it[2] ]} + + emit: + versions = ch_versions + repertoires = ch_repertoire_after_scqc_withmeta + logs = ch_logs +} diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf new file mode 100644 index 00000000..d660b4ac --- /dev/null +++ b/subworkflows/local/vdj_annotation.nf @@ -0,0 +1,125 @@ +include { FETCH_DATABASES } from '../../modules/local/fetch_databases' +include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' +include { CHANGEO_ASSIGNGENES } from '../../modules/local/changeo/changeo_assigngenes' +include { CHANGEO_MAKEDB } from '../../modules/local/changeo/changeo_makedb' +include { CHANGEO_PARSEDB_SPLIT } from '../../modules/local/changeo/changeo_parsedb_split' + +// reveal +include { FILTER_QUALITY } from '../../modules/local/reveal/filter_quality' +include { FILTER_JUNCTION_MOD3 } from '../../modules/local/reveal/filter_junction_mod3' +include { ADD_META_TO_TAB } from '../../modules/local/reveal/add_meta_to_tab' + + +workflow VDJ_ANNOTATION { + + take: + ch_fasta // [meta, fasta] + ch_validated_samplesheet + + main: + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + // FETCH DATABASES + // TODO: this can take a long time, and the progress shows 0%. Would be + // nice to have some better progress reporting. + // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST? 
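// Note: params.igblast_base and params.imgtdb_base (handled below) may each point to a
// .zip archive or to an already-unpacked directory; if either is unset, FETCH_DATABASES
// downloads the references instead. A hypothetical invocation pre-supplying both
// references (placeholder paths) might look like:
//
//   nextflow run nf-core/airrflow --igblast_base igblast_base.zip --imgtdb_base imgtdb_base.zip ...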
+ if( params.igblast_base ){ + if (params.igblast_base.endsWith(".zip")) { + Channel.fromPath("${params.igblast_base}") + .ifEmpty{ exit 1, "IGBLAST DB not found: ${params.igblast_base}" } + .set { ch_igblast_zipped } + UNZIP_IGBLAST( ch_igblast_zipped.collect() ) + ch_igblast = UNZIP_IGBLAST.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions.ifEmpty(null)) + } else { + Channel.fromPath("${params.igblast_base}") + .ifEmpty { exit 1, "IGBLAST DB not found: ${params.igblast_base}" } + .set { ch_igblast } + } + } + + if( params.imgtdb_base ){ + if (params.imgtdb_base.endsWith(".zip")) { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty{ exit 1, "IMGTDB not found: ${params.imgtdb_base}" } + .set { ch_imgt_zipped } + UNZIP_IMGT( ch_imgt_zipped.collect() ) + ch_imgt = UNZIP_IMGT.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null)) + } else { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty { exit 1, "IMGTDB not found: ${params.imgtdb_base}" } + .set { ch_imgt } + } + } + + if (!params.igblast_base | !params.imgtdb_base) { + FETCH_DATABASES() + ch_igblast = FETCH_DATABASES.out.igblast + ch_imgt = FETCH_DATABASES.out.imgt + ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null)) + } + + CHANGEO_ASSIGNGENES ( + ch_fasta, + ch_igblast.collect() + ) + + ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs) + ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null)) + + CHANGEO_MAKEDB ( + CHANGEO_ASSIGNGENES.out.fasta, + CHANGEO_ASSIGNGENES.out.blast, + ch_imgt.collect() + ) + ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs) + ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null)) + + // Apply quality filters: + // - locus should match v_call chain + // - seq alignment min length informative positions 200 + // - max 10% N nucleotides + // TODO: emit versions + FILTER_QUALITY( + CHANGEO_MAKEDB.out.tab + ) + ch_logs = ch_logs.mix(FILTER_QUALITY.out.logs) + + if (params.productive_only) { + CHANGEO_PARSEDB_SPLIT ( + FILTER_QUALITY.out.tab + ) + ch_logs = ch_logs.mix(CHANGEO_PARSEDB_SPLIT.out.logs) + ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT.out.versions.ifEmpty(null)) + + // Apply filter: junction length multiple of 3 + // TODO: Add to enchantr and emit versions? 
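// Illustrative only: per the comment above, FILTER_JUNCTION_MOD3 keeps rearrangements
// whose junction nucleotide length is a multiple of 3, e.g. for hypothetical junctions:
//
//   assert 'TGTGCGAGAGAT'.length() % 3 == 0   // 12 nt junction is kept
//   assert 'TGTGCGAGAGA'.length() % 3 != 0    // 11 nt junction is filtered out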
+ FILTER_JUNCTION_MOD3( + CHANGEO_PARSEDB_SPLIT.out.tab + ) + ch_logs = ch_logs.mix(FILTER_JUNCTION_MOD3.out.logs) + ch_repertoire = FILTER_JUNCTION_MOD3.out.tab.ifEmpty(null) + } else { + ch_repertoire = FILTER_QUALITY.out.tab.ifEmpty(null) + } + + ADD_META_TO_TAB( + ch_repertoire, + ch_validated_samplesheet + ) + //TODO: emit versions + ch_logs = ch_logs.mix(ADD_META_TO_TAB.out.logs) + + + emit: + versions = ch_versions + repertoire = ADD_META_TO_TAB.out.tab + imgt = ch_imgt + igblast = ch_igblast + changeo_makedb_logs = CHANGEO_MAKEDB.out.logs + logs = ch_logs + +} diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf new file mode 100644 index 00000000..f237998e --- /dev/null +++ b/workflows/airrflow.nf @@ -0,0 +1,273 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowAirrflow.initialise(params, log) + +// TODO nf-core: Add all file path parameters for the pipeline to the list below +// Check input path parameters to see if they exist +def checkPathParamList = [ params.input, params.multiqc_config ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +if (params.input) { + ch_input = Channel.fromPath(params.input, checkIfExists: true) +} else { + exit 1, "Please provide input file containing the sample metadata with the '--input' option." +} + +// TODO: check that params.reassign can only be false if input file is fasta tsv (and V/D/J assignments are available). + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + +// Report files +ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true) +ch_report_css = Channel.fromPath(params.report_css, checkIfExists: true) +ch_report_logo = Channel.fromPath(params.report_logo, checkIfExists: true) +ch_report_logo_img = Channel.fromPath(params.report_logo_img, checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../modules/local/changeo/changeo_convertdb_fasta' + + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { SEQUENCE_ASSEMBLY } from '../subworkflows/local/sequence_assembly' +include { ASSEMBLED_INPUT_CHECK } from '../subworkflows/local/assembled_input_check' +include { VDJ_ANNOTATION } from '../subworkflows/local/vdj_annotation' +include { BULK_QC_AND_FILTER } from '../subworkflows/local/bulk_qc_and_filter' +include { SINGLE_CELL_QC_AND_FILTERING } from '../subworkflows/local/single_cell_qc_and_filtering' +include { CLONAL_ANALYSIS } from '../subworkflows/local/clonal_analysis' +include { REPERTOIRE_ANALYSIS_REPORTING } from '../subworkflows/local/repertoire_analysis_reporting' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow AIRRFLOW { + + ch_versions = Channel.empty() + ch_reassign_logs = Channel.empty() + + if ( params.mode == "fastq" ) { + + // Perform sequence assembly if input type is fastq + SEQUENCE_ASSEMBLY( ch_input ) + + ch_fasta = SEQUENCE_ASSEMBLY.out.fasta + ch_versions = ch_versions.mix(SEQUENCE_ASSEMBLY.out.versions) + ch_fastp_html = SEQUENCE_ASSEMBLY.out.fastp_reads_html + ch_fastp_json = SEQUENCE_ASSEMBLY.out.fastp_reads_json + ch_fastqc_postassembly_mqc = SEQUENCE_ASSEMBLY.out.fastqc_postassembly + ch_validated_samplesheet = SEQUENCE_ASSEMBLY.out.samplesheet.collect() + + ch_presto_filterseq_logs = SEQUENCE_ASSEMBLY.out.presto_filterseq_logs + ch_presto_maskprimers_logs = SEQUENCE_ASSEMBLY.out.presto_maskprimers_logs + ch_presto_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_pairseq_logs + ch_presto_clustersets_logs = SEQUENCE_ASSEMBLY.out.presto_clustersets_logs + ch_presto_buildconsensus_logs = SEQUENCE_ASSEMBLY.out.presto_buildconsensus_logs + ch_presto_postconsensus_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_postconsensus_pairseq_logs + ch_presto_assemblepairs_logs = SEQUENCE_ASSEMBLY.out.presto_assemblepairs_logs + ch_presto_collapseseq_logs = SEQUENCE_ASSEMBLY.out.presto_collapseseq_logs + ch_presto_splitseq_logs = SEQUENCE_ASSEMBLY.out.presto_splitseq_logs + + } else if ( params.mode == "assembled" ) { + + ASSEMBLED_INPUT_CHECK (ch_input, + 
params.miairr, + params.collapseby, + params.cloneby) + ch_versions = ch_versions.mix( ASSEMBLED_INPUT_CHECK.out.versions.ifEmpty([]) ) + + if (params.reassign) { + CHANGEO_CONVERTDB_FASTA_FROM_AIRR( + ASSEMBLED_INPUT_CHECK.out.ch_tsv + ) + ch_fasta_from_tsv = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta + ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions.ifEmpty([])) + ch_reassign_logs = ch_reassign_logs.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.logs) + } else { + ch_fasta_from_tsv = Channel.empty() + } + + ch_fasta = ASSEMBLED_INPUT_CHECK.out.ch_fasta.mix(ch_fasta_from_tsv) + ch_validated_samplesheet = ASSEMBLED_INPUT_CHECK.out.validated_input.collect() + + ch_presto_filterseq_logs = Channel.empty() + ch_presto_maskprimers_logs = Channel.empty() + ch_presto_pairseq_logs = Channel.empty() + ch_presto_clustersets_logs = Channel.empty() + ch_presto_buildconsensus_logs = Channel.empty() + ch_presto_postconsensus_pairseq_logs = Channel.empty() + ch_presto_assemblepairs_logs = Channel.empty() + ch_presto_collapseseq_logs = Channel.empty() + ch_presto_splitseq_logs = Channel.empty() + ch_fastp_html = Channel.empty() + ch_fastp_json = Channel.empty() + ch_fastqc_postassembly_mqc = Channel.empty() + + } else { + exit 1, "Mode parameter value not valid." + } + // Perform V(D)J annotation and filtering + VDJ_ANNOTATION( + ch_fasta, + ch_validated_samplesheet.collect() + ) + ch_versions = ch_versions.mix( VDJ_ANNOTATION.out.versions.ifEmpty([])) + + // Split bulk and single cell repertoires + ch_repertoire_by_processing = VDJ_ANNOTATION.out.repertoire + .branch { it -> + single: it[0].single_cell == 'true' + bulk: it[0].single_cell == 'false' + } + + // Bulk: Assign germlines and filtering + ch_repertoire_by_processing.bulk + .dump(tag: 'bulk') + + BULK_QC_AND_FILTER( + ch_repertoire_by_processing.bulk, + VDJ_ANNOTATION.out.imgt.collect() + ) + ch_versions = ch_versions.mix( BULK_QC_AND_FILTER.out.versions.ifEmpty([])) + + ch_bulk_filtered = BULK_QC_AND_FILTER.out.repertoires + + // Single cell: QC and filtering + ch_repertoire_by_processing.single + .dump(tag: 'single') + + SINGLE_CELL_QC_AND_FILTERING( + ch_repertoire_by_processing.single + ) + ch_versions = ch_versions.mix( SINGLE_CELL_QC_AND_FILTERING.out.versions.ifEmpty([]) ) + + // Mixing bulk and single cell channels for clonal analysis + ch_repertoires_for_clones = ch_bulk_filtered + .mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires) + .dump(tag: 'sc bulk mix') + + // Clonal analysis + CLONAL_ANALYSIS( + ch_repertoires_for_clones, + VDJ_ANNOTATION.out.imgt.collect(), + ch_report_logo_img.collect().ifEmpty([]) + ) + ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions.ifEmpty([])) + + if (!params.skip_report){ + REPERTOIRE_ANALYSIS_REPORTING( + ch_presto_filterseq_logs.collect().ifEmpty([]), + ch_presto_maskprimers_logs.collect().ifEmpty([]), + ch_presto_pairseq_logs.collect().ifEmpty([]), + ch_presto_clustersets_logs.collect().ifEmpty([]), + ch_presto_buildconsensus_logs.collect().ifEmpty([]), + ch_presto_postconsensus_pairseq_logs.collect().ifEmpty([]), + ch_presto_assemblepairs_logs.collect().ifEmpty([]), + ch_presto_collapseseq_logs.collect().ifEmpty([]), + ch_presto_splitseq_logs.collect().ifEmpty([]), + ch_reassign_logs.collect().ifEmpty([]), + VDJ_ANNOTATION.out.changeo_makedb_logs.collect().ifEmpty([]), + VDJ_ANNOTATION.out.logs.collect().ifEmpty([]), + BULK_QC_AND_FILTER.out.logs.collect().ifEmpty([]), + SINGLE_CELL_QC_AND_FILTERING.out.logs.collect().ifEmpty([]), + 
CLONAL_ANALYSIS.out.logs.collect().ifEmpty([]), + CLONAL_ANALYSIS.out.repertoire, + ch_input.collect(), + ch_report_rmd.collect(), + ch_report_css.collect(), + ch_report_logo.collect(), + ch_validated_samplesheet.collect() + ) + } + ch_versions = ch_versions.mix( REPERTOIRE_ANALYSIS_REPORTING.out.versions ) + ch_versions.dump(tag: "channel_versions") + // Software versions + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + if (!params.skip_multiqc) { + workflow_summary = WorkflowAirrflow.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + ch_multiqc_files = Channel.empty() + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_json.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc_postassembly_mqc.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.collect(), + ch_multiqc_custom_config.collect().ifEmpty([]), + ch_report_logo.collect().ifEmpty([]) + ) + multiqc_report = MULTIQC.out.report.toList() + } + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + + if (params.hook_url) { + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/bcellmagic.nf b/workflows/bcellmagic.nf deleted file mode 100644 index caefe289..00000000 --- a/workflows/bcellmagic.nf +++ /dev/null @@ -1,446 +0,0 @@ -#!/usr/bin/env nextflow -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowBcellmagic.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = Channel.fromPath(params.input, checkIfExists: true) } else { exit 1, "Please provide input file containing the sample metadata with the '--input' option." } - -if (!params.library_generation_method) { - exit 1, "Please specify a library generation method with the `--library_generation_method` option." 
-} - -// Check other params -if (params.adapter_fasta) { ch_adapter_fasta = Channel.fromPath(params.adapter_fasta, checkIfExists: true) } else { ch_adapter_fasta = [] } - -// Validate library generation method parameter -if (params.library_generation_method == 'specific_pcr_umi'){ - if (params.vprimers) { - ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) - } else { - exit 1, "Please provide a V-region primers fasta file with the '--vprimers' option when using the 'specific_pcr_umi' library generation method." - } - if (params.cprimers) { - ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) - } else { - exit 1, "Please provide a C-region primers fasta file with the '--cprimers' option when using the 'specific_pcr_umi' library generation method." - } - if (params.race_linker) { - exit 1, "Please do not set '--race_linker' when using the 'specific_pcr_umi' library generation method." - } - if (params.umi_length < 2) { - exit 1, "The 'specific_pcr_umi' library generation method requires setting the '--umi_length' to a value greater than 1." - } -} else if (params.library_generation_method == 'specific_pcr') { - if (params.vprimers) { - ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) - } else { - exit 1, "Please provide a V-region primers fasta file with the '--vprimers' option when using the 'specific_pcr' library generation method." - } - if (params.cprimers) { - ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) - } else { - exit 1, "Please provide a C-region primers fasta file with the '--cprimers' option when using the 'specific_pcr' library generation method." - } - if (params.race_linker) { - exit 1, "Please do not set '--race_linker' when using the 'specific_pcr' library generation method." - } - if (params.umi_length > 0) { - exit 1, "Please do not set a UMI length with the library preparation method 'specific_pcr'. Please specify instead a method that suports umi." - } else { - params.umi_length = 0 - } -} else if (params.library_generation_method == 'dt_5p_race_umi') { - if (params.vprimers) { - exit 1, "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." - } else if (params.race_linker) { - ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) - } else { - exit 1, "The oligo-dT 5'-RACE UMI library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." - } - if (params.cprimers) { - ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) - } else { - exit 1, "The oligo-dT 5'-RACE UMI library generation method requires the C-region primer sequences, please provide a fasta file with the '--cprimers' option." - } - if (params.umi_length < 2) { - exit 1, "The oligo-dT 5'-RACE UMI 'dt_5p_race_umi' library generation method requires specifying the '--umi_length' to a value greater than 1." - } -} else if (params.library_generation_method == 'dt_5p_race') { - if (params.vprimers) { - exit 1, "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." 
- } else if (params.race_linker) { - ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) - } else { - exit 1, "The oligo-dT 5'-RACE library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." - } - if (params.cprimers) { - ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) - } else { - exit 1, "The oligo-dT 5'-RACE library generation method requires the C-region primer sequences, please provide a fasta file with the '--cprimers' option." - } - if (params.umi_length > 0) { - exit 1, "Please do not set a UMI length with the library preparation method oligo-dT 5'-RACE 'dt_5p_race'. Please specify instead a method that suports umi (e.g. 'dt_5p_race_umi')." - } else { - params.umi_length = 0 - } -} else { - exit 1, "The provided library generation method is not supported. Please check the docs for `--library_generation_method`." -} - -// Validate UMI position -if (params.index_file & params.umi_position == 'R2') {exit 1, "Please do not set `--umi_position` option if index file with UMIs is provided."} -if (params.umi_length < 0) {exit 1, "Please provide the UMI barcode length in the option `--umi_length`. To run without UMIs, set umi_length to 0."} -if (!params.index_file & params.umi_start != 0) {exit 1, "Setting a UMI start position is only allowed when providing the UMIs in a separate index read file. If so, please provide the `--index_file` flag as well."} - -// Rmarkdown report file -ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true) -ch_report_css = Channel.fromPath(params.report_css, checkIfExists: true) -ch_report_logo = Channel.fromPath(params.report_logo, checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -//CHANGEO -include { FETCH_DATABASES } from '../modules/local/fetch_databases' -include { UNZIP_DB as UNZIP_IGBLAST } from '../modules/local/unzip_db' -include { UNZIP_DB as UNZIP_IMGT } from '../modules/local/unzip_db' -include { CHANGEO_ASSIGNGENES } from '../modules/local/changeo/changeo_assigngenes' -include { CHANGEO_MAKEDB } from '../modules/local/changeo/changeo_makedb' -include { CHANGEO_PARSEDB_SPLIT } from '../modules/local/changeo/changeo_parsedb_split' -include { CHANGEO_PARSEDB_SELECT } from '../modules/local/changeo/changeo_parsedb_select' -include { CHANGEO_CONVERTDB_FASTA } from '../modules/local/changeo/changeo_convertdb_fasta' - -//SHAZAM -include { SHAZAM_THRESHOLD } from '../modules/local/shazam/shazam_threshold' - -//CHANGEO -include { CHANGEO_DEFINECLONES } from '../modules/local/changeo/changeo_defineclones' -include { CHANGEO_CREATEGERMLINES } from '../modules/local/changeo/changeo_creategermlines' -include { CHANGEO_BUILDTREES } from '../modules/local/changeo/changeo_buildtrees' - -//ALAKAZAM -include { ALAKAZAM_LINEAGE } from '../modules/local/alakazam/alakazam_lineage' -include { ALAKAZAM_SHAZAM_REPERTOIRES } from '../modules/local/alakazam/alakazam_shazam_repertoires' - -//LOG PARSING -include { PARSE_LOGS } from '../modules/local/parse_logs' - -// Local: Sub-workflows -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { MERGE_TABLES_WF } from '../subworkflows/local/merge_tables_wf' -include { PRESTO_UMI } from '../subworkflows/local/presto_umi' -include { PRESTO_SANS_UMI } from '../subworkflows/local/presto_sans_umi' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow BCELLMAGIC { - - ch_versions = Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( ch_input ) - - INPUT_CHECK.out.reads.dump(tag: 'input reads') - - ch_reads = INPUT_CHECK - .out - .reads - .dump(tag: 'input reads') - - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - if (params.umi_length == 0) { - // - // SUBWORKFLOW: pRESTO without UMIs - // - PRESTO_SANS_UMI ( - ch_reads, - ch_cprimers_fasta, - ch_vprimers_fasta, - ch_adapter_fasta - ) - ch_presto_fasta = PRESTO_SANS_UMI.out.fasta - ch_presto_software = PRESTO_SANS_UMI.out.software - ch_fastp_reads_html = PRESTO_SANS_UMI.out.fastp_reads_html - ch_fastp_reads_json = PRESTO_SANS_UMI.out.fastp_reads_json - ch_fastqc_postassembly_gz = PRESTO_SANS_UMI.out.fastqc_postassembly_gz - ch_presto_assemblepairs_logs = PRESTO_SANS_UMI.out.presto_assemblepairs_logs - ch_presto_filterseq_logs = 
PRESTO_SANS_UMI.out.presto_filterseq_logs - ch_presto_maskprimers_logs = PRESTO_SANS_UMI.out.presto_maskprimers_logs - ch_presto_collapseseq_logs = PRESTO_SANS_UMI.out.presto_collapseseq_logs - ch_presto_splitseq_logs = PRESTO_SANS_UMI.out.presto_splitseq_logs - // These channels will be empty in the sans-UMI workflow - ch_presto_pairseq_logs = Channel.empty() - ch_presto_clustersets_logs = Channel.empty() - ch_presto_buildconsensus_logs = Channel.empty() - ch_presto_postconsensus_pairseq_logs = Channel.empty() - } else { - // - // SUBWORKFLOW: pRESTO with UMIs - // - PRESTO_UMI ( - ch_reads, - ch_cprimers_fasta, - ch_vprimers_fasta, - ch_adapter_fasta - ) - ch_presto_fasta = PRESTO_UMI.out.fasta - ch_presto_software = PRESTO_UMI.out.software - ch_fastp_reads_html = PRESTO_UMI.out.fastp_reads_html - ch_fastp_reads_json = PRESTO_UMI.out.fastp_reads_json - ch_fastqc_postassembly_gz = PRESTO_UMI.out.fastqc_postassembly_gz - ch_presto_filterseq_logs = PRESTO_UMI.out.presto_filterseq_logs - ch_presto_maskprimers_logs = PRESTO_UMI.out.presto_maskprimers_logs - ch_presto_pairseq_logs = PRESTO_UMI.out.presto_pairseq_logs - ch_presto_clustersets_logs = PRESTO_UMI.out.presto_clustersets_logs - ch_presto_buildconsensus_logs = PRESTO_UMI.out.presto_buildconsensus_logs - ch_presto_postconsensus_pairseq_logs = PRESTO_UMI.out.presto_postconsensus_pairseq_logs - ch_presto_assemblepairs_logs = PRESTO_UMI.out.presto_assemblepairs_logs - ch_presto_collapseseq_logs = PRESTO_UMI.out.presto_collapseseq_logs - ch_presto_splitseq_logs = PRESTO_UMI.out.presto_splitseq_logs - } - - ch_versions = ch_versions.mix(ch_presto_software) - - // FETCH DATABASES - // If paths to databases are provided - if( params.igblast_base ){ - - if (params.igblast_base.endsWith(".zip")) { - Channel.fromPath("${params.igblast_base}") - .ifEmpty{ exit 1, "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast_zipped } - UNZIP_IGBLAST( ch_igblast_zipped.collect() ) - ch_igblast = UNZIP_IGBLAST.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions.ifEmpty(null)) - } else { - Channel.fromPath("${params.igblast_base}") - .ifEmpty { exit 1, "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast } - } - } - if( params.imgtdb_base ){ - - if (params.imgtdb_base.endsWith(".zip")) { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty{ exit 1, "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt_zipped } - UNZIP_IMGT( ch_imgt_zipped.collect() ) - ch_imgt = UNZIP_IMGT.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null)) - } else { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { exit 1, "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt } - } - } - - if (!params.igblast_base | !params.imgtdb_base) { - FETCH_DATABASES() - ch_igblast = FETCH_DATABASES.out.igblast - ch_imgt = FETCH_DATABASES.out.imgt - ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null)) - } - - // Run Igblast for gene assignment - CHANGEO_ASSIGNGENES ( - ch_presto_fasta, - ch_igblast.collect() - ) - ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null)) - - // Make IgBlast results table - CHANGEO_MAKEDB ( - CHANGEO_ASSIGNGENES.out.fasta, - CHANGEO_ASSIGNGENES.out.blast, - ch_imgt.collect() - ) - ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null)) - - // Select only productive sequences. 
- CHANGEO_PARSEDB_SPLIT ( - CHANGEO_MAKEDB.out.tab - ) - ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT.out.versions.ifEmpty(null)) - - // Selecting IGH for ig loci, TR for tr loci. - CHANGEO_PARSEDB_SELECT( - CHANGEO_PARSEDB_SPLIT.out.tab - ) - ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SELECT.out.versions.ifEmpty(null)) - - // Convert sequence table to fasta. - CHANGEO_CONVERTDB_FASTA ( - CHANGEO_PARSEDB_SELECT.out.tab - ) - ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA.out.versions.ifEmpty(null)) - - // Subworkflow: merge tables from the same patient - MERGE_TABLES_WF( - CHANGEO_PARSEDB_SELECT.out.tab, - ch_input.collect() - ) - - // Shazam clonal threshold - // Only if threshold is not manually set - if (!params.set_cluster_threshold){ - SHAZAM_THRESHOLD( - MERGE_TABLES_WF.out.tab, - ch_imgt.collect() - ) - ch_tab_for_changeo_defineclones = SHAZAM_THRESHOLD.out.tab - ch_threshold = SHAZAM_THRESHOLD.out.threshold - ch_versions = ch_versions.mix(SHAZAM_THRESHOLD.out.versions.ifEmpty(null)) - } else { - ch_tab_for_changeo_defineclones = MERGE_TABLES_WF.out.tab - ch_threshold = file('EMPTY') - } - - // Define B-cell clones - CHANGEO_DEFINECLONES( - ch_tab_for_changeo_defineclones, - ch_threshold, - ) - ch_versions = ch_versions.mix(CHANGEO_DEFINECLONES.out.versions.ifEmpty(null)) - - // Identify germline sequences - CHANGEO_CREATEGERMLINES( - CHANGEO_DEFINECLONES.out.tab, - ch_imgt.collect() - ) - ch_versions = ch_versions.mix(CHANGEO_CREATEGERMLINES.out.versions.ifEmpty(null)) - - // Lineage reconstruction alakazam - if (!params.skip_lineage) { - ALAKAZAM_LINEAGE( - CHANGEO_CREATEGERMLINES.out.tab - ) - ch_versions = ch_versions.mix(ALAKAZAM_LINEAGE.out.versions.ifEmpty(null)) - } - - ch_all_tabs_repertoire = CHANGEO_CREATEGERMLINES.out.tab - .map{ it -> [ it[1] ] } - .collect() - - // Process logs parsing: getting sequence numbers - PARSE_LOGS( - ch_presto_filterseq_logs.collect(), - ch_presto_maskprimers_logs.collect(), - ch_presto_pairseq_logs.collect().ifEmpty([]), - ch_presto_clustersets_logs.collect().ifEmpty([]), - ch_presto_buildconsensus_logs.collect().ifEmpty([]), - ch_presto_postconsensus_pairseq_logs.collect().ifEmpty([]), - ch_presto_assemblepairs_logs.collect(), - ch_presto_collapseseq_logs.collect(), - ch_presto_splitseq_logs.collect(), - CHANGEO_MAKEDB.out.logs.collect(), - ch_input - ) - ch_versions = ch_versions.mix(PARSE_LOGS.out.versions.ifEmpty(null)) - - // Alakazam shazam repertoire comparison report - if (!params.skip_report){ - ALAKAZAM_SHAZAM_REPERTOIRES( - ch_all_tabs_repertoire, - PARSE_LOGS.out.logs.collect(), - ch_report_rmd.collect(), - ch_report_css.collect(), - ch_report_logo.collect() - ) - ch_versions = ch_versions.mix(ALAKAZAM_SHAZAM_REPERTOIRES.out.versions.ifEmpty(null)) - } - - // Software versions - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - if (!params.skip_multiqc) { - workflow_summary = WorkflowBcellmagic.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_reads_json.ifEmpty([])) - ch_multiqc_files = 
ch_multiqc_files.mix(ch_fastp_reads_html.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc_postassembly_gz.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.collect(), - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_report_logo.collect().ifEmpty([]) - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix( MULTIQC.out.versions ) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/reveal.nf b/workflows/reveal.nf deleted file mode 100644 index 7627e7fb..00000000 --- a/workflows/reveal.nf +++ /dev/null @@ -1,390 +0,0 @@ -#!/usr/bin/env nextflow -/* -======================================================================================== - VALIDATE INPUTS -======================================================================================== -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowBcellmagic.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = Channel.fromPath(params.input) } else { exit 1, "Please provide input file containing the sample metadata with the '--input' option." } - -if (params.miairr) { - file(params.miairr, checkIfExists: true) -} - -// If paths to databases are provided -if( params.igblast_base ){ - Channel.fromPath("${params.igblast_base}") - .ifEmpty { exit 1, "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast } -} -if( params.imgtdb_base ){ - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { exit 1, "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt } -} - -/* -======================================================================================== - CONFIG FILES -======================================================================================== -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty()
-
-/*
-========================================================================================
-    IMPORT LOCAL MODULES/SUBWORKFLOWS
-========================================================================================
-*/
-
-// Modules: local
-include { IMMCANTATION } from '../modules/local/reveal/immcantation_container_version'
-include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../modules/local/changeo/changeo_convertdb_fasta'
-include { FETCH_DATABASES } from '../modules/local/fetch_databases'
-include { UNZIP_DB as UNZIP_IGBLAST } from '../modules/local/unzip_db'
-include { UNZIP_DB as UNZIP_IMGT } from '../modules/local/unzip_db'
-include { CHANGEO_ASSIGNGENES_REVEAL } from '../modules/local/reveal/changeo_assigngenes_reveal'
-include { CHANGEO_MAKEDB_REVEAL } from '../modules/local/reveal/changeo_makedb_reveal'
-include { FILTER_QUALITY } from '../modules/local/reveal/filter_quality'
-include { CHANGEO_PARSEDB_SPLIT as CHANGEO_PARSEDB_SPLIT_REVEAL} from '../modules/local/changeo/changeo_parsedb_split'
-include { FILTER_JUNCTION_MOD3 } from '../modules/local/reveal/filter_junction_mod3'
-include { CHANGEO_CREATEGERMLINES_REVEAL as CREATEGERMLINES } from '../modules/local/reveal/changeo_creategermlines_reveal'
-include { REMOVE_CHIMERIC } from '../modules/local/enchantr/remove_chimeric'
-include { SINGLE_CELL_QC } from '../modules/local/enchantr/single_cell_qc'
-include { ADD_META_TO_TAB } from '../modules/local/reveal/add_meta_to_tab'
-include { DETECT_CONTAMINATION } from '../modules/local/enchantr/detect_contamination'
-include { COLLAPSE_DUPLICATES } from '../modules/local/enchantr/collapse_duplicates'
-include { FIND_THRESHOLD } from '../modules/local/enchantr/find_threshold'
-include { DEFINE_CLONES } from '../modules/local/enchantr/define_clones'
-include { DOWSER_LINEAGES } from '../modules/local/enchantr/dowser_lineages'
-include { REPORT_FILE_SIZE } from '../modules/local/enchantr/report_file_size'
-
-// Local: Sub-workflows
-include { REVEAL_INPUT_CHECK } from '../subworkflows/local/reveal_input_check'
-
-/*
-========================================================================================
-    IMPORT NF-CORE MODULES/SUBWORKFLOWS
-========================================================================================
-*/
-
-//
-// MODULE: Installed directly from nf-core/modules
-//
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-
-/*
-========================================================================================
-    RUN MAIN WORKFLOW
-========================================================================================
-*/
-
-// Info required for completion email and summary
-def multiqc_report = []
-
-workflow REVEAL {
-
-    log.warn "\n----------\nREVEAL lifecycle stage: experimental.\n----------\n"
-
-    ch_versions = Channel.empty()
-    ch_file_sizes = Channel.empty()
-
-    IMMCANTATION()
-    ch_versions = ch_versions.mix(IMMCANTATION.out.versions)
-
-    // SUBWORKFLOW: Read in samplesheet, validate
-    // and emit channels for fasta and tsv files
-    REVEAL_INPUT_CHECK (ch_input, params.miairr, params.collapseby, params.cloneby, params.reassign)
-
-    // If reassign requested, generate fasta from the tsv files
-    if (params.reassign) {
-        CHANGEO_CONVERTDB_FASTA_FROM_AIRR(
-            REVEAL_INPUT_CHECK.out.ch_tsv
-        )
-        ch_fasta_from_tsv = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta
-        ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions.ifEmpty(null))
-        ch_file_sizes = ch_file_sizes.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.logs)
-    } else {
-        ch_fasta_from_tsv = Channel.empty()
-    }
-
-
-    // mix all fasta
-    ch_fasta = REVEAL_INPUT_CHECK.out.ch_fasta.mix(ch_fasta_from_tsv)
-
-    // FETCH DATABASES
-    // TODO: this can take a long time, and the progress shows 0%. Would be
-    // nice to have some better progress reporting.
-    // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST?
-    if( params.igblast_base ){
-
-        if (params.igblast_base.endsWith(".zip")) {
-            Channel.fromPath("${params.igblast_base}")
-                .ifEmpty{ exit 1, "IGBLAST DB not found: ${params.igblast_base}" }
-                .set { ch_igblast_zipped }
-            UNZIP_IGBLAST( ch_igblast_zipped.collect() )
-            ch_igblast = UNZIP_IGBLAST.out.unzipped
-            ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions.ifEmpty(null))
-        } else {
-            Channel.fromPath("${params.igblast_base}")
-                .ifEmpty { exit 1, "IGBLAST DB not found: ${params.igblast_base}" }
-                .set { ch_igblast }
-        }
-    }
-    if( params.imgtdb_base ){
-
-        if (params.imgtdb_base.endsWith(".zip")) {
-            Channel.fromPath("${params.imgtdb_base}")
-                .ifEmpty{ exit 1, "IMGTDB not found: ${params.imgtdb_base}" }
-                .set { ch_imgt_zipped }
-            UNZIP_IMGT( ch_imgt_zipped.collect() )
-            ch_imgt = UNZIP_IMGT.out.unzipped
-            ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null))
-        } else {
-            Channel.fromPath("${params.imgtdb_base}")
-                .ifEmpty { exit 1, "IMGTDB not found: ${params.imgtdb_base}" }
-                .set { ch_imgt }
-        }
-    }
-
-    if (!params.igblast_base | !params.imgtdb_base) {
-        FETCH_DATABASES()
-        ch_igblast = FETCH_DATABASES.out.igblast
-        ch_imgt = FETCH_DATABASES.out.imgt
-        ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null))
-    }
-
-    // Run Igblast for gene assignment
-    CHANGEO_ASSIGNGENES_REVEAL (
-        ch_fasta,
-        ch_igblast.collect()
-    )
-    ch_file_sizes = ch_file_sizes.mix(CHANGEO_ASSIGNGENES_REVEAL.out.logs)
-    ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES_REVEAL.out.versions.ifEmpty(null))
-
-    // Parse IgBlast results
-    CHANGEO_MAKEDB_REVEAL (
-        CHANGEO_ASSIGNGENES_REVEAL.out.fasta,
-        CHANGEO_ASSIGNGENES_REVEAL.out.blast,
-        ch_imgt.collect()
-    )
-    ch_file_sizes = ch_file_sizes.mix(CHANGEO_MAKEDB_REVEAL.out.logs)
-    ch_versions = ch_versions.mix(CHANGEO_MAKEDB_REVEAL.out.versions.ifEmpty(null))
-
-    // Apply quality filters
-    // TODO: mv to enchantr and emit versions
-    FILTER_QUALITY(CHANGEO_MAKEDB_REVEAL.out.tab)
-    ch_file_sizes = ch_file_sizes.mix(FILTER_QUALITY.out.logs)
-
-    // Select only productive sequences and
-    // sequences with junction length multiple of 3
-    if (params.productive_only) {
-        CHANGEO_PARSEDB_SPLIT_REVEAL (
-            FILTER_QUALITY.out.tab
-        )
-        ch_file_sizes = ch_file_sizes.mix(CHANGEO_PARSEDB_SPLIT_REVEAL.out.logs)
-        ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT_REVEAL.out.versions.ifEmpty(null))
-
-        // TODO: Add to enchantr and emit versions?
-        FILTER_JUNCTION_MOD3(
-            CHANGEO_PARSEDB_SPLIT_REVEAL.out.tab
-        )
-        ch_file_sizes = ch_file_sizes.mix(FILTER_JUNCTION_MOD3.out.logs)
-        ch_repertoire = FILTER_JUNCTION_MOD3.out.tab.ifEmpty(null)
-    } else {
-        ch_repertoire = FILTER_QUALITY.out.tab.ifEmpty(null)
-    }
-
-    // Add metadata to the rearrangement files, to be used later
-    // for grouping, subsetting, plotting....
-    ADD_META_TO_TAB(
-        ch_repertoire,
-        REVEAL_INPUT_CHECK.out.validated_input.collect()
-    )
-    ch_file_sizes = ch_file_sizes.mix(ADD_META_TO_TAB.out.logs)
-
-    ch_repertoire_by_processing = ADD_META_TO_TAB.out.tab
-        .dump(tag: 'meta_to_tab_out')
-        .branch { it ->
-            single: it[0].single_cell == 'true'
-            bulk: it[0].single_cell == 'false'
-        }
-
-    ch_repertoire_by_processing.bulk
-        .dump(tag: 'bulk')
-
-    ch_repertoire_by_processing.single
-        .dump(tag: 'single')
-
-    // For bulk datasets, remove chimeric sequences
-    // if requested
-    if (params.remove_chimeric) {
-
-        // Create germlines (not --cloned)
-        CREATEGERMLINES(
-            ch_repertoire_by_processing.bulk,
-            ch_imgt.collect()
-        )
-        ch_file_sizes = ch_file_sizes.mix(CREATEGERMLINES.out.logs)
-        ch_versions = ch_versions.mix(CREATEGERMLINES.out.versions.ifEmpty(null))
-
-        // Remove chimera
-        REMOVE_CHIMERIC(
-            CREATEGERMLINES.out.tab,
-            ch_imgt.collect()
-        )
-        ch_file_sizes = ch_file_sizes.mix(REMOVE_CHIMERIC.out.logs)
-        ch_bulk_chimeric_pass = REMOVE_CHIMERIC.out.tab
-        ch_versions = ch_versions.mix(REMOVE_CHIMERIC.out.versions.ifEmpty(null))
-
-    } else {
-        ch_bulk_chimeric_pass = ch_repertoire_by_processing.bulk
-    }
-
-    // For Bulk data, detect cross-contamination
-    // This is only informative at this time
-    // TODO: add a flag to specify remove suspicious sequences
-    // and update file size log accordingly
-    DETECT_CONTAMINATION(
-        ch_bulk_chimeric_pass
-            .map{ it -> [ it[1] ] }
-            .collect(),
-        'id')
-    // TODO file size
-    ch_versions = ch_versions.mix(DETECT_CONTAMINATION.out.versions.ifEmpty(null))
-
-    COLLAPSE_DUPLICATES(
-        ch_bulk_chimeric_pass
-            .map{ it -> [ it[1] ] }
-            .collect(),
-        params.collapseby
-    )
-    ch_versions = ch_versions.mix(COLLAPSE_DUPLICATES.out.versions.ifEmpty(null))
-    // TODO file size
-    // TODO channel by params.cloneby
-
-
-    // For single cell, specific QC
-    // analyze all files together, looking for overlaps
-    SINGLE_CELL_QC(
-        ch_repertoire_by_processing.single
-            .map{ it -> [ it[1] ] }
-            .collect()
-    )
-    ch_file_sizes = ch_file_sizes.mix(SINGLE_CELL_QC.out.logs)
-    ch_versions = ch_versions.mix(SINGLE_CELL_QC.out.versions.ifEmpty(null))
-
-    // If params.threshold is auto,
-    // 1) use distToNearest and findThreshold to determine
-    // the threshold that will be used to identify sets of clonally
-    // related sequences. If threshold found, continue, to 2), else,
-    // stop and report a threshold could be identified.
-    // 2) create a report with plots of the distToNearest distribution
-    // and the threshold.
-    // Else
-    // Use the threshold to find clones, grouping by params.cloneby and
-    // create a report
-
-    if (params.threshold == "auto") {
-        FIND_THRESHOLD (
-            COLLAPSE_DUPLICATES.out.tab.mix(SINGLE_CELL_QC.out.tab)
-                .map{ it -> [ it[1] ] }
-                .collect(),
-            params.cloneby,
-            params.singlecell
-        )
-        ch_threshold = FIND_THRESHOLD.out.mean_threshold
-
-        clone_threshold = ch_threshold
-            .splitText( limit:1 ) { it.trim().toString() }
-            .dump(tag: 'clone_threshold')
-            .filter { it != 'NA'}
-            .dump(tag: "threshold")
-            .ifEmpty { exit 1, "Automatic clone_threshold is 'NA'. Consider setting params.threshold manually."}
-
-    } else {
-        // TODO: Fix * --threshold: expected type: String, found: Integer (1)
-        clone_threshold = params.threshold
-    }
-
-    DEFINE_CLONES(
-        SINGLE_CELL_QC.out.tab.mix(COLLAPSE_DUPLICATES.out.tab)
-            .map{ it -> [ it[1] ] }
-            .collect(),
-        params.cloneby,
-        params.singlecell,
-        clone_threshold,
-        ch_imgt.collect()
-    )
-
-    DOWSER_LINEAGES(
-        DEFINE_CLONES.out.tab
-            .map{ it -> [ it[1] ] }
-            .flatten()
-    )
-
-    // TODO fix file sizes
-    // Process logs to report file sizes at each step
-    REPORT_FILE_SIZE (
-        ch_file_sizes.map { it }.collect()
-    )
-
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
-    //
-    // MODULE: MultiQC
-    //
-    if (!params.skip_multiqc) {
-        workflow_summary = WorkflowBcellmagic.paramsSummaryMultiqc(workflow, summary_params)
-        ch_workflow_summary = Channel.value(workflow_summary)
-
-        ch_multiqc_files = Channel.empty()
-        ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config)
-        ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([]))
-        ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')
-        ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-        ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-
-        // ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.yaml.collect())
-
-        MULTIQC (
-            ch_multiqc_files.collect()
-        )
-        multiqc_report = MULTIQC.out.report.toList()
-        ch_versions = ch_versions.mix(MULTIQC.out.versions)
-    }
-
-}
-
-/*
-========================================================================================
-    COMPLETION EMAIL AND SUMMARY
-========================================================================================
-*/
-
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.summary(workflow, params, log)
-}
-
-/*
-========================================================================================
-    THE END
-========================================================================================
-*/
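
Illustrative sketch, not part of the diff above: the removed REVEAL workflow separates bulk from single-cell repertoires with Nextflow's branch operator, keyed on the single_cell field of each meta map. A minimal, self-contained example of that pattern, using hypothetical sample names and file paths:

workflow {
    Channel.of(
        [ [ id: 'sample1', single_cell: 'false' ], file('sample1.tsv') ], // hypothetical bulk sample
        [ [ id: 'sample2', single_cell: 'true'  ], file('sample2.tsv') ]  // hypothetical single-cell sample
    )
        .branch { it ->
            single: it[0].single_cell == 'true'
            bulk: it[0].single_cell == 'false'
        }
        .set { ch_repertoire_by_processing }

    // Each branch behaves as an independent channel and can feed different processes.
    ch_repertoire_by_processing.bulk.view { "bulk: ${it[0].id}" }
    ch_repertoire_by_processing.single.view { "single-cell: ${it[0].id}" }
}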
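
Illustrative note on the dump(tag: '...') calls in the removed workflow (standard Nextflow behaviour, not airrflow-specific): they are silent by default and only print channel contents when the run is launched with the -dump-channels option, optionally restricted to a comma-separated list of tags. A hypothetical invocation for inspecting the bulk/single-cell split:

nextflow run . -profile test,docker --outdir results -dump-channels bulk,single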