From d6d8fee17113274a16d6b7ce8e21eb5e39b7323a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Jun 2024 14:44:32 +0200 Subject: [PATCH 1/5] Replace PORECHOP with official module --- CHANGELOG.md | 4 + conf/modules.config | 4 +- modules.json | 5 ++ modules/local/porechop.nf | 25 ------ .../nf-core/porechop/porechop/environment.yml | 7 ++ modules/nf-core/porechop/porechop/main.nf | 48 ++++++++++ modules/nf-core/porechop/porechop/meta.yml | 62 +++++++++++++ .../porechop/porechop/tests/main.nf.test | 61 +++++++++++++ .../porechop/porechop/tests/main.nf.test.snap | 88 +++++++++++++++++++ .../porechop/porechop/tests/nextflow.config | 9 ++ .../nf-core/porechop/porechop/tests/tags.yml | 2 + workflows/mag.nf | 8 +- 12 files changed, 292 insertions(+), 31 deletions(-) delete mode 100644 modules/local/porechop.nf create mode 100644 modules/nf-core/porechop/porechop/environment.yml create mode 100644 modules/nf-core/porechop/porechop/main.nf create mode 100644 modules/nf-core/porechop/porechop/meta.yml create mode 100644 modules/nf-core/porechop/porechop/tests/main.nf.test create mode 100644 modules/nf-core/porechop/porechop/tests/main.nf.test.snap create mode 100644 modules/nf-core/porechop/porechop/tests/nextflow.config create mode 100644 modules/nf-core/porechop/porechop/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index bd58f661..5f71f21a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` +| Tool | Previous version | New version | +| -------- | ---------------- | ----------- | +| Porechop | 0.2.3_seqan2.1.1 | 0.2.4 | + ### `Deprecated` ## 3.0.1 [2024-06-10] diff --git a/conf/modules.config b/conf/modules.config index a3be8574..d3ff4dbc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -167,11 +167,11 @@ process { ] } - withName: PORECHOP { + withName: PORECHOP_PORECHOP { publishDir = [ path: { 
"${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, - pattern: "*_porechop.fastq", + pattern: "*_trimmed.fastq.gz", enabled: params.save_porechop_reads ] ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } diff --git a/modules.json b/modules.json index cabd3125..0d150080 100644 --- a/modules.json +++ b/modules.json @@ -197,6 +197,11 @@ "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, + "porechop/porechop": { + "branch": "master", + "git_sha": "1d68c7f248d1a480c5959548a9234602b771199e", + "installed_by": ["modules"] + }, "prodigal": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", diff --git a/modules/local/porechop.nf b/modules/local/porechop.nf deleted file mode 100644 index 91576887..00000000 --- a/modules/local/porechop.nf +++ /dev/null @@ -1,25 +0,0 @@ -process PORECHOP { - tag "$meta.id" - - conda "bioconda::porechop=0.2.3_seqan2.1.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/porechop:0.2.3_seqan2.1.1--py36h2d50403_3' : - 'biocontainers/porechop:0.2.3_seqan2.1.1--py36h2d50403_3' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("${meta.id}_porechop.fastq") , emit: reads - path "versions.yml" , emit: versions - - script: - """ - porechop -i ${reads} -t ${task.cpus} -o ${meta.id}_porechop.fastq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - porechop: \$(porechop --version) - END_VERSIONS - """ -} diff --git a/modules/nf-core/porechop/porechop/environment.yml b/modules/nf-core/porechop/porechop/environment.yml new file mode 100644 index 00000000..28b67c16 --- /dev/null +++ b/modules/nf-core/porechop/porechop/environment.yml @@ -0,0 +1,7 @@ +name: porechop_porechop +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::porechop=0.2.4 diff --git a/modules/nf-core/porechop/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf new file mode 100644 index 00000000..1ff02a12 --- /dev/null +++ b/modules/nf-core/porechop/porechop/main.nf @@ -0,0 +1,48 @@ +process PORECHOP_PORECHOP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' : + 'biocontainers/porechop:0.2.4--py39h7cff6ad_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + porechop \\ + -i $reads \\ + -t $task.cpus \\ + $args \\ + -o ${prefix}.fastq.gz \\ + > ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fastq + gzip ${prefix}.fastq + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/porechop/meta.yml b/modules/nf-core/porechop/porechop/meta.yml new file mode 100644 index 00000000..13be76f2 --- /dev/null +++ b/modules/nf-core/porechop/porechop/meta.yml @@ -0,0 +1,62 @@ +name: "porechop_porechop" +description: Adapter removal and demultiplexing of Oxford Nanopore reads +keywords: + - adapter + - nanopore + - demultiplexing +tools: + - porechop: + description: Adapter removal and demultiplexing of Oxford Nanopore reads + homepage: "https://github.com/rrwick/Porechop" + documentation: "https://github.com/rrwick/Porechop" + tool_dev_url: "https://github.com/rrwick/Porechop" + doi: "10.1099/mgen.0.000132" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Demultiplexed and/or adapter-trimmed fastq.gz file + pattern: "*.{fastq.gz}" + - log: + type: file + description: Log file containing stdout information + pattern: "*.log" +authors: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" + - "@jfy133" +maintainers: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" + - "@jfy133" diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test b/modules/nf-core/porechop/porechop/tests/main.nf.test new file mode 100644 index 00000000..4c3c3d65 --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process PORECHOP_PORECHOP" + script "../main.nf" + process "PORECHOP_PORECHOP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/porechop" + + test("sarscov2 - nanopore - fastq") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match("reads") }, + { assert snapshot(process.out.versions).match("versions") }, + // complete log is not stable. 
These first lines should be stable + { assert snapshot(path(process.out.log.get(0).get(1)).readLines()[0..7]).match("log")} + ) + } + + } + + + test("stub") { + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + +} diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test.snap b/modules/nf-core/porechop/porechop/tests/main.nf.test.snap new file mode 100644 index 00000000..cf544d2d --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ] + ], + "timestamp": "2023-12-18T07:47:16.83444" + }, + "log": { + "content": [ + [ + "", + "\u001b[1m\u001b[4mLoading reads\u001b[0m", + "test.fastq.gz", + "100 reads loaded", + "", + "", + "\u001b[1m\u001b[4mLooking for known adapter sets\u001b[0m", + "" + ] + ], + "timestamp": "2023-12-18T07:47:16.853899" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ] + ], + "timestamp": "2023-12-18T07:47:16.811393" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ] + } + ], + 
"timestamp": "2023-12-18T07:47:37.814949" + } +} \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/tests/nextflow.config b/modules/nf-core/porechop/porechop/tests/nextflow.config new file mode 100644 index 00000000..a9ecf7b6 --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + + withName: PORECHOP_PORECHOP { + ext.args = '' + ext.prefix = { "${meta.id}_porechop" } + } + +} diff --git a/modules/nf-core/porechop/porechop/tests/tags.yml b/modules/nf-core/porechop/porechop/tests/tags.yml new file mode 100644 index 00000000..743645c2 --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/tags.yml @@ -0,0 +1,2 @@ +porechop/porechop: + - "modules/nf-core/porechop/porechop/**" diff --git a/workflows/mag.nf b/workflows/mag.nf index 7594b370..24ab9309 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -32,6 +32,7 @@ include { ARIA2 as ARIA2_UNTAR } from '../modul include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' +include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' include { FASTP } from '../modules/nf-core/fastp/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' @@ -56,7 +57,6 @@ include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { PORECHOP } from '../modules/local/porechop' include { NANOLYSE } from 
'../modules/local/nanolyse' include { FILTLONG } from '../modules/local/filtlong' include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' @@ -369,11 +369,11 @@ workflow MAG { if ( !params.assembly_input ) { if (!params.skip_adapter_trimming) { - PORECHOP ( + PORECHOP_PORECHOP ( ch_raw_long_reads ) - ch_long_reads = PORECHOP.out.reads - ch_versions = ch_versions.mix(PORECHOP.out.versions.first()) + ch_long_reads = PORECHOP_PORECHOP.out.reads + ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) } if (!params.keep_lambda) { From 5e452052399355fd0d8f71413e539c084753b915 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Jun 2024 16:37:36 +0200 Subject: [PATCH 2/5] Bump nanoplot version 1.41.6 for valid docker container --- CHANGELOG.md | 1 + conf/modules.config | 11 +- modules.json | 5 + modules/local/nanolyse.nf | 4 +- modules/local/nanoplot.nf | 33 ----- modules/nf-core/nanoplot/environment.yml | 7 + modules/nf-core/nanoplot/main.nf | 58 ++++++++ modules/nf-core/nanoplot/meta.yml | 62 +++++++++ modules/nf-core/nanoplot/tests/main.nf.test | 94 +++++++++++++ .../nf-core/nanoplot/tests/main.nf.test.snap | 131 ++++++++++++++++++ modules/nf-core/nanoplot/tests/tags.yaml | 2 + workflows/mag.nf | 4 +- 12 files changed, 374 insertions(+), 38 deletions(-) delete mode 100644 modules/local/nanoplot.nf create mode 100644 modules/nf-core/nanoplot/environment.yml create mode 100644 modules/nf-core/nanoplot/main.nf create mode 100644 modules/nf-core/nanoplot/meta.yml create mode 100644 modules/nf-core/nanoplot/tests/main.nf.test create mode 100644 modules/nf-core/nanoplot/tests/main.nf.test.snap create mode 100644 modules/nf-core/nanoplot/tests/tags.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f71f21a..7458472a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | Tool | Previous version | New version | | -------- | ---------------- 
| ----------- | | Porechop | 0.2.3_seqan2.1.1 | 0.2.4 | +| NanoPlot | 1.26.3 | 1.41.6 | ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index d3ff4dbc..be17815e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -206,6 +206,11 @@ process { withName: NANOPLOT_RAW { ext.prefix = 'raw' + ext.args = { [ + "-p raw_", + "--title ${meta.id}_raw", + "-c darkblue", + ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, @@ -214,7 +219,11 @@ process { } withName: NANOPLOT_FILTERED { - ext.prefix = 'filtered' + ext.args = { [ + "-p filtered_", + "--title ${meta.id}_filtered", + "-c darkblue", + ].join(' ').trim() } publishDir = [ path: { "${params.outdir}/QC_longreads/NanoPlot/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 0d150080..ddce6955 100644 --- a/modules.json +++ b/modules.json @@ -197,6 +197,11 @@ "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, + "nanoplot": { + "branch": "master", + "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", + "installed_by": ["modules"] + }, "porechop/porechop": { "branch": "master", "git_sha": "1d68c7f248d1a480c5959548a9234602b771199e", diff --git a/modules/local/nanolyse.nf b/modules/local/nanolyse.nf index 4cd46d4f..9e800ef0 100644 --- a/modules/local/nanolyse.nf +++ b/modules/local/nanolyse.nf @@ -17,9 +17,9 @@ process NANOLYSE { script: """ - cat ${reads} | NanoLyse --reference $nanolyse_db | gzip > ${meta.id}_nanolyse.fastq.gz + zcat ${reads} | NanoLyse --reference $nanolyse_db | gzip > ${meta.id}_nanolyse.fastq.gz echo "NanoLyse reference: $params.lambda_reference" >${meta.id}_nanolyse.log - cat ${reads} | echo "total reads before NanoLyse: \$((`wc -l`/4))" >>${meta.id}_nanolyse.log + zcat ${reads} | echo "total reads before NanoLyse: \$((`wc -l`/4))" >>${meta.id}_nanolyse.log gunzip -c ${meta.id}_nanolyse.fastq.gz | echo "total reads 
after NanoLyse: \$((`wc -l`/4))" >> ${meta.id}_nanolyse.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/nanoplot.nf b/modules/local/nanoplot.nf deleted file mode 100644 index d3c347aa..00000000 --- a/modules/local/nanoplot.nf +++ /dev/null @@ -1,33 +0,0 @@ -process NANOPLOT { - tag "$meta.id" - - conda "bioconda::nanoplot=1.26.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nanoplot:1.26.3--py_0' : - 'biocontainers/nanoplot:1.26.3--py_0' }" - - input: - tuple val(meta), path(reads) - - output: - path '*.png' , emit: png - path '*.html' , emit: html - path '*.txt' , emit: txt - path "versions.yml" , emit: versions - - script: - def prefix = task.ext.prefix ? "-p ${task.ext.prefix}_" : '' - def title = task.ext.prefix ? "${meta.id}_${task.ext.prefix}" : "${meta.id}" - """ - NanoPlot -t ${task.cpus} \ - ${prefix} \ - --title ${title} \ - -c darkblue \ - --fastq ${reads} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - NanoPlot: \$(NanoPlot --version | sed -e "s/NanoPlot //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/nanoplot/environment.yml b/modules/nf-core/nanoplot/environment.yml new file mode 100644 index 00000000..219cd2e3 --- /dev/null +++ b/modules/nf-core/nanoplot/environment.yml @@ -0,0 +1,7 @@ +name: nanoplot +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::nanoplot=1.41.6 diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf new file mode 100644 index 00000000..c1816caf --- /dev/null +++ b/modules/nf-core/nanoplot/main.nf @@ -0,0 +1,58 @@ +process NANOPLOT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/nanoplot:1.41.6--pyhdfd78af_0' : + 'biocontainers/nanoplot:1.41.6--pyhdfd78af_0' }" + + input: + tuple val(meta), path(ontfile) + + output: + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.png") , optional: true, emit: png + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_file = ("$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" : + ("$ontfile".endsWith(".txt")) ? "--summary ${ontfile}" : '' + """ + NanoPlot \\ + $args \\ + -t $task.cpus \\ + $input_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoplot: \$(echo \$(NanoPlot --version 2>&1) | sed 's/^.*NanoPlot //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch LengthvsQualityScatterPlot_dot.html + touch LengthvsQualityScatterPlot_kde.html + touch NanoPlot-report.html + touch NanoPlot_20240301_1130.log + touch NanoStats.txt + touch Non_weightedHistogramReadlength.html + touch Non_weightedLogTransformed_HistogramReadlength.html + touch WeightedHistogramReadlength.html + touch WeightedLogTransformed_HistogramReadlength.html + touch Yield_By_Length.html + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoplot: \$(echo \$(NanoPlot --version 2>&1) | sed 's/^.*NanoPlot //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanoplot/meta.yml b/modules/nf-core/nanoplot/meta.yml new file mode 100644 index 00000000..46fbd562 --- /dev/null +++ b/modules/nf-core/nanoplot/meta.yml @@ -0,0 +1,62 @@ +name: nanoplot +description: Run NanoPlot on nanopore-sequenced reads +keywords: + - quality control + - qc + - fastq + - sequencing summary + - nanopore +tools: + - nanoplot: + description: | + NanoPlot is a tool for ploting long-read sequencing data and + alignment. 
+ homepage: http://nanoplot.bioinf.be + documentation: https://github.com/wdecoster/NanoPlot + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: | + List of input basecalled-FastQ files. + - summary_txt: + type: file + description: | + List of sequencing_summary.txt files from running basecalling. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: NanoPlot report + pattern: "*{.html}" + - png: + type: file + description: Plots generated by NanoPlot + pattern: "*{.png}" + - txt: + type: file + description: Stats from NanoPlot + pattern: "*{.txt}" + - log: + type: file + description: log file of NanoPlot run + pattern: "*{.log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa" +maintainers: + - "@drpatelh" + - "@yuukiiwa" diff --git a/modules/nf-core/nanoplot/tests/main.nf.test b/modules/nf-core/nanoplot/tests/main.nf.test new file mode 100644 index 00000000..29b57c10 --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process NANOPLOT" + tag "modules_nfcore" + tag "modules" + tag "nanoplot" + script "../main.nf" + process "NANOPLOT" + + test("NanoPlot summary") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + }, + { + with(process.out.html.get(0)) { + assert get(1).collect { p -> file(p).getName() }.contains("NanoPlot-report.html") + } + } + ) + } + + } + + test("NanoPlot FASTQ") 
{ + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + }, + { + with(process.out.html.get(0)) { + assert get(1).collect { p -> file(p).getName() }.contains("NanoPlot-report.html") + } + } + ) + } + + } + + test("NanoPlot - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/nanoplot/tests/main.nf.test.snap b/modules/nf-core/nanoplot/tests/main.nf.test.snap new file mode 100644 index 00000000..f7f8028a --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "NanoPlot - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + 
"NanoPlot_20240301_1130.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ], + "html": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "log": [ + [ + { + "id": "test" + }, + "NanoPlot_20240301_1130.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-01T14:54:18.083198" + }, + "NanoPlot FASTQ": { + "content": [ + [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,50373c7543e71e3baf040926f0c69ac1" + ] + ], + [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-10-17T16:18:44.848688965" + }, + "NanoPlot summary": { + "content": [ + [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,90464bf7049ca66106de56e7eac23dd4" + ] + ], + [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-10-17T16:18:31.104601192" + } +} \ No newline at end of file diff --git a/modules/nf-core/nanoplot/tests/tags.yaml 
b/modules/nf-core/nanoplot/tests/tags.yaml new file mode 100644 index 00000000..7c6ce3fa --- /dev/null +++ b/modules/nf-core/nanoplot/tests/tags.yaml @@ -0,0 +1,2 @@ +nanoplot: + - modules/nf-core/nanoplot/** diff --git a/workflows/mag.nf b/workflows/mag.nf index 24ab9309..19d7f574 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -33,6 +33,8 @@ include { FASTQC as FASTQC_RAW } from '../modul include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' +include { NANOPLOT as NANOPLOT_RAW } from '../modules/nf-core/nanoplot/main' +include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/nf-core/nanoplot/main' include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' include { FASTP } from '../modules/nf-core/fastp/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' @@ -59,8 +61,6 @@ include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { NANOLYSE } from '../modules/local/nanolyse' include { FILTLONG } from '../modules/local/filtlong' -include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' -include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' From b45d57a9a9d944cbfe8ac82bc885a6e00faab21d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 27 Jun 2024 16:51:30 +0200 Subject: [PATCH 3/5] Remove some old local modules --- conf/base.config | 6 ++--- modules/local/centrifuge_db_preparation.nf | 27 ---------------------- modules/local/krona_db.nf | 21 ----------------- 3 files changed, 3 insertions(+), 51 deletions(-) delete mode 100644 modules/local/centrifuge_db_preparation.nf delete mode 100644 modules/local/krona_db.nf diff --git a/conf/base.config b/conf/base.config index 7dec9e28..4cbf14f0 100644 --- a/conf/base.config +++ b/conf/base.config @@ -77,7 +77,7 @@ process { memory = { check_max (8.GB * task.attempt, 'memory' ) } time = { check_max (6.h * task.attempt, 'time' ) } } - withName: PORECHOP { + withName: PORECHOP_PORECHOP { cpus = { check_max (4 * task.attempt, 'cpus' ) } memory = { check_max (30.GB * task.attempt, 'memory' ) } time = { check_max (4.h * task.attempt, 'time' ) } @@ -93,7 +93,7 @@ process { memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } } - withName: CENTRIFUGE { + withName: CENTRIFUGE_CENTRIFUGE { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (12.h * task.attempt, 'time' ) } @@ -103,7 +103,7 @@ process { memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (12.h * task.attempt, 'time' ) } } - withName: KRONA { + withName: KRONA_KTIMPORTTAXONOMY { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (20.GB * task.attempt, 'memory' ) } time = { check_max (12.h * task.attempt, 'time' ) } diff --git a/modules/local/centrifuge_db_preparation.nf b/modules/local/centrifuge_db_preparation.nf deleted file mode 100644 index fe48512c..00000000 --- a/modules/local/centrifuge_db_preparation.nf +++ /dev/null @@ -1,27 +0,0 @@ -process CENTRIFUGE_DB_PREPARATION { - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - path db - - output: - path "database/*.cf", emit: db - path "versions.yml" , emit: versions - - script: - """ - mkdir db_tmp - tar -xf "${db}" -C db_tmp - mkdir database - mv `find db_tmp/ -name "*.cf"` database/ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tar: \$(tar --version 2>&1 | sed -n 1p | sed 's/tar (GNU tar) //') - END_VERSIONS - """ -} diff --git a/modules/local/krona_db.nf b/modules/local/krona_db.nf deleted file mode 100644 index 0b1f4125..00000000 --- a/modules/local/krona_db.nf +++ /dev/null @@ -1,21 +0,0 @@ -process KRONA_DB { - - conda "bioconda::krona=2.7.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/krona:2.7.1--pl526_5' : - 'biocontainers/krona:2.7.1--pl526_5' }" - - output: - path("taxonomy/taxonomy.tab"), emit: db - path "versions.yml" , emit: versions - - script: - """ - ktUpdateTaxonomy.sh taxonomy - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ktImportTaxonomy: \$(ktImportTaxonomy 2>&1 | sed -n '/KronaTools /p' | sed 's/^.*KronaTools //; s/ - ktImportTaxonomy.*//') - END_VERSIONS - """ -} From 4bf46108f53a4411a0f8690f7cd633d8bfbfecde Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 27 Jun 2024 17:11:47 +0200 Subject: [PATCH 4/5] update NanoLyse module --- CHANGELOG.md | 3 ++ docs/output.md | 2 +- modules.json | 5 +++ modules/nf-core/nanolyse/environment.yml | 7 ++++ modules/nf-core/nanolyse/main.nf | 34 ++++++++++++++++ modules/nf-core/nanolyse/meta.yml | 49 ++++++++++++++++++++++++ workflows/mag.nf | 4 +- 7 files changed, 101 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/nanolyse/environment.yml create mode 100644 modules/nf-core/nanolyse/main.nf create mode 100644 modules/nf-core/nanolyse/meta.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 7458472a..a703a764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#632](https://github.com/nf-core/mag/pull/632) - Use default NanoLyse log of just removed reads rather than custom (by @jfy133) + ### `Fixed` ### `Dependencies` @@ -17,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | -------- | ---------------- | ----------- | | Porechop | 0.2.3_seqan2.1.1 | 0.2.4 | | NanoPlot | 1.26.3 | 1.41.6 | +| NanoLyse | 1.1.0 | 1.2.0 | ### `Deprecated` diff --git a/docs/output.md b/docs/output.md index 838fc7c5..cb3704ed 100644 --- a/docs/output.md +++ b/docs/output.md @@ -105,7 +105,7 @@ The pipeline uses Nanolyse to map the reads against the Lambda phage and removes Output files - `QC_longreads/NanoLyse/` - - `[sample]_nanolyse.log`: Contains a brief log file indicating how many reads have been retained. + - `[sample]_nanolyse.log`: Contains a brief log file indicating how many reads have been removed. 
diff --git a/modules.json b/modules.json index ddce6955..288c46da 100644 --- a/modules.json +++ b/modules.json @@ -197,6 +197,11 @@ "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, + "nanolyse": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "nanoplot": { "branch": "master", "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", diff --git a/modules/nf-core/nanolyse/environment.yml b/modules/nf-core/nanolyse/environment.yml new file mode 100644 index 00000000..7d738ba9 --- /dev/null +++ b/modules/nf-core/nanolyse/environment.yml @@ -0,0 +1,7 @@ +name: nanolyse +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::nanolyse=1.2.0 diff --git a/modules/nf-core/nanolyse/main.nf b/modules/nf-core/nanolyse/main.nf new file mode 100644 index 00000000..68d5d804 --- /dev/null +++ b/modules/nf-core/nanolyse/main.nf @@ -0,0 +1,34 @@ +process NANOLYSE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/nanolyse:1.2.0--py_0' : + 'biocontainers/nanolyse:1.2.0--py_0' }" + + input: + tuple val(meta), path(fastq) + path fasta + + output: + tuple val(meta), path("*.fastq.gz"), emit: fastq + path "*.log" , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + gunzip -c $fastq | NanoLyse -r $fasta | gzip > ${prefix}.fastq.gz + mv NanoLyse.log ${prefix}.nanolyse.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanolyse: \$(NanoLyse --version 2>&1 | sed -e "s/NanoLyse //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanolyse/meta.yml b/modules/nf-core/nanolyse/meta.yml new file mode 100644 index 00000000..375ad9bc --- /dev/null +++ b/modules/nf-core/nanolyse/meta.yml @@ -0,0 +1,49 @@ +name: nanolyse +description: DNA contaminant removal using NanoLyse +keywords: + - contaminant_removal +tools: + - nanolyse: + description: | + DNA contaminant removal using NanoLyse + homepage: https://github.com/wdecoster/nanolyse + documentation: https://github.com/wdecoster/nanolyse#nanolyse + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: | + Basecalled reads in FASTQ.GZ format + pattern: "*.fastq.gz" + - fasta: + type: file + description: | + A reference fasta file against which to filter. + pattern: "*.fasta" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: Reads with contaminants removed in FASTQ format + pattern: "*.fastq.gz" + - log: + type: file + description: Log of the Nanolyse run. 
+ pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" +maintainers: + - "@yuukiiwa" diff --git a/workflows/mag.nf b/workflows/mag.nf index 19d7f574..f71d4218 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -35,6 +35,7 @@ include { SEQTK_MERGEPE } from '../modul include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' include { NANOPLOT as NANOPLOT_RAW } from '../modules/nf-core/nanoplot/main' include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/nf-core/nanoplot/main' +include { NANOLYSE } from '../modules/nf-core/nanolyse/main' include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' include { FASTP } from '../modules/nf-core/fastp/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' @@ -59,7 +60,6 @@ include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { NANOLYSE } from '../modules/local/nanolyse' include { FILTLONG } from '../modules/local/filtlong' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' @@ -381,7 +381,7 @@ workflow MAG { ch_long_reads, ch_nanolyse_db ) - ch_long_reads = NANOLYSE.out.reads + ch_long_reads = NANOLYSE.out.fastq ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) } From 42db409d1b99ab4079ca89913cccabda8b564d14 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 27 Jun 2024 17:13:15 +0200 Subject: [PATCH 5/5] Output docs update --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index cb3704ed..d044e544 100644 --- a/docs/output.md +++ b/docs/output.md @@ -105,7 +105,7 @@ The pipeline uses Nanolyse to map the reads against the Lambda phage and removes Output files - `QC_longreads/NanoLyse/` - - `[sample]_nanolyse.log`: Contains a brief log file indicating how many reads have been removed. + - `[sample]_[run]_lambdafiltered.nanolyse.log`: Contains a brief log file indicating how many reads have been removed.