From f531c5d548e2bd213748abad274781a7930ce699 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Fri, 26 Apr 2024 19:52:30 -0500
Subject: [PATCH 01/25] Replace ext/publishDir with params/publish definition

Signed-off-by: Ben Sherman
---
 conf/base.config                              |  6 -----
 main.nf                                       |  5 ++++
 modules/local/aspera_cli/main.nf              |  2 +-
 modules/local/aspera_cli/nextflow.config      | 17 ------------
 .../multiqc_mappings_config/nextflow.config   |  9 -------
 modules/local/sra_fastq_ftp/main.nf           |  2 +-
 modules/local/sra_fastq_ftp/nextflow.config   | 17 ------------
 .../local/sra_ids_to_runinfo/nextflow.config  |  8 ------
 .../local/sra_runinfo_to_ftp/nextflow.config  |  9 -------
 .../local/sra_to_samplesheet/nextflow.config  |  8 ------
 modules/nf-core/sratools/fasterqdump/main.nf  |  8 +++---
 .../sratools/fasterqdump/nextflow.config      | 10 -------
 .../fasterqdump/tests/nextflow.config         |  5 ----
 .../nf-core/sratools/prefetch/nextflow.config |  8 ------
 nextflow.config                               | 12 ++++-----
 .../main.nf                                   | 10 ++++++-
 .../nextflow.config                           |  2 --
 workflows/sra/main.nf                         | 27 ++++++++++++++-----
 workflows/sra/nextflow.config                 |  8 ------
 19 files changed, 46 insertions(+), 127 deletions(-)
 delete mode 100644 modules/local/aspera_cli/nextflow.config
 delete mode 100644 modules/local/multiqc_mappings_config/nextflow.config
 delete mode 100644 modules/local/sra_fastq_ftp/nextflow.config
 delete mode 100644 modules/local/sra_ids_to_runinfo/nextflow.config
 delete mode 100644 modules/local/sra_runinfo_to_ftp/nextflow.config
 delete mode 100644 modules/local/sra_to_samplesheet/nextflow.config
 delete mode 100644 modules/nf-core/sratools/fasterqdump/nextflow.config
 delete mode 100644 modules/nf-core/sratools/fasterqdump/tests/nextflow.config
 delete mode 100644 modules/nf-core/sratools/prefetch/nextflow.config
 delete mode 100644 subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
 delete mode 100644 workflows/sra/nextflow.config

diff --git a/conf/base.config b/conf/base.config
index 6af79a7b..6af45542 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -14,12 +14,6 @@ process {
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time   = { check_max( 4.h * task.attempt, 'time' ) }
 
-    publishDir = [
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-    ]
-
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
diff --git a/main.nf b/main.nf
index 52539e40..e178d8a5 100644
--- a/main.nf
+++ b/main.nf
@@ -86,6 +86,11 @@ workflow {
     )
 }
 
+publish {
+    directory params.outdir
+    mode      params.publish_dir_mode
+}
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     THE END
diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf
index b38d17c0..36882cdb 100644
--- a/modules/local/aspera_cli/main.nf
+++ b/modules/local/aspera_cli/main.nf
@@ -10,6 +10,7 @@ process ASPERA_CLI {
     input:
     tuple val(meta), val(fastq)
    val user
+    val args
 
     output:
     tuple val(meta), path("*fastq.gz"), emit: fastq
@@ -17,7 +18,6 @@ process ASPERA_CLI {
     path "versions.yml"               , emit: versions
 
     script:
-    def args = task.ext.args ?: ''
     def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
     if (meta.single_end) {
         """
diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config
deleted file mode 100644
index fa2dbd90..00000000
--- a/modules/local/aspera_cli/nextflow.config
+++ /dev/null
@@ -1,17 +0,0 @@
-process {
-    withName: 'ASPERA_CLI' {
-        ext.args = '-QT -l 300m -P33001'
-        publishDir = [
-            [
-                path: { "${params.outdir}/fastq" },
-                mode: params.publish_dir_mode,
-                pattern: "*.fastq.gz"
-            ],
-            [
-                path: { "${params.outdir}/fastq/md5" },
-                mode: params.publish_dir_mode,
-                pattern: "*.md5"
-            ]
-        ]
-    }
-}
diff --git a/modules/local/multiqc_mappings_config/nextflow.config b/modules/local/multiqc_mappings_config/nextflow.config
deleted file mode 100644
index 11c58341..00000000
--- a/modules/local/multiqc_mappings_config/nextflow.config
+++ /dev/null
@@ -1,9 +0,0 @@
-process {
-    withName: 'MULTIQC_MAPPINGS_CONFIG' {
-        publishDir = [
-            path: { "${params.outdir}/samplesheet" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-}
diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf
index e2274d46..f7100055 100644
--- a/modules/local/sra_fastq_ftp/main.nf
+++ b/modules/local/sra_fastq_ftp/main.nf
@@ -11,6 +11,7 @@ process SRA_FASTQ_FTP {
 
     input:
     tuple val(meta), val(fastq)
+    val args
 
     output:
     tuple val(meta), path("*fastq.gz"), emit: fastq
     tuple val(meta), path("*md5")     , emit: md5
     path "versions.yml"               , emit: versions
 
     script:
-    def args = task.ext.args ?: ''
     if (meta.single_end) {
         """
         wget \\
diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config
deleted file mode 100644
index 56e43959..00000000
--- a/modules/local/sra_fastq_ftp/nextflow.config
+++ /dev/null
@@ -1,17 +0,0 @@
-process {
-    withName: 'SRA_FASTQ_FTP' {
-        ext.args = '-t 5 -nv -c -T 60'
-        publishDir = [
-            [
-                path: { "${params.outdir}/fastq" },
-                mode: params.publish_dir_mode,
-                pattern: "*.fastq.gz"
-            ],
-            [
-                path: { "${params.outdir}/fastq/md5" },
-                mode: params.publish_dir_mode,
-                pattern: "*.md5"
-            ]
-        ]
-    }
-}
diff --git a/modules/local/sra_ids_to_runinfo/nextflow.config b/modules/local/sra_ids_to_runinfo/nextflow.config
deleted file mode 100644
index 9b9d0b16..00000000
--- a/modules/local/sra_ids_to_runinfo/nextflow.config
+++ /dev/null
@@ -1,8 +0,0 @@
-process {
-    withName: 'SRA_IDS_TO_RUNINFO' {
-        publishDir = [
-            path: { "${params.outdir}/metadata" },
-            enabled: false
-        ]
-    }
-}
diff --git a/modules/local/sra_runinfo_to_ftp/nextflow.config b/modules/local/sra_runinfo_to_ftp/nextflow.config
deleted file mode 100644
index 43263648..00000000
--- a/modules/local/sra_runinfo_to_ftp/nextflow.config
+++ /dev/null
@@ -1,9 +0,0 @@
-process {
-    withName: 'SRA_RUNINFO_TO_FTP' {
-        publishDir = [
-            path: { "${params.outdir}/metadata" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
-}
diff --git a/modules/local/sra_to_samplesheet/nextflow.config b/modules/local/sra_to_samplesheet/nextflow.config
deleted file mode 100644
index da241c1a..00000000
--- a/modules/local/sra_to_samplesheet/nextflow.config
+++ /dev/null
@@ -1,8 +0,0 @@
-process {
-    withName: SRA_TO_SAMPLESHEET {
-        publishDir = [
-            path: { "${params.outdir}/samplesheet" },
-            enabled: false
-        ]
-    }
-}
diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf
index e7cf157a..ca94a925 100644
--- a/modules/nf-core/sratools/fasterqdump/main.nf
+++ b/modules/nf-core/sratools/fasterqdump/main.nf
@@ -11,6 +11,8 @@ process SRATOOLS_FASTERQDUMP {
     tuple val(meta), path(sra)
     path ncbi_settings
     path certificate
+    val fasterqdump_args // = '--split-files --include-technical'
+    val pigz_args // = ''
 
     output:
     tuple val(meta), path('*.fastq.gz'), emit: reads
@@ -20,8 +22,6 @@ process SRATOOLS_FASTERQDUMP {
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def args2 = task.ext.args2 ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def outfile = meta.single_end ? "${prefix}.fastq" : prefix
     def key_file = ''
@@ -34,14 +34,14 @@ process SRATOOLS_FASTERQDUMP {
     export NCBI_SETTINGS="\$PWD/${ncbi_settings}"
 
     fasterq-dump \\
-        $args \\
+        $fasterqdump_args \\
         --threads $task.cpus \\
         --outfile $outfile \\
         ${key_file} \\
         ${sra}
 
     pigz \\
-        $args2 \\
+        $pigz_args \\
         --no-name \\
         --processes $task.cpus \\
         *.fastq
diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config
deleted file mode 100644
index f98b140d..00000000
--- a/modules/nf-core/sratools/fasterqdump/nextflow.config
+++ /dev/null
@@ -1,10 +0,0 @@
-process {
-    withName: SRATOOLS_FASTERQDUMP {
-        ext.args = '--split-files --include-technical'
-        publishDir = [
-            path: { "${params.outdir}/fastq" },
-            mode: params.publish_dir_mode,
-            pattern: "*.fastq.gz"
-        ]
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config
deleted file mode 100644
index 23e4100b..00000000
--- a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
-    withName: SRATOOLS_FASTERQDUMP {
-        ext.args = ''
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/sratools/prefetch/nextflow.config b/modules/nf-core/sratools/prefetch/nextflow.config
deleted file mode 100644
index a2ca8848..00000000
--- a/modules/nf-core/sratools/prefetch/nextflow.config
+++ /dev/null
@@ -1,8 +0,0 @@
-process {
-    withName: SRATOOLS_PREFETCH {
-        publishDir = [
-            path: { "${params.outdir}/sra" },
-            enabled: false
-        ]
-    }
-}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 7f4f8ebf..bad8464d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -19,6 +19,11 @@ params {
     skip_fastq_download = false
     dbgap_key = null
 
+    aspera_cli_args = '-QT -l 300m -P33001'
+    sra_fastq_ftp_args = '-t 5 -nv -c -T 60'
+    sratools_fasterqdump_args = ''
+    sratools_pigz_args = ''
+
     // Boilerplate options
     outdir = null
     publish_dir_mode = 'copy'
@@ -67,15 +72,11 @@ try {
     System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
 }
 
-// Workflow specific configs
-includeConfig './workflows/sra/nextflow.config'
-
 profiles {
     debug {
         dumpHashes = true
         process.beforeScript = 'echo $HOSTNAME'
         cleanup = false
         nextflow.enable.configProcessNamesValidation = true
     }
     conda {
         conda.enabled = true
@@ -193,9 +194,6 @@ env {
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
-// Disable process selector warnings by default. Use debug profile to enable warnings.
-nextflow.enable.configProcessNamesValidation = false
-
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled = true
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf
index fbeacf4a..256c8104 100644
--- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf
@@ -9,6 +9,8 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS {
     take:
     ch_sra_ids   // channel: [ val(meta), val(id) ]
     ch_dbgap_key // channel: [ path(dbgap_key) ]
+    sratools_fasterqdump_args // string
+    sratools_pigz_args // string
 
     main:
 
@@ -30,7 +32,13 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS {
     //
     // Convert the SRA format into one or more compressed FASTQ files.
     //
-    SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, ch_dbgap_key )
+    SRATOOLS_FASTERQDUMP (
+        SRATOOLS_PREFETCH.out.sra,
+        ch_ncbi_settings,
+        ch_dbgap_key,
+        sratools_fasterqdump_args,
+        sratools_pigz_args
+    )
     ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first())
 
     emit:
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
deleted file mode 100644
index de803a38..00000000
--- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
+++ /dev/null
@@ -1,2 +0,0 @@
-includeConfig '../../../modules/nf-core/sratools/prefetch/nextflow.config'
-includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config'
diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf
index 0c8cac0c..8cdd6845 100644
--- a/workflows/sra/main.nf
+++ b/workflows/sra/main.nf
@@ -93,7 +93,8 @@ workflow SRA {
     // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
     //
     SRA_FASTQ_FTP (
-        ch_sra_reads.ftp
+        ch_sra_reads.ftp,
+        params.sra_fastq_ftp_args
     )
     ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first())
 
@@ -102,7 +103,9 @@ workflow SRA {
     //
     FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
         ch_sra_reads.sratools,
-        params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : []
+        params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [],
+        params.sratools_fasterqdump_args,
+        params.sratools_pigz_args
     )
     ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first())
 
@@ -111,7 +114,8 @@ workflow SRA {
     //
     ASPERA_CLI (
         ch_sra_reads.aspera,
-        'era-fasp'
+        'era-fasp',
+        params.aspera_cli_args
     )
     ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first())
 
@@ -121,6 +125,7 @@ workflow SRA {
         .fastq
         .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads)
         .mix(ASPERA_CLI.out.fastq)
+        .tap { ch_fastq }
         .map {
             meta, fastq ->
                 def reads = fastq instanceof List ? fastq.flatten() : [ fastq ]
@@ -151,7 +156,7 @@ workflow SRA {
             .map { it[1] }
             .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName })
             .map { it.text.tokenize('\n').join('\n') }
-            .collectFile(name:'samplesheet.csv', storeDir: "${params.outdir}/samplesheet")
+            .collectFile(name:'samplesheet.csv')
             .set { ch_samplesheet }
 
         SRA_TO_SAMPLESHEET
@@ -160,7 +165,7 @@ workflow SRA {
             .map { it[1] }
            .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName })
             .map { it.text.tokenize('\n').join('\n') }
-            .collectFile(name:'id_mappings.csv', storeDir: "${params.outdir}/samplesheet")
+            .collectFile(name:'id_mappings.csv')
             .set { ch_mappings }
 
     //
@@ -179,7 +184,7 @@ workflow SRA {
     // Collate and save software versions
     //
     softwareVersionsToYAML(ch_versions)
-        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true)
+        .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true)
 
     emit:
     samplesheet      = ch_samplesheet
@@ -187,6 +192,16 @@ workflow SRA {
     sample_mappings  = ch_sample_mappings_yml
     sra_metadata     = ch_sra_metadata
     versions         = ch_versions.unique()
+
+    publish:
+    ch_fastq                   >> 'fastq/'
+    ASPERA_CLI.out.md5         >> 'fastq/md5/'
+    SRA_FASTQ_FTP.out.md5      >> 'fastq/md5/'
+    SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata/'
+    ch_versions_yml            >> 'pipeline_info/'
+    ch_samplesheet             >> 'samplesheet/'
+    ch_mappings                >> 'samplesheet/'
+    ch_sample_mappings_yml     >> 'samplesheet/'
 }
 
 /*
diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config
deleted file mode 100644
index d242c238..00000000
--- a/workflows/sra/nextflow.config
+++ /dev/null
@@ -1,8 +0,0 @@
-includeConfig "../../modules/local/multiqc_mappings_config/nextflow.config"
-includeConfig "../../modules/local/aspera_cli/nextflow.config"
-includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config"
-includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config"
-includeConfig "../../modules/local/sra_runinfo_to_ftp/nextflow.config"
-includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config"
-includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config"
-includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config"

From 836ace2cb64068a17b973cc1ac248c276b1636e3 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Fri, 26 Apr 2024 19:53:21 -0500
Subject: [PATCH 02/25] Update config to comply with strict parser

Signed-off-by: Ben Sherman
---
 conf/base.config | 40 ++++++++++++++++++++-----------------
 nextflow.config  | 49 ++++++------------------------------------
 2 files changed, 29 insertions(+), 60 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 6af45542..5d7c5389 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,9 +10,15 @@
 
 process {
 
-    cpus   = { check_max( 1 * task.attempt, 'cpus' ) }
-    memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-    time   = { check_max( 4.h * task.attempt, 'time' ) }
+    resourceLimits = [
+        cpus: params.max_cpus,
+        memory: params.max_memory,
+        time: params.max_time
+    ]
+
+    cpus   = { 1 * task.attempt }
+    memory = { 6.GB * task.attempt }
+    time   = { 4.h * task.attempt }
 
     errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
     maxRetries    = 1
     maxErrors     = '-1'
@@ -25,30 +31,30 @@ process {
     //          adding in your local modules too.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
-        cpus   = { check_max( 1 , 'cpus' ) }
-        memory = { check_max( 6.GB * task.attempt, 'memory' ) }
-        time   = { check_max( 4.h * task.attempt, 'time' ) }
+        cpus   = { 1 }
+        memory = { 6.GB * task.attempt }
+        time   = { 4.h * task.attempt }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2 * task.attempt, 'cpus' ) }
-        memory = { check_max( 12.GB * task.attempt, 'memory' ) }
-        time   = { check_max( 4.h * task.attempt, 'time' ) }
+        cpus   = { 2 * task.attempt }
+        memory = { 12.GB * task.attempt }
+        time   = { 4.h * task.attempt }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6 * task.attempt, 'cpus' ) }
-        memory = { check_max( 36.GB * task.attempt, 'memory' ) }
-        time   = { check_max( 8.h * task.attempt, 'time' ) }
+        cpus   = { 6 * task.attempt }
+        memory = { 36.GB * task.attempt }
+        time   = { 8.h * task.attempt }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12 * task.attempt, 'cpus' ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory' ) }
-        time   = { check_max( 16.h * task.attempt, 'time' ) }
+        cpus   = { 12 * task.attempt }
+        memory = { 72.GB * task.attempt }
+        time   = { 16.h * task.attempt }
     }
     withLabel:process_long {
-        time   = { check_max( 20.h * task.attempt, 'time' ) }
+        time   = { 20.h * task.attempt }
     }
     withLabel:process_high_memory {
-        memory = { check_max( 200.GB * task.attempt, 'memory' ) }
+        memory = { 200.GB * task.attempt }
     }
     withLabel:error_ignore {
         errorStrategy = 'ignore'
     }
diff --git a/nextflow.config b/nextflow.config
index bad8464d..da983b96 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -66,11 +66,7 @@ params {
 includeConfig 'conf/base.config'
 
 // Load nf-core custom profiles from different Institutions
-try {
-    includeConfig "${params.custom_config_base}/nfcore_custom.config"
-} catch (Exception e) {
-    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
-}
+includeConfig "${params.custom_config_base}/nfcore_custom.config"
 
 profiles {
     debug {
@@ -194,22 +190,22 @@ env {
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
-def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
+// NOTE: Nextflow config should provide some constant for the start timestamp
 timeline {
     enabled = true
-    file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
+    file = "${params.outdir}/pipeline_info/execution_timeline_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html"
 }
 report {
     enabled = true
-    file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
+    file = "${params.outdir}/pipeline_info/execution_report_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html"
 }
 trace {
     enabled = true
-    file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
+    file = "${params.outdir}/pipeline_info/execution_trace_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.txt"
 }
 dag {
     enabled = true
-    file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
+    file = "${params.outdir}/pipeline_info/pipeline_dag_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html"
 }
 
 manifest {
@@ -222,36 +218,3 @@ manifest {
     version = '1.13.0dev'
     doi     = 'https://doi.org/10.5281/zenodo.5070524'
 }
-
-// Function to ensure that resource requirements don't go beyond
-// a maximum limit
-def check_max(obj, type) {
-    if (type == 'memory') {
-        try {
-            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
-                return params.max_memory as nextflow.util.MemoryUnit
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'time') {
-        try {
-            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
-                return params.max_time as nextflow.util.Duration
-            else
-                return obj
-        } catch (all) {
-            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
-            return obj
-        }
-    } else if (type == 'cpus') {
-        try {
-            return Math.min( obj, params.max_cpus as int )
-        } catch (all) {
-            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
-            return obj
-        }
-    }
-}

From 25a1fb52e438df02a59ccbf62ba83a43b8e2570a Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Fri, 26 Apr 2024 19:56:36 -0500
Subject: [PATCH 03/25] Use param schemas as source of truth, convert to YAML

Signed-off-by: Ben Sherman
---
 assets/schema_input.json                   |  17 --
 assets/schema_input.yml                    |  12 +
 nextflow.config                            |  56 ----
 nextflow_schema.json                       | 287 ------------------
 params.yml                                 | 258 ++++++++++++++++
 .../utils_nfcore_fetchngs_pipeline/main.nf |   4 +-
 6 files changed, 272 insertions(+), 362 deletions(-)
 delete mode 100644 assets/schema_input.json
 create mode 100644 assets/schema_input.yml
 delete mode 100644 nextflow_schema.json
 create mode 100644 params.yml

diff --git a/assets/schema_input.json b/assets/schema_input.json
deleted file mode 100644
index db9ffc00..00000000
--- a/assets/schema_input.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema",
-    "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json",
-    "title": "nf-core/fetchngs pipeline - params.input schema",
-    "description": "Schema for the file provided with params.input",
-    "type": "array",
-    "items": {
-        "type": "object",
-        "properties": {
-            "": {
-                "type": "string",
-                "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$",
-                "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier"
-            }
-        }
-    }
-}
diff --git a/assets/schema_input.yml b/assets/schema_input.yml
new file mode 100644
index 00000000..29760b88
--- /dev/null
+++ b/assets/schema_input.yml
@@ -0,0 +1,12 @@
+$schema: http://json-schema.org/draft-07/schema
+$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.yml
+title: nf-core/fetchngs pipeline - params.input schema
+description: Schema for the file provided with params.input
+type: array
+items:
+  type: object
+  properties:
+    '':
+      type: string
+      pattern: ^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$
+      errorMessage: Please provide a valid SRA, ENA, DDBJ or GEO identifier
diff --git a/nextflow.config b/nextflow.config
index da983b96..74e1444a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -6,62 +6,6 @@
 ----------------------------------------------------------------------------------------
 */
 
-// Global default params, used in configs
-params {
-
-    // Input options
-    input = null
-    nf_core_pipeline = null
-    nf_core_rnaseq_strandedness = 'auto'
-    ena_metadata_fields = null
-    sample_mapping_fields = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description'
-    download_method = 'ftp'
-    skip_fastq_download = false
-    dbgap_key = null
-
-    aspera_cli_args = '-QT -l 300m -P33001'
-    sra_fastq_ftp_args = '-t 5 -nv -c -T 60'
-    sratools_fasterqdump_args = ''
-    sratools_pigz_args = ''
-
-    // Boilerplate options
-    outdir = null
-    publish_dir_mode = 'copy'
-    email = null
-    email_on_fail = null
-    plaintext_email = false
-    monochrome_logs = false
-    hook_url = null
-    help = false
-    version = false
-
-    // Config options
-    config_profile_name = null
-    config_profile_description = null
-    custom_config_version = 'master'
-    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
-    config_profile_contact = null
-    config_profile_url = null
-
-    // Max resource options
-    // Defaults only, expecting to be overwritten
-    max_memory = '128.GB'
-    max_cpus = 16
-    max_time = '240.h'
-
-    // Schema validation default options
-    validationFailUnrecognisedParams = false
-    validationLenientMode = false
-    validationShowHiddenParams = false
-    validationSchemaIgnoreParams = ''
-    validate_params = true
-
-    // Deprecated options
-    // See: https://github.com/nf-core/fetchngs/pull/279/files#r1494459480
-    force_sratools_download = false
-
-}
-
 // Load base.config by default for all pipelines
 includeConfig 'conf/base.config'
 
 // Load nf-core custom profiles from different Institutions
diff --git a/nextflow_schema.json b/nextflow_schema.json
deleted file mode 100644
index 29f7b710..00000000
--- a/nextflow_schema.json
+++ /dev/null
@@ -1,287 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema",
-    "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json",
-    "title": "nf-core/fetchngs pipeline parameters",
-    "description": "Pipeline to fetch metadata and raw FastQ files from public databases",
-    "type": "object",
-    "definitions": {
-        "input_output_options": {
-            "title": "Input/output options",
-            "type": "object",
-            "fa_icon": "fas fa-terminal",
-            "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
-            "properties": {
-                "input": {
-                    "type": "string",
-                    "format": "file-path",
-                    "exists": true,
-                    "schema": "assets/schema_input.json",
-                    "mimetype": "text/csv",
-                    "pattern": "^\\S+\\.(csv|tsv|txt)$",
-                    "fa_icon": "fas fa-file-excel",
-                    "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files."
-                },
-                "ena_metadata_fields": {
-                    "type": "string",
-                    "fa_icon": "fas fa-columns",
-                    "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.",
-                    "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)."
-                },
-                "sample_mapping_fields": {
-                    "type": "string",
-                    "fa_icon": "fas fa-columns",
-                    "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.",
-                    "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description"
-                },
-                "nf_core_pipeline": {
-                    "type": "string",
-                    "fa_icon": "fab fa-apple",
-                    "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.",
-                    "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"]
-                },
-                "nf_core_rnaseq_strandedness": {
-                    "type": "string",
-                    "fa_icon": "fas fa-dna",
-                    "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.",
-                    "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.",
-                    "default": "auto"
-                },
-                "download_method": {
-                    "type": "string",
-                    "default": "ftp",
-                    "fa_icon": "fas fa-download",
-                    "enum": ["aspera", "ftp", "sratools"],
-                    "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.",
-                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ."
-                },
-                "skip_fastq_download": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-fast-forward",
-                    "description": "Only download metadata for public data database ids and don't download the FastQ files."
-                },
-                "dbgap_key": {
-                    "type": "string",
-                    "fa_icon": "fas fa-address-card",
-                    "help_text": "Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.",
-                    "format": "file-path",
-                    "description": "dbGaP repository key."
-                },
-                "outdir": {
-                    "type": "string",
-                    "format": "directory-path",
-                    "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
-                    "fa_icon": "fas fa-folder-open"
-                },
-                "email": {
-                    "type": "string",
-                    "description": "Email address for completion summary.",
-                    "fa_icon": "fas fa-envelope",
-                    "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
-                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
-                }
-            }
-        },
-        "institutional_config_options": {
-            "title": "Institutional config options",
-            "type": "object",
-            "fa_icon": "fas fa-university",
-            "description": "Parameters used to describe centralised config profiles. These should not be edited.",
-            "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
-            "properties": {
-                "custom_config_version": {
-                    "type": "string",
-                    "description": "Git commit id for Institutional configs.",
-                    "default": "master",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                },
-                "custom_config_base": {
-                    "type": "string",
-                    "description": "Base directory for Institutional configs.",
-                    "default": "https://raw.githubusercontent.com/nf-core/configs/master",
-                    "hidden": true,
-                    "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
-                    "fa_icon": "fas fa-users-cog"
-                },
-                "config_profile_name": {
-                    "type": "string",
-                    "description": "Institutional config name.",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                },
-                "config_profile_description": {
-                    "type": "string",
-                    "description": "Institutional config description.",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                },
-                "config_profile_contact": {
-                    "type": "string",
-                    "description": "Institutional config contact information.",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                },
-                "config_profile_url": {
-                    "type": "string",
-                    "description": "Institutional config URL link.",
-                    "hidden": true,
-                    "fa_icon": "fas fa-users-cog"
-                }
-            }
-        },
-        "max_job_request_options": {
-            "title": "Max job request options",
-            "type": "object",
-            "fa_icon": "fab fa-acquisitions-incorporated",
-            "description": "Set the top limit for requested resources for any single job.",
-            "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
-            "properties": {
-                "max_cpus": {
-                    "type": "integer",
-                    "description": "Maximum number of CPUs that can be requested for any single job.",
-                    "default": 16,
-                    "fa_icon": "fas fa-microchip",
-                    "hidden": true,
-                    "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
-                },
-                "max_memory": {
-                    "type": "string",
-                    "description": "Maximum amount of memory that can be requested for any single job.",
-                    "default": "128.GB",
-                    "fa_icon": "fas fa-memory",
-                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
-                    "hidden": true,
-                    "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
-                },
-                "max_time": {
-                    "type": "string",
-                    "description": "Maximum amount of time that can be requested for any single job.",
-                    "default": "240.h",
-                    "fa_icon": "far fa-clock",
-                    "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
-                    "hidden": true,
-                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
-                }
-            }
-        },
-        "generic_options": {
-            "title": "Generic options",
-            "type": "object",
-            "fa_icon": "fas fa-file-import",
-            "description": "Less common options for the pipeline, typically set in a config file.",
-            "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
-            "properties": {
-                "help": {
-                    "type": "boolean",
-                    "description": "Display help text.",
-                    "fa_icon": "fas fa-question-circle",
-                    "hidden": true
-                },
-                "version": {
-                    "type": "boolean",
-                    "description": "Display version and exit.",
-                    "fa_icon": "fas fa-question-circle",
-                    "hidden": true
-                },
-                "publish_dir_mode": {
-                    "type": "string",
-                    "default": "copy",
-                    "description": "Method used to save pipeline results to output directory.",
-                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
-                    "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
-                    "hidden": true
-                },
-                "email_on_fail": {
-                    "type": "string",
-                    "description": "Email address for completion summary, only when pipeline fails.",
-                    "fa_icon": "fas fa-exclamation-triangle",
-                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
-                    "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
-                    "hidden": true
-                },
-                "plaintext_email": {
-                    "type": "boolean",
-                    "description": "Send plain-text email instead of HTML.",
-                    "fa_icon": "fas fa-remove-format",
-                    "hidden": true
-                },
-                "monochrome_logs": {
-                    "type": "boolean",
-                    "description": "Do not use coloured log outputs.",
-                    "fa_icon": "fas fa-palette",
-                    "hidden": true
-                },
-                "hook_url": {
-                    "type": "string",
-                    "description": "Incoming hook URL for messaging service",
-                    "fa_icon": "fas fa-people-group",
-                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
-                    "hidden": true
-                },
-                "validate_params": {
-                    "type": "boolean",
-                    "description": "Boolean whether to validate parameters against the schema at runtime",
-                    "default": true,
-                    "fa_icon": "fas fa-check-square",
-                    "hidden": true
-                },
-                "validationShowHiddenParams": {
-                    "type": "boolean",
-                    "fa_icon": "far fa-eye-slash",
-                    "description": "Show all params when using `--help`",
-                    "hidden": true,
-                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
-                },
-                "validationFailUnrecognisedParams": {
-                    "type": "boolean",
-                    "fa_icon": "far fa-check-circle",
-                    "description": "Validation of parameters fails when an unrecognised parameter is found.",
-                    "hidden": true,
-                    "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
-                },
-                "validationLenientMode": {
-                    "type": "boolean",
-                    "fa_icon": "far fa-check-circle",
-                    "description": "Validation of parameters in lenient more.",
-                    "hidden": true,
-                    "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
-                }
-            }
-        },
-        "deprecated_options": {
-            "title": "Deprecated options",
-            "type": "object",
-            "description": "List of parameters that have been deprecated.",
-            "default": "",
-            "fa_icon": "fas fa-calendar-times",
-            "properties": {
-                "force_sratools_download": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-times-circle",
-                    "description": "This parameter has been deprecated. Please use '--download_method sratools' instead.",
-                    "enum": [false],
-                    "hidden": true
-                }
-            }
-        }
-    },
-    "allOf": [
-        {
-            "$ref": "#/definitions/input_output_options"
-        },
-        {
-            "$ref": "#/definitions/institutional_config_options"
-        },
-        {
-            "$ref": "#/definitions/max_job_request_options"
-        },
-        {
-            "$ref": "#/definitions/generic_options"
-        },
-        {
-            "$ref": "#/definitions/deprecated_options"
-        }
-    ]
-}
diff --git a/params.yml b/params.yml
new file mode 100644
index 00000000..afb19664
--- /dev/null
+++ b/params.yml
@@ -0,0 +1,258 @@
+$schema: http://json-schema.org/draft-07/schema
+$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/params.yml
+title: nf-core/fetchngs pipeline parameters
+description: Pipeline to fetch metadata and raw FastQ files from public databases
+type: object
+definitions:
+  input_output_options:
+    title: Input/output options
+    type: object
+    fa_icon: fas fa-terminal
+    description: Define where the pipeline should find input data and save output data.
+    required:
+      - input
+      - outdir
+    properties:
+      input:
+        type: string
+        format: file-path
+        exists: true
+        schema: assets/schema_input.yml
+        mimetype: text/csv
+        pattern: ^\\S+\\.(csv|tsv|txt)$
+        fa_icon: fas fa-file-excel
+        description: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files.
+      ena_metadata_fields:
+        type: string
+        fa_icon: fas fa-columns
+        description: Comma-separated list of ENA metadata fields to fetch before downloading data.
+        help_text: The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run).
+      sample_mapping_fields:
+        type: string
+        fa_icon: fas fa-columns
+        description: Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.
+        default: experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description
+      nf_core_pipeline:
+        type: string
+        fa_icon: fab fa-apple
+        description: Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.
+        enum:
+          - rnaseq
+          - atacseq
+          - viralrecon
+          - taxprofiler
+      nf_core_rnaseq_strandedness:
+        type: string
+        fa_icon: fas fa-dna
+        description: Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.
+        help_text: The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.
+        default: auto
+      download_method:
+        type: string
+        default: ftp
+        fa_icon: fas fa-download
+        enum:
+          - aspera
+          - ftp
+          - sratools
+        description: Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.
+        help_text: FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ.
+      skip_fastq_download:
+        type: boolean
+        fa_icon: fas fa-fast-forward
+        description: Only download metadata for public data database ids and don't download the FastQ files.
+      dbgap_key:
+        type: string
+        fa_icon: fas fa-address-card
+        help_text: Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.
+        format: file-path
+        description: dbGaP repository key.
+      aspera_cli_args:
+        type: string
+        default: -QT -l 300m -P33001
+      sra_fastq_ftp_args:
+        type: string
+        default: -t 5 -nv -c -T 60
+      sratools_fasterqdump_args:
+        type: string
+        default: ''
+      sratools_pigz_args:
+        type: string
+        default: ''
+      outdir:
+        type: string
+        format: directory-path
+        description: The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.
+        fa_icon: fas fa-folder-open
+      email:
+        type: string
+        description: Email address for completion summary.
+        fa_icon: fas fa-envelope
+        help_text: Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
+  institutional_config_options:
+    title: Institutional config options
+    type: object
+    fa_icon: fas fa-university
+    description: Parameters used to describe centralised config profiles. These should not be edited.
+    help_text: The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.
+    properties:
+      custom_config_version:
+        type: string
+        description: Git commit id for Institutional configs.
+        default: master
+        hidden: true
+        fa_icon: fas fa-users-cog
+      custom_config_base:
+        type: string
+        description: Base directory for Institutional configs.
+        default: https://raw.githubusercontent.com/nf-core/configs/master
+        hidden: true
+        help_text: If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
+        fa_icon: fas fa-users-cog
+      config_profile_name:
+        type: string
+        description: Institutional config name.
+        hidden: true
+        fa_icon: fas fa-users-cog
+      config_profile_description:
+        type: string
+        description: Institutional config description.
+        hidden: true
+        fa_icon: fas fa-users-cog
+      config_profile_contact:
+        type: string
+        description: Institutional config contact information.
+        hidden: true
+        fa_icon: fas fa-users-cog
+      config_profile_url:
+        type: string
+        description: Institutional config URL link.
+        hidden: true
+        fa_icon: fas fa-users-cog
+  max_job_request_options:
+    title: Max job request options
+    type: object
+    fa_icon: fab fa-acquisitions-incorporated
+    description: Set the top limit for requested resources for any single job.
+    help_text: If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.
+    properties:
+      max_cpus:
+        type: integer
+        description: Maximum number of CPUs that can be requested for any single job.
+        default: 16
+        fa_icon: fas fa-microchip
+        hidden: true
+        help_text: Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
+      max_memory:
+        type: string
+        description: Maximum amount of memory that can be requested for any single job.
+        default: 128.GB
+        fa_icon: fas fa-memory
+        pattern: ^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$
+        hidden: true
+        help_text: Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
+      max_time:
+        type: string
+        description: Maximum amount of time that can be requested for any single job.
+        default: 240.h
+        fa_icon: far fa-clock
+        pattern: ^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$
+        hidden: true
+        help_text: Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
+  generic_options:
+    title: Generic options
+    type: object
+    fa_icon: fas fa-file-import
+    description: Less common options for the pipeline, typically set in a config file.
+    help_text: These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.
+    properties:
+      help:
+        type: boolean
+        description: Display help text.
+        fa_icon: fas fa-question-circle
+        hidden: true
+      version:
+        type: boolean
+        description: Display version and exit.
+        fa_icon: fas fa-question-circle
+        hidden: true
+      publish_dir_mode:
+        type: string
+        default: copy
+        description: Method used to save pipeline results to output directory.
+        help_text: The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
+        fa_icon: fas fa-copy
+        enum:
+          - symlink
+          - rellink
+          - link
+          - copy
+          - copyNoFollow
+          - move
+        hidden: true
+      email_on_fail:
+        type: string
+        description: Email address for completion summary, only when pipeline fails.
+        fa_icon: fas fa-exclamation-triangle
+        help_text: An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
+        hidden: true
+      plaintext_email:
+        type: boolean
+        description: Send plain-text email instead of HTML.
+        fa_icon: fas fa-remove-format
+        hidden: true
+      monochrome_logs:
+        type: boolean
+        description: Do not use coloured log outputs.
+        fa_icon: fas fa-palette
+        hidden: true
+      hook_url:
+        type: string
+        description: Incoming hook URL for messaging service
+        fa_icon: fas fa-people-group
+        help_text: Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
+        hidden: true
+      validate_params:
+        type: boolean
+        description: Boolean whether to validate parameters against the schema at runtime
+        default: true
+        fa_icon: fas fa-check-square
+        hidden: true
+      validationShowHiddenParams:
+        type: boolean
+        fa_icon: far fa-eye-slash
+        description: Show all params when using `--help`
+        hidden: true
+        help_text: By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.
+      validationFailUnrecognisedParams:
+        type: boolean
+        fa_icon: far fa-check-circle
+        description: Validation of parameters fails when an unrecognised parameter is found.
+        hidden: true
+        help_text: By default, when an unrecognised parameter is found, it returns a warinig.
+      validationLenientMode:
+        type: boolean
+        fa_icon: far fa-check-circle
+        description: Validation of parameters in lenient more.
+        hidden: true
+        help_text: Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
+  deprecated_options:
+    title: Deprecated options
+    type: object
+    description: List of parameters that have been deprecated.
+    default: ''
+    fa_icon: fas fa-calendar-times
+    properties:
+      force_sratools_download:
+        type: boolean
+        fa_icon: fas fa-times-circle
+        description: This parameter has been deprecated. Please use '--download_method sratools' instead.
+ enum: + - false + hidden: true +allOf: + - $ref: "#/definitions/input_output_options" + - $ref: "#/definitions/institutional_config_options" + - $ref: "#/definitions/max_job_request_options" + - $ref: "#/definitions/generic_options" + - $ref: "#/definitions/deprecated_options" diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 0c4307b5..5e317f09 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "nextflow_schema.json" + "params.yml" ) // @@ -112,7 +112,7 @@ workflow PIPELINE_COMPLETION { main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") // // Completion email and summary From 505806a9341b840fa5c47c5f5af55c2aa63c7b19 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 20:01:04 -0500 Subject: [PATCH 04/25] Use eval output, topic channels to collect tool versions Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 12 +-------- modules/local/multiqc_mappings_config/main.nf | 7 +---- modules/local/sra_fastq_ftp/main.nf | 12 +-------- modules/local/sra_ids_to_runinfo/main.nf | 7 +---- modules/local/sra_runinfo_to_ftp/main.nf | 7 +---- .../custom/sratoolsncbisettings/main.nf | 2 +- .../templates/detect_ncbi_settings.sh | 5 ---- modules/nf-core/sratools/fasterqdump/main.nf | 9 ++----- modules/nf-core/sratools/prefetch/main.nf | 2 +- .../prefetch/templates/retry_with_backoff.sh | 5 ---- modules/nf-core/untar/main.nf | 12 +-------- .../main.nf | 7 ----- .../nf-core/utils_nfcore_pipeline/main.nf | 27 ++++++++----------- workflows/sra/main.nf | 12 ++------- 14 files changed, 23 insertions(+), 103 deletions(-) diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 36882cdb..670d6c5a 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -15,7 +15,7 @@ process ASPERA_CLI { output: tuple val(meta), path("*fastq.gz"), emit: fastq tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions script: def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" @@ -31,11 +31,6 @@ process ASPERA_CLI { echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } else { """ @@ -58,11 +53,6 @@ process ASPERA_CLI { echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } } diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index 8efe1caa..738069c7 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -11,17 +11,12 @@ process MULTIQC_MAPPINGS_CONFIG { output: path "*yml" , emit: yml - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions script: """ multiqc_mappings_config.py \\ $csv \\ multiqc_config.yml - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index f7100055..55cf70ef 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -16,7 +16,7 @@ process SRA_FASTQ_FTP { output: tuple val(meta), path("*fastq.gz"), emit: fastq tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions script: if (meta.single_end) { @@ -28,11 +28,6 @@ process SRA_FASTQ_FTP { echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } else { """ @@ -51,11 +46,6 @@ process SRA_FASTQ_FTP { echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } } diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 7d47f5e3..4e7f9e87 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -14,7 +14,7 @@ process SRA_IDS_TO_RUNINFO { output: path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions script: def metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' @@ -24,10 +24,5 @@ process SRA_IDS_TO_RUNINFO { id.txt \\ ${id}.runinfo.tsv \\ $metadata_fields - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 9c83cf53..40f6a03c 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -11,17 +11,12 @@ process SRA_RUNINFO_TO_FTP { output: path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions script: """ sra_runinfo_to_ftp.py \\ ${runinfo.join(',')} \\ ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 577117ed..7dcb66e9 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -12,7 +12,7 @@ process CUSTOM_SRATOOLSNCBISETTINGS { output: path('*.mkfg') , emit: ncbi_settings - path 'versions.yml', emit: versions + tuple val("${task.process}"), val('sratools'), eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh index cfe3a324..b553659b 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh +++ b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh @@ -38,8 +38,3 @@ else fi cp "${NCBI_SETTINGS}" ./ fi - -cat <<-END_VERSIONS > versions.yml -"!{task.process}": - sratools: $(vdb-config --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index ca94a925..df45971f 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -16,7 +16,8 @@ process SRATOOLS_FASTERQDUMP { output: tuple val(meta), path('*.fastq.gz'), emit: reads - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('sratools'), eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions + tuple val("${task.process}"), val('pigz'), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions when: task.ext.when == null || task.ext.when @@ -45,11 +46,5 @@ process SRATOOLS_FASTERQDUMP { --no-name \\ --processes $task.cpus \\ *.fastq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sratools: \$(fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS """ } diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 3c30739a..6c3cde54 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -14,7 +14,7 @@ process SRATOOLS_PREFETCH { output: tuple val(meta), path(id), emit: sra - path 'versions.yml' , emit: versions + tuple val("${task.process}"), val('sratools'), eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions when: task.ext.when 
== null || task.ext.when diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index a72a4bfb..72156740 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -48,8 +48,3 @@ retry_with_backoff !{args2} \ !{id} [ -f !{id}.sralite ] && vdb-validate !{id}.sralite || vdb-validate !{id} - -cat <<-END_VERSIONS > versions.yml -"!{task.process}": - sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8a75bb95..de3a38c2 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -12,7 +12,7 @@ process UNTAR { output: tuple val(meta), path("$prefix"), emit: untar - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('untar'), eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'"), topic: versions when: task.ext.when == null || task.ext.when @@ -42,11 +42,6 @@ process UNTAR { $archive \\ $args2 fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ stub: @@ -54,10 +49,5 @@ process UNTAR { """ mkdir $prefix touch ${prefix}/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS """ } diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 256c8104..3a57d1b2 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -13,21 +13,16 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { sratools_pigz_args // string main: - - ch_versions = Channel.empty() - // // Detect existing NCBI user settings or create new ones. // CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() ) ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings - ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions) // // Prefetch sequencing reads in SRA format. // SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key ) - ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first()) // // Convert the SRA format into one or more compressed FASTQ files. 
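The hunks above swap the per-task versions.yml heredocs for process outputs published to a 'versions' topic channel, built from val() and eval() components. A minimal sketch of the pattern, assuming the topic-channel preview feature (nextflow.preview.topic = true) and a hypothetical FOO process:

    process FOO {
        output:
        path 'out.txt', emit: out
        tuple val("${task.process}"), val('foo'), eval('foo --version'), topic: versions

        script:
        """
        foo > out.txt
        """
    }

    workflow {
        FOO()
        // tuples sent to the 'versions' topic by any process arrive on a single channel
        Channel.topic('versions').view()
    }

Because eval('foo --version') is resolved by the runtime when the task completes, the task script no longer has to write a versions.yml file itself, which is why the heredocs can be deleted wholesale.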
@@ -39,9 +34,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { sratools_fasterqdump_args, sratools_pigz_args ) - ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) emit: reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..a60c69da 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,7 +2,6 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import org.yaml.snakeyaml.Yaml import nextflow.extension.FilesEx /* @@ -92,15 +91,6 @@ def getWorkflowVersion() { return version_string } -// -// Get software versions for pipeline -// -def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } - return yaml.dumpAsMap(versions).trim() -} - // // Get workflow version for pipeline // @@ -117,10 +107,15 @@ def workflowVersionToYAML() { // def softwareVersionsToYAML(ch_versions) { return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) + .unique() + .map { process, name, version -> + """ + ${process.tokenize(':').last()}: + ${name}: ${version} + """.stripIndent().trim() + } + .unique() + .mix(Channel.of(workflowVersionToYAML())) } // @@ -358,13 +353,13 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Write summary e-mail HTML to a file def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.html"); output_hf.delete() // Write summary e-mail TXT to a file def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.txt"); output_tf.delete() } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 8cdd6845..faa5ee08 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -32,8 +32,6 @@ workflow SRA { ids // channel: [ ids ] main: - ch_versions = Channel.empty() - // // MODULE: Get SRA run information for public database ids // @@ -41,7 +39,6 @@ workflow SRA { ids, params.ena_metadata_fields ?: '' ) - ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first()) // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] @@ -49,7 +46,6 @@ workflow SRA { SRA_RUNINFO_TO_FTP ( SRA_IDS_TO_RUNINFO.out.tsv ) - ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first()) SRA_RUNINFO_TO_FTP .out @@ -96,7 +92,6 @@ workflow SRA { ch_sra_reads.ftp, params.sra_fastq_ftp_args ) - ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first()) // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. 
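To illustrate the reworked softwareVersionsToYAML above: a topic tuple such as ('NFCORE_FETCHNGS:SRA:SRA_FASTQ_FTP', 'wget', '1.20.1') would render roughly as

    SRA_FASTQ_FTP:
      wget: 1.20.1

with the exact whitespace determined by the stripIndent().trim() call, and the workflow's own version entry from workflowVersionToYAML() mixed in afterwards. The '1.20.1' value is only an assumed example, taken from the wget container tag used elsewhere in the pipeline.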
@@ -107,7 +102,6 @@ workflow SRA { params.sratools_fasterqdump_args, params.sratools_pigz_args ) - ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first()) // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums @@ -117,7 +111,6 @@ workflow SRA { 'era-fasp', params.aspera_cli_args ) - ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first()) // Isolate FASTQ channel which will be added to emit block SRA_FASTQ_FTP @@ -176,22 +169,21 @@ workflow SRA { MULTIQC_MAPPINGS_CONFIG ( ch_mappings ) - ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions) ch_sample_mappings_yml = MULTIQC_MAPPINGS_CONFIG.out.yml } // // Collate and save software versions // - softwareVersionsToYAML(ch_versions) + softwareVersionsToYAML(Channel.topic('versions')) .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_versions_yml } emit: samplesheet = ch_samplesheet mappings = ch_mappings sample_mappings = ch_sample_mappings_yml sra_metadata = ch_sra_metadata - versions = ch_versions.unique() publish: ch_fastq >> 'fastq/' From 5ae15624bd4929b1e017669abe8a96fe41816bfc Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 20:04:38 -0500 Subject: [PATCH 05/25] Use static types, record types Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 19 +++++++---- modules/local/multiqc_mappings_config/main.nf | 8 +++-- modules/local/sra_fastq_ftp/main.nf | 16 +++++++--- modules/local/sra_ids_to_runinfo/main.nf | 10 +++--- modules/local/sra_runinfo_to_ftp/main.nf | 8 +++-- modules/local/sra_to_samplesheet/main.nf | 12 +++---- .../custom/sratoolsncbisettings/main.nf | 9 +++--- modules/nf-core/sratools/fasterqdump/main.nf | 28 +++++++++------- modules/nf-core/sratools/prefetch/main.nf | 24 +++++++------- .../prefetch/templates/retry_with_backoff.sh | 4 +-- modules/nf-core/untar/main.nf | 25 +++++++++------ types/types.nf | 5 +++ workflows/sra/main.nf | 32 +++++++++++-------- 13 files changed, 121 insertions(+), 79 deletions(-) create mode 100644 types/types.nf diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 670d6c5a..06655507 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -1,3 +1,5 @@ +include { Sample } from '../../types/types' + process ASPERA_CLI { tag "$meta.id" label 'process_medium' @@ -8,16 +10,21 @@ process ASPERA_CLI { 'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }" input: - tuple val(meta), val(fastq) - val user - var args + Sample input + String user + String args output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions + Sample fastq = new Sample(meta, path("*fastq.gz")) + Sample md5 = new Sample(meta, path("*md5")) + + topic: + [ task.process, 'aspera_cli', eval('ascli --version') ] >> 'versions' script: + meta = input.meta + fastq = input.files + def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { """ diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index 738069c7..f5913375 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -7,11 +7,13 @@ process MULTIQC_MAPPINGS_CONFIG { 'biocontainers/python:3.9--1' }" input: - path csv + Path csv output: - path "*yml" , emit: yml - tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions + Path yml = path("multiqc_config.yml") + + topic: + [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' script: """ diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index 55cf70ef..958876df 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -1,3 +1,4 @@ +include { Sample } from '../../types/types' process SRA_FASTQ_FTP { tag "$meta.id" @@ -10,15 +11,20 @@ process SRA_FASTQ_FTP { 'biocontainers/wget:1.20.1' }" input: - tuple val(meta), val(fastq) - val args + Sample input + String args output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions + Sample fastq = new Sample(meta, path("*fastq.gz")) + Sample md5 = new Sample(meta, path("*md5")) + + topic: + [ task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ] >> 'versions' script: + meta = input.meta + fastq = input.files + if (meta.single_end) { """ wget \\ diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 4e7f9e87..001417fd 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -9,12 +9,14 @@ process SRA_IDS_TO_RUNINFO { 'biocontainers/python:3.9--1' }" input: - val id - val fields + String id + String fields output: - path "*.tsv" , emit: tsv - tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions + Path tsv = path("*.runinfo.tsv") + + topic: + [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' script: def metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 40f6a03c..aafce77c 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -7,11 +7,13 @@ process SRA_RUNINFO_TO_FTP { 'biocontainers/python:3.9--1' }" input: - path runinfo + Path runinfo output: - path "*.tsv" , emit: tsv - tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions + Path tsv = path("*.runinfo_ftp.tsv") + + topic: + [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' script: """ diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index 92edf5df..a2651527 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ b/modules/local/sra_to_samplesheet/main.nf @@ -6,14 +6,14 @@ process SRA_TO_SAMPLESHEET { memory 100.MB input: - val meta - val pipeline - val strandedness - val mapping_fields + Map meta + String pipeline + String strandedness + String mapping_fields output: - tuple val(meta), path("*samplesheet.csv"), emit: samplesheet - tuple val(meta), path("*mappings.csv") , emit: mappings + Sample samplesheet = new Sample(meta, path("*samplesheet.csv")) + Sample mappings = new Sample(meta, path("*mappings.csv")) exec: // diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 7dcb66e9..36ba3da0 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -8,14 +8,13 @@ process CUSTOM_SRATOOLSNCBISETTINGS { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - val ids + List ids output: - path('*.mkfg') , emit: ncbi_settings - tuple val("${task.process}"), val('sratools'), eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions + Path ncbi_settings = path('*.mkfg') - when: - task.ext.when == null || task.ext.when + topic: + [ task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' shell: config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index df45971f..fe8da3d8 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -1,3 +1,5 @@ +include { Sample } from '../../types/types' + process SRATOOLS_FASTERQDUMP { tag "$meta.id" label 'process_medium' @@ -8,22 +10,26 @@ process SRATOOLS_FASTERQDUMP { 'quay.io/biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: - tuple val(meta), path(sra) - path ncbi_settings - path certificate - val fasterqdump_args // = '--split-files --include-technical' - val pigz_args // = '' + Tuple2 input + Path ncbi_settings + Path certificate + String fasterqdump_args = '--split-files --include-technical' + String pigz_args = '' + String prefix = '' output: - tuple val(meta), path('*.fastq.gz'), emit: reads - tuple val("${task.process}"), val('sratools'), eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions - tuple val("${task.process}"), val('pigz'), eval("pigz --version 2>&1 | sed 's/pigz //g'"), topic: versions + Sample reads = new Sample(meta, path('*.fastq.gz')) - when: - task.ext.when == null || task.ext.when + topic: + [ task.process, 
'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' + [ task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ] >> 'versions' script: - def prefix = task.ext.prefix ?: "${meta.id}" + meta = input.v1 + sra = input.v2 + if( !prefix ) + prefix = "${meta.id}" + def outfile = meta.single_end ? "${prefix}.fastq" : prefix def key_file = '' if (certificate.toString().endsWith('.jwt')) { diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 6c3cde54..c7a7a57d 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -8,26 +8,28 @@ process SRATOOLS_PREFETCH { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - tuple val(meta), val(id) - path ncbi_settings - path certificate + Tuple2 input + Path ncbi_settings + Path certificate + String prefetch_args = '' + String retry_args = '5 1 100' // output: - tuple val(meta), path(id), emit: sra - tuple val("${task.process}"), val('sratools'), eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'"), topic: versions + Tuple2 sra = input - when: - task.ext.when == null || task.ext.when + topic: + [ task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' shell: - args = task.ext.args ?: '' - args2 = task.ext.args2 ?: '5 1 100' // + meta = input.v1 + id = input.v2 + if (certificate) { if (certificate.toString().endsWith('.jwt')) { - args += " --perm ${certificate}" + prefetch_args += " --perm ${certificate}" } else if (certificate.toString().endsWith('.ngc')) { - args += " --ngc ${certificate}" + prefetch_args += " --ngc ${certificate}" } } diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index 72156740..7643c651 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -42,9 +42,9 @@ retry_with_backoff() { export NCBI_SETTINGS="$PWD/!{ncbi_settings}" -retry_with_backoff !{args2} \ +retry_with_backoff !{retry_args} \ prefetch \ - !{args} \ + !{prefetch_args} \ !{id} [ -f !{id}.sralite ] && vdb-validate !{id}.sralite || vdb-validate !{id} diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index de3a38c2..88d8d59c 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,3 +1,5 @@ +include { Sample } from '../../types/types' + process UNTAR { tag "$archive" label 'process_single' @@ -8,20 +10,22 @@ process UNTAR { 'nf-core/ubuntu:20.04' }" input: - tuple val(meta), path(archive) + Sample input + String args = '' + String args2 = '' + String prefix = '' output: - tuple val(meta), path("$prefix"), emit: untar - tuple val("${task.process}"), val('untar'), eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'"), topic: versions + Sample untar = new Sample(meta, path("$prefix")) - when: - task.ext.when == null || task.ext.when + topic: + [ task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ] >> 'versions' script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) - + meta = input.meta + archive = input.files.first() + if( !prefix ) + prefix = meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") """ mkdir $prefix @@ -45,7 +49,8 @@ process UNTAR { """ stub: - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + if( !prefix ) + prefix = meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") """ mkdir $prefix touch ${prefix}/file.txt diff --git a/types/types.nf b/types/types.nf new file mode 100644 index 00000000..a1a38148 --- /dev/null +++ b/types/types.nf @@ -0,0 +1,5 @@ + +record Sample { + Map meta + List files +} diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index faa5ee08..3d5dbb60 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -20,6 +20,14 @@ include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcor include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT RECORD TYPES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { Sample } from '../../types/types' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -77,11 +85,11 @@ workflow SRA { } aspera: download_method == 'aspera' - return [ meta, meta.fastq_aspera.tokenize(';').take(2) ] + return new Sample( meta, meta.fastq_aspera.tokenize(';').take(2).collect( name -> file(name) ) ) ftp: download_method == 'ftp' - return [ meta, [ meta.fastq_1, meta.fastq_2 ] ] + return new Sample( meta, [ file(meta.fastq_1), file(meta.fastq_2) ] ) sratools: download_method == 'sratools' - return [ meta, meta.run_accession ] + return new Tuple2( meta, meta.run_accession ) } .set { ch_sra_reads } @@ -120,14 +128,12 @@ workflow SRA { .mix(ASPERA_CLI.out.fastq) .tap { ch_fastq } .map { - meta, fastq -> - def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] - def meta_clone = meta.clone() - - meta_clone.fastq_1 = reads[0] ? "${params.outdir}/fastq/${reads[0].getName()}" : '' - meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${params.outdir}/fastq/${reads[1].getName()}" : '' - - return meta_clone + sample -> + def reads = sample.files + def meta = sample.meta.clone() + meta.fastq_1 = reads[0] ? "${params.outdir}/fastq/${reads[0].getName()}" : '' + meta.fastq_2 = reads[1] && !meta.single_end ? 
"${params.outdir}/fastq/${reads[1].getName()}" : '' + return meta } .set { ch_sra_metadata } } @@ -146,7 +152,7 @@ workflow SRA { SRA_TO_SAMPLESHEET .out .samplesheet - .map { it[1] } + .map { sample -> sample.files.first() } .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) .map { it.text.tokenize('\n').join('\n') } .collectFile(name:'samplesheet.csv') @@ -155,7 +161,7 @@ workflow SRA { SRA_TO_SAMPLESHEET .out .mappings - .map { it[1] } + .map { sample -> sample.files.first() } .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) .map { it.text.tokenize('\n').join('\n') } .collectFile(name:'id_mappings.csv') From b2f563d65c284e961504d1d10bb50846bd5d542d Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 26 Apr 2024 20:05:27 -0500 Subject: [PATCH 06/25] Refactor params as inputs for SRA workflow Signed-off-by: Ben Sherman --- main.nf | 16 ++++++++++++- workflows/sra/main.nf | 56 ++++++++++++++++++++++++++----------------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/main.nf b/main.nf index e178d8a5..7158a85b 100644 --- a/main.nf +++ b/main.nf @@ -40,7 +40,21 @@ workflow NFCORE_FETCHNGS { // // WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids // - SRA ( ids ) + SRA ( + ids, + params.ena_metadata_fields ?: '', + params.sample_mapping_fields, + params.nf_core_pipeline ?: '', + params.nf_core_rnaseq_strandedness ?: 'auto', + params.download_method, + params.skip_fastq_download, + params.dbgap_key, + params.aspera_cli_args, + params.sra_fastq_ftp_args, + params.sratools_fasterqdump_args, + params.sratools_pigz_args, + params.outdir + ) } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 3d5dbb60..bd72eaaf 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -37,7 +37,19 @@ include { Sample } from '../../types/types' workflow SRA { take: - ids // channel: [ ids ] + ids // channel: [ ids ] + ena_metadata_fields // string + sample_mapping_fields // string + nf_core_pipeline // string + nf_core_rnaseq_strandedness // string + download_method // enum: 'aspera' | 'ftp' | 'sratools' + skip_fastq_download // boolean + dbgap_key // string + aspera_cli_args // string + sra_fastq_ftp_args // string + sratools_fasterqdump_args // string + sratools_pigz_args // string + outdir // string main: // @@ -45,7 +57,7 @@ workflow SRA { // SRA_IDS_TO_RUNINFO ( ids, - params.ena_metadata_fields ?: '' + ena_metadata_fields ) // @@ -68,27 +80,27 @@ workflow SRA { .unique() .set { ch_sra_metadata } - if (!params.skip_fastq_download) { + if (!skip_fastq_download) { ch_sra_metadata .branch { meta -> - def download_method = 'ftp' + def method = 'ftp' // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' - if (meta.fastq_aspera && params.download_method == 'aspera') { - download_method = 'aspera' + if (meta.fastq_aspera && download_method == 'aspera') { + method = 'aspera' } - if ((!meta.fastq_aspera && !meta.fastq_1) || params.download_method == 'sratools') { - download_method = 'sratools' + if ((!meta.fastq_aspera && !meta.fastq_1) || download_method == 'sratools') { + method = 'sratools' } - aspera: download_method == 'aspera' + aspera: method == 'aspera' return new Sample( 
meta, meta.fastq_aspera.tokenize(';').take(2).collect( name -> file(name) ) ) - ftp: download_method == 'ftp' + ftp: method == 'ftp' return new Sample( meta, [ file(meta.fastq_1), file(meta.fastq_2) ] ) - sratools: download_method == 'sratools' + sratools: method == 'sratools' return new Tuple2( meta, meta.run_accession ) } .set { ch_sra_reads } @@ -98,7 +110,7 @@ workflow SRA { // SRA_FASTQ_FTP ( ch_sra_reads.ftp, - params.sra_fastq_ftp_args + sra_fastq_ftp_args ) // @@ -106,9 +118,9 @@ workflow SRA { // FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( ch_sra_reads.sratools, - params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [], - params.sratools_fasterqdump_args, - params.sratools_pigz_args + dbgap_key ? file(dbgap_key, checkIfExists: true) : [], + sratools_fasterqdump_args, + sratools_pigz_args ) // @@ -117,7 +129,7 @@ workflow SRA { ASPERA_CLI ( ch_sra_reads.aspera, 'era-fasp', - params.aspera_cli_args + aspera_cli_args ) // Isolate FASTQ channel which will be added to emit block @@ -131,8 +143,8 @@ workflow SRA { sample -> def reads = sample.files def meta = sample.meta.clone() - meta.fastq_1 = reads[0] ? "${params.outdir}/fastq/${reads[0].getName()}" : '' - meta.fastq_2 = reads[1] && !meta.single_end ? "${params.outdir}/fastq/${reads[1].getName()}" : '' + meta.fastq_1 = reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '' + meta.fastq_2 = reads[1] && !meta.single_end ? "${outdir}/fastq/${reads[1].getName()}" : '' return meta } .set { ch_sra_metadata } @@ -143,9 +155,9 @@ workflow SRA { // SRA_TO_SAMPLESHEET ( ch_sra_metadata, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', - params.sample_mapping_fields + nf_core_pipeline, + nf_core_rnaseq_strandedness, + sample_mapping_fields ) // Merge samplesheets and mapping files across all samples @@ -171,7 +183,7 @@ workflow SRA { // MODULE: Create a MutiQC config file with sample name mappings // ch_sample_mappings_yml = Channel.empty() - if (params.sample_mapping_fields) { + if (sample_mapping_fields) { MULTIQC_MAPPINGS_CONFIG ( ch_mappings ) From 24b34cfaf46b1ccd740ce5814eac15646deb1bfc Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 30 Apr 2024 15:01:04 -0500 Subject: [PATCH 07/25] New dataflow syntax Signed-off-by: Ben Sherman --- main.nf | 10 +- modules/local/sra_to_samplesheet/main.nf | 46 +-- .../utils_nfcore_fetchngs_pipeline/main.nf | 23 +- .../main.nf | 47 +-- .../nf-core/utils_nfcore_pipeline/main.nf | 22 +- workflows/sra/main.nf | 270 +++++++++--------- 6 files changed, 212 insertions(+), 206 deletions(-) diff --git a/main.nf b/main.nf index 7158a85b..6e0cabd4 100644 --- a/main.nf +++ b/main.nf @@ -9,7 +9,7 @@ ---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl = 2 +nextflow.preview.dsl = 3 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -33,7 +33,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetc workflow NFCORE_FETCHNGS { take: - ids // channel: database ids read in from --input + ids // Channel + params // ParamsMap main: @@ -69,7 +70,7 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - PIPELINE_INITIALISATION ( + ids = PIPELINE_INITIALISATION ( params.version, params.help, params.validate_params, @@ -84,7 +85,8 @@ workflow { // WORKFLOW: Run primary workflows for the pipeline // NFCORE_FETCHNGS ( - PIPELINE_INITIALISATION.out.ids + ids, + params, ) // diff --git a/modules/local/sra_to_samplesheet/main.nf 
b/modules/local/sra_to_samplesheet/main.nf index a2651527..9272d131 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ b/modules/local/sra_to_samplesheet/main.nf @@ -1,25 +1,39 @@ process SRA_TO_SAMPLESHEET { - tag "$meta.id" - executor 'local' memory 100.MB input: - Map meta + List sra_metadata String pipeline String strandedness String mapping_fields output: - Sample samplesheet = new Sample(meta, path("*samplesheet.csv")) - Sample mappings = new Sample(meta, path("*mappings.csv")) + Path samplesheet = path("samplesheet.csv") + Path mappings = path("id_mappings.csv") exec: // // Create samplesheet containing metadata // + def records = sra_metadata.collect { meta -> + getSraRecord(meta, pipeline, strandedness, mapping_fields) + } + + def samplesheet = records + .collect { pipeline_map, mappings_map -> pipeline_map } + .sort { record -> record.id } + mergeCsv(samplesheet, task.workDir.resolve('samplesheet.csv')) + + def mappings = records + .collect { pipeline_map, mappings_map -> mappings_map } + .sort { record -> record.id } + mergeCsv(mappings, task.workDir.resolve('id_mappings.csv')) +} + +def getSraRecord(Map meta, String pipeline, String strandedness, String mapping_fields) { // Remove custom keys needed to download the data def meta_clone = meta.clone() meta_clone.remove("id") @@ -30,7 +44,7 @@ process SRA_TO_SAMPLESHEET { meta_clone.remove("single_end") // Add relevant fields to the beginning of the map - pipeline_map = [ + def pipeline_map = [ sample : "${meta.id.split('_')[0..-2].join('_')}", fastq_1 : meta.fastq_1, fastq_2 : meta.fastq_2 @@ -48,28 +62,16 @@ process SRA_TO_SAMPLESHEET { } pipeline_map << meta_clone - // Create a samplesheet - samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' - samplesheet += pipeline_map.values().collect{ '"' + it + '"'}.join(",") - - // Write samplesheet to file - def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv") - samplesheet_file.text = samplesheet - // // Create sample id mappings file // - mappings_map = pipeline_map.clone() - def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ it.trim().toLowerCase() } : [] + def mappings_map = pipeline_map.clone() + def fields = mapping_fields ? 
['sample'] + mapping_fields.split(',').collect{ v -> v.trim().toLowerCase() } : [] if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") } - // Create mappings - mappings = fields.collect{ '"' + it + '"'}.join(",") + '\n' - mappings += mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") + mappings_map = mappings_map.subMap(fields) - // Write mappings to file - def mappings_file = task.workDir.resolve("${meta.id}.mappings.csv") - mappings_file.text = mappings + return [ pipeline_map, mappings_map ] } diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 5e317f09..98d64346 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -75,23 +75,22 @@ workflow PIPELINE_INITIALISATION { // // Auto-detect input id type // - ch_input = file(input) - if (isSraId(ch_input)) { - sraCheckENAMetadataFields(ena_metadata_fields) - } else { + input = file(input) + if (!isSraId(input)) error('Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!') - } + sraCheckENAMetadataFields(ena_metadata_fields) // Read in ids from --input file - Channel - .from(ch_input) - .splitCsv(header:false, sep:'', strip:true) - .map { it[0] } - .unique() - .set { ch_ids } + input // Path + |> Channel.of // Channel + |> flatMap { csv -> + splitCsv(csv, header: false, schema: 'assets/schema_input.yml') + } // Channel + |> unique // Channel + |> set { ids } // Channel emit: - ids = ch_ids + ids // Channel } /* diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 3a57d1b2..a0f2df83 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -7,34 +7,39 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ // workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - ch_sra_ids // channel: [ val(meta), val(id) ] - ch_dbgap_key // channel: [ path(dbgap_key) ] - sratools_fasterqdump_args // string - sratools_pigz_args // string + sra_ids // Channel> + dbgap_key // Path + sratools_fasterqdump_args // String + sratools_pigz_args // String main: // // Detect existing NCBI user settings or create new ones. // - CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() ) - ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings + sra_ids // Channel> + |> collect // List> + |> CUSTOM_SRATOOLSNCBISETTINGS // Path + |> set { ncbi_settings } // Path - // - // Prefetch sequencing reads in SRA format. - // - SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key ) + sra_ids // Channel> + |> map { input -> + // + // Prefetch sequencing reads in SRA format. + // + input = SRATOOLS_PREFETCH ( input, ncbi_settings, dbgap_key ) - // - // Convert the SRA format into one or more compressed FASTQ files. - // - SRATOOLS_FASTERQDUMP ( - SRATOOLS_PREFETCH.out.sra, - ch_ncbi_settings, - ch_dbgap_key, - sratools_fasterqdump_args, - sratools_pigz_args - ) + // + // Convert the SRA format into one or more compressed FASTQ files. 
+ // + SRATOOLS_FASTERQDUMP ( + input, + ncbi_settings, + dbgap_key, + sratools_fasterqdump_args, + sratools_pigz_args ) + } // Channel + |> set { reads } // Channel emit: - reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] + reads // Channel } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a60c69da..01b8fe9b 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -105,17 +105,25 @@ def workflowVersionToYAML() { // // Get channel of software versions used in pipeline in YAML format // -def softwareVersionsToYAML(ch_versions) { - return ch_versions - .unique() - .map { process, name, version -> +workflow softwareVersionsToYAML { + take: + versions + + main: + versions // Channel> + |> unique // Channel> + |> map { process, name, version -> """ ${process.tokenize(':').last()}: ${name}: ${version} """.stripIndent().trim() - } - .unique() - .mix(Channel.of(workflowVersionToYAML())) + } // Channel + |> unique // Channel + |> mix( workflowVersionToYAML() ) // Channel + |> set { versions_yml } + + emit: + versions_yml } // diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index bd72eaaf..4820db42 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -37,181 +37,171 @@ include { Sample } from '../../types/types' workflow SRA { take: - ids // channel: [ ids ] - ena_metadata_fields // string - sample_mapping_fields // string - nf_core_pipeline // string - nf_core_rnaseq_strandedness // string - download_method // enum: 'aspera' | 'ftp' | 'sratools' + ids // List + ena_metadata_fields // String + sample_mapping_fields // String + nf_core_pipeline // String + nf_core_rnaseq_strandedness // String + download_method // String enum: 'aspera' | 'ftp' | 'sratools' skip_fastq_download // boolean - dbgap_key // string - aspera_cli_args // string - sra_fastq_ftp_args // string - sratools_fasterqdump_args // string - sratools_pigz_args // string - outdir // string + dbgap_key // String + aspera_cli_args // String + sra_fastq_ftp_args // String + sratools_fasterqdump_args // String + sratools_pigz_args // String + outdir // String main: - // - // MODULE: Get SRA run information for public database ids - // - SRA_IDS_TO_RUNINFO ( - ids, - ena_metadata_fields - ) - - // - // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] - // - SRA_RUNINFO_TO_FTP ( - SRA_IDS_TO_RUNINFO.out.tsv - ) - - SRA_RUNINFO_TO_FTP - .out - .tsv - .splitCsv(header:true, sep:'\t') - .map { - meta -> - def meta_clone = meta.clone() - meta_clone.single_end = meta_clone.single_end.toBoolean() - return meta_clone - } - .unique() - .set { ch_sra_metadata } + ids // Channel + // + // MODULE: Get SRA run information for public database ids + // + |> map { id -> + SRA_IDS_TO_RUNINFO ( id, ena_metadata_fields ) + } // Channel + // + // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] + // + |> map(SRA_RUNINFO_TO_FTP) // Channel + |> set { runinfo_ftp } // Channel + |> flatMap { tsv -> + splitCsv(tsv, header:true, sep:'\t') + } // Channel + |> map { meta -> + meta + [single_end: meta.single_end.toBoolean()] + } // Channel + |> unique // Channel + |> set { sra_metadata } // Channel if (!skip_fastq_download) { - ch_sra_metadata - .branch { - meta -> - def method = 'ftp' - // meta.fastq_aspera is a metadata string with ENA fasp links 
supported by Aspera - // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' - // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' - if (meta.fastq_aspera && download_method == 'aspera') { - method = 'aspera' - } - if ((!meta.fastq_aspera && !meta.fastq_1) || download_method == 'sratools') { - method = 'sratools' - } - - aspera: method == 'aspera' - return new Sample( meta, meta.fastq_aspera.tokenize(';').take(2).collect( name -> file(name) ) ) - ftp: method == 'ftp' - return new Sample( meta, [ file(meta.fastq_1), file(meta.fastq_2) ] ) - sratools: method == 'sratools' - return new Tuple2( meta, meta.run_accession ) - } - .set { ch_sra_reads } - // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // - SRA_FASTQ_FTP ( - ch_sra_reads.ftp, - sra_fastq_ftp_args - ) + sra_metadata + |> filter { meta -> + getDownloadMethod(meta, download_method) == 'ftp' + } // Channel + |> map { meta -> + def sample = new Sample( meta, [ file(meta.fastq_1), file(meta.fastq_2) ] ) + SRA_FASTQ_FTP ( sample, sra_fastq_ftp_args ) + } // fastq: Channel, md5: Channel + |> set { ftp_samples } // fastq: Channel, md5: Channel // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - ch_sra_reads.sratools, - dbgap_key ? file(dbgap_key, checkIfExists: true) : [], - sratools_fasterqdump_args, - sratools_pigz_args - ) - + sra_metadata + |> filter { meta -> + getDownloadMethod(meta, download_method) == 'sratools' + } // Channel + |> map { meta -> + new Tuple2( meta, meta.run_accession ) + } // Channel> + |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( + dbgap_key ? file(dbgap_key, checkIfExists: true) : [], + sratools_fasterqdump_args, + sratools_pigz_args ) // Channel + |> set { sratools_samples } // Channel + // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // - ASPERA_CLI ( - ch_sra_reads.aspera, - 'era-fasp', - aspera_cli_args - ) + sra_metadata + |> filter { meta -> + getDownloadMethod(meta, download_method) == 'aspera' + } // Channel + |> map { meta -> + def sample = new Sample( meta, meta.fastq_aspera.tokenize(';').take(2).collect( name -> file(name) ) ) + ASPERA_CLI ( sample, 'era-fasp', aspera_cli_args ) + } // fastq: Channel, md5: Channel + |> set { aspera_samples } // fastq: Channel, md5: Channel // Isolate FASTQ channel which will be added to emit block - SRA_FASTQ_FTP - .out - .fastq - .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) - .mix(ASPERA_CLI.out.fastq) - .tap { ch_fastq } - .map { - sample -> - def reads = sample.files - def meta = sample.meta.clone() - meta.fastq_1 = reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '' - meta.fastq_2 = reads[1] && !meta.single_end ? "${outdir}/fastq/${reads[1].getName()}" : '' - return meta - } - .set { ch_sra_metadata } + fastq = mix(ftp_samples.fastq, sratools_samples.reads, aspera_samples.fastq) + md5 = mix(ftp_samples.md5, aspera_samples.md5) + + fastq // Channel + |> map { sample -> + def reads = sample.files + def meta = sample.meta + meta + [ + fastq_1: reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '', + fastq_2: reads[1] && !meta.single_end ? 
"${outdir}/fastq/${reads[1].getName()}" : '' + ] + } // Channel + |> set { sra_metadata } // Channel } // // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet // - SRA_TO_SAMPLESHEET ( - ch_sra_metadata, - nf_core_pipeline, - nf_core_rnaseq_strandedness, - sample_mapping_fields - ) - - // Merge samplesheets and mapping files across all samples - SRA_TO_SAMPLESHEET - .out - .samplesheet - .map { sample -> sample.files.first() } - .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'samplesheet.csv') - .set { ch_samplesheet } - - SRA_TO_SAMPLESHEET - .out - .mappings - .map { sample -> sample.files.first() } - .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'id_mappings.csv') - .set { ch_mappings } + sra_metadata // Channel + |> collect // List + |> { sra_metadata -> + SRA_TO_SAMPLESHEET ( + sra_metadata, + nf_core_pipeline, + nf_core_rnaseq_strandedness, + sample_mapping_fields ) + } // samplesheet: Path, mappings: Path + |> set { index_files } // samplesheet: Path, mappings: Path + + samplesheet = index_files.samplesheet // Path + mappings = index_files.mappings // Path // // MODULE: Create a MutiQC config file with sample name mappings // - ch_sample_mappings_yml = Channel.empty() - if (sample_mapping_fields) { - MULTIQC_MAPPINGS_CONFIG ( - ch_mappings - ) - ch_sample_mappings_yml = MULTIQC_MAPPINGS_CONFIG.out.yml - } + sample_mappings = sample_mapping_fields + ? MULTIQC_MAPPINGS_CONFIG ( mappings ) // Path + : null // // Collate and save software versions // - softwareVersionsToYAML(Channel.topic('versions')) - .collectFile(name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_versions_yml } + 'versions' // String + |> Channel.topic // Channel> + |> softwareVersionsToYAML // Channel + |> collect(sort: true) // List + |> exec('SOFTWARE_VERIONS') { versions -> + def path = task.workDir.resolve('nf_core_fetchngs_software_mqc_versions.yml') + mergeText(versions, path, newLine: true) + return path + } // Path + |> set { versions_yml } // Path emit: - samplesheet = ch_samplesheet - mappings = ch_mappings - sample_mappings = ch_sample_mappings_yml - sra_metadata = ch_sra_metadata + samplesheet + mappings + sample_mappings + sra_metadata publish: - ch_fastq >> 'fastq/' - ASPERA_CLI.out.md5 >> 'fastq/md5/' - SRA_FASTQ_FTP.out.md5 >> 'fastq/md5/' - SRA_RUNINFO_TO_FTP.out.tsv >> 'metadata/' - ch_versions_yml >> 'pipeline_info/' - ch_samplesheet >> 'samplesheet/' - ch_mappings >> 'samplesheet/' - ch_sample_mappings_yml >> 'samplesheet/' + fastq >> 'fastq/' + md5 >> 'fastq/md5/' + runinfo_ftp >> 'metadata/' + versions_yml >> 'pipeline_info/' + samplesheet >> 'samplesheet/' + mappings >> 'samplesheet/' + sample_mappings >> 'samplesheet/' +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +def getDownloadMethod(Map meta, String download_method) { + // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera + // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' + // For paired-end: 
'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' + if (meta.fastq_aspera && download_method == 'aspera') + return 'aspera' + if ((!meta.fastq_aspera && !meta.fastq_1) || download_method == 'sratools') + return 'sratools' + return 'ftp' } /* From 2771765c771cc7df7cf4932051fd6e86a18c2ec9 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 18 May 2024 14:40:37 -0500 Subject: [PATCH 08/25] Simplify process inputs/outputs Signed-off-by: Ben Sherman --- modules/local/aspera_cli/main.nf | 15 +++---- modules/local/multiqc_mappings_config/main.nf | 2 +- modules/local/sra_fastq_ftp/main.nf | 14 +++---- modules/local/sra_ids_to_runinfo/main.nf | 2 +- modules/local/sra_runinfo_to_ftp/main.nf | 2 +- .../custom/sratoolsncbisettings/main.nf | 2 +- modules/nf-core/sratools/fasterqdump/main.nf | 15 +++---- modules/nf-core/sratools/prefetch/main.nf | 10 ++--- modules/nf-core/untar/main.nf | 13 +++---- .../main.nf | 21 +++++----- .../nf-core/utils_nfcore_pipeline/main.nf | 2 - workflows/sra/main.nf | 39 +++++++++++-------- 12 files changed, 65 insertions(+), 72 deletions(-) diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 06655507..871addbe 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -1,5 +1,3 @@ -include { Sample } from '../../types/types' - process ASPERA_CLI { tag "$meta.id" label 'process_medium' @@ -10,21 +8,20 @@ process ASPERA_CLI { 'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }" input: - Sample input + Map meta + List fastq String user String args output: - Sample fastq = new Sample(meta, path("*fastq.gz")) - Sample md5 = new Sample(meta, path("*md5")) + meta = meta + fastq = path("*fastq.gz") + md5 = path("*md5") topic: - [ task.process, 'aspera_cli', eval('ascli --version') ] >> 'versions' + tuple( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions' script: - meta = input.meta - fastq = input.files - def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { """ diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index f5913375..1fa32cdb 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -13,7 +13,7 @@ process MULTIQC_MAPPINGS_CONFIG { Path yml = path("multiqc_config.yml") topic: - [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' + tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: """ diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index 958876df..37357a03 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -1,4 +1,3 @@ -include { Sample } from '../../types/types' process SRA_FASTQ_FTP { tag "$meta.id" @@ -11,20 +10,19 @@ process SRA_FASTQ_FTP { 'biocontainers/wget:1.20.1' }" input: - Sample input + Map meta + List fastq String args output: - Sample fastq = new Sample(meta, path("*fastq.gz")) - Sample md5 = new Sample(meta, path("*md5")) + meta = meta + fastq = path("*fastq.gz") + md5 = path("*md5") topic: - [ task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ] >> 'versions' + tuple( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions' script: - meta = input.meta - fastq = input.files - if (meta.single_end) { """ wget \\ diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 001417fd..919940c5 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -16,7 +16,7 @@ process SRA_IDS_TO_RUNINFO { Path tsv = path("*.runinfo.tsv") topic: - [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' + tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: def metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index aafce77c..f5dbe6dc 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -13,7 +13,7 @@ process SRA_RUNINFO_TO_FTP { Path tsv = path("*.runinfo_ftp.tsv") topic: - [ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions' + tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: """ diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 36ba3da0..2dbcf49f 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -14,7 +14,7 @@ process CUSTOM_SRATOOLSNCBISETTINGS { Path ncbi_settings = path('*.mkfg') topic: - [ task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' + tuple( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' shell: config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index fe8da3d8..8fbddfb2 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -1,5 +1,3 @@ -include { Sample } from '../../types/types' - process SRATOOLS_FASTERQDUMP { tag "$meta.id" label 'process_medium' @@ -10,7 +8,8 @@ process SRATOOLS_FASTERQDUMP { 'quay.io/biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: - Tuple2 input + Map meta + Path sra Path ncbi_settings Path certificate String fasterqdump_args = '--split-files --include-technical' @@ -18,18 +17,16 @@ process SRATOOLS_FASTERQDUMP { String prefix = '' output: - Sample reads = new Sample(meta, path('*.fastq.gz')) + meta + fastq = path('*.fastq.gz') topic: - [ task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' - [ task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ] >> 'versions' + tuple( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + tuple( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' script: - meta = input.v1 - sra = input.v2 if( !prefix ) prefix = "${meta.id}" - def outfile = meta.single_end ? 
"${prefix}.fastq" : prefix def key_file = '' if (certificate.toString().endsWith('.jwt')) { diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index c7a7a57d..735f1fc8 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -8,22 +8,20 @@ process SRATOOLS_PREFETCH { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - Tuple2 input + Map meta Path ncbi_settings Path certificate String prefetch_args = '' String retry_args = '5 1 100' // output: - Tuple2 sra = input + sra = path(id) topic: - [ task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ] >> 'versions' + tuple( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' shell: - meta = input.v1 - id = input.v2 - + id = meta.run_accession if (certificate) { if (certificate.toString().endsWith('.jwt')) { prefetch_args += " --perm ${certificate}" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 88d8d59c..ef081b97 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -1,5 +1,3 @@ -include { Sample } from '../../types/types' - process UNTAR { tag "$archive" label 'process_single' @@ -10,22 +8,23 @@ process UNTAR { 'nf-core/ubuntu:20.04' }" input: - Sample input + Map meta + Path archive String args = '' String args2 = '' String prefix = '' output: - Sample untar = new Sample(meta, path("$prefix")) + meta + untar = path("$prefix") topic: - [ task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ] >> 'versions' + tuple( task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ) >> 'versions' script: - meta = input.meta - archive = input.files.first() if( !prefix ) prefix = meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") + """ mkdir $prefix diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index a0f2df83..f0f5c343 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -7,7 +7,7 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ // workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - sra_ids // Channel> + sra_metadata // Channel dbgap_key // Path sratools_fasterqdump_args // String sratools_pigz_args // String @@ -16,30 +16,31 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Detect existing NCBI user settings or create new ones. // - sra_ids // Channel> - |> collect // List> + sra_metadata // Channel + |> collect // List |> CUSTOM_SRATOOLSNCBISETTINGS // Path |> set { ncbi_settings } // Path - sra_ids // Channel> - |> map { input -> + sra_metadata // Channel + |> map { meta -> // // Prefetch sequencing reads in SRA format. // - input = SRATOOLS_PREFETCH ( input, ncbi_settings, dbgap_key ) + sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) // // Convert the SRA format into one or more compressed FASTQ files. 
// SRATOOLS_FASTERQDUMP ( - input, + meta, + sra, ncbi_settings, dbgap_key, sratools_fasterqdump_args, sratools_pigz_args ) - } // Channel - |> set { reads } // Channel + } // Channel)> + |> set { reads } // Channel)> emit: - reads // Channel + reads // Channel)> } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 01b8fe9b..074d9d98 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,8 +2,6 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import nextflow.extension.FilesEx - /* ======================================================================================== SUBWORKFLOW DEFINITION diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 4820db42..f8e6327e 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -83,10 +83,10 @@ workflow SRA { getDownloadMethod(meta, download_method) == 'ftp' } // Channel |> map { meta -> - def sample = new Sample( meta, [ file(meta.fastq_1), file(meta.fastq_2) ] ) - SRA_FASTQ_FTP ( sample, sra_fastq_ftp_args ) - } // fastq: Channel, md5: Channel - |> set { ftp_samples } // fastq: Channel, md5: Channel + def fastq = [ file(meta.fastq_1), file(meta.fastq_2) ] + SRA_FASTQ_FTP ( meta, fastq, sra_fastq_ftp_args ) + } // Channel, md5: List)> + |> set { ftp_samples } // Channel, md5: List)> // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. @@ -95,14 +95,11 @@ workflow SRA { |> filter { meta -> getDownloadMethod(meta, download_method) == 'sratools' } // Channel - |> map { meta -> - new Tuple2( meta, meta.run_accession ) - } // Channel> |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( dbgap_key ? 
file(dbgap_key, checkIfExists: true) : [], sratools_fasterqdump_args, - sratools_pigz_args ) // Channel - |> set { sratools_samples } // Channel + sratools_pigz_args ) // Channel)> + |> set { sratools_samples } // Channel)> // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums @@ -112,14 +109,22 @@ workflow SRA { getDownloadMethod(meta, download_method) == 'aspera' } // Channel |> map { meta -> - def sample = new Sample( meta, meta.fastq_aspera.tokenize(';').take(2).collect( name -> file(name) ) ) - ASPERA_CLI ( sample, 'era-fasp', aspera_cli_args ) - } // fastq: Channel, md5: Channel - |> set { aspera_samples } // fastq: Channel, md5: Channel + def fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) } + ASPERA_CLI ( meta, fastq, 'era-fasp', aspera_cli_args ) + } // Channel, md5: List)> + |> set { aspera_samples } // Channel, md5: List)> // Isolate FASTQ channel which will be added to emit block - fastq = mix(ftp_samples.fastq, sratools_samples.reads, aspera_samples.fastq) - md5 = mix(ftp_samples.md5, aspera_samples.md5) + fastq = mix( + ftp_samples |> map { out -> new Sample(out.meta, out.fastq) }, + sratools_samples |> map { out -> new Sample(out.meta, out.fastq) }, + aspera_samples |> map { out -> new Sample(out.meta, out.fastq) } + ) + + md5 = mix( + ftp_samples |> map { out -> new Sample(out.meta, out.md5) }, + aspera_samples |> map { out -> new Sample(out.meta, out.md5) } + ) fastq // Channel |> map { sample -> @@ -144,8 +149,8 @@ workflow SRA { nf_core_pipeline, nf_core_rnaseq_strandedness, sample_mapping_fields ) - } // samplesheet: Path, mappings: Path - |> set { index_files } // samplesheet: Path, mappings: Path + } // ProcessOut(samplesheet: Path, mappings: Path) + |> set { index_files } // ProcessOut(samplesheet: Path, mappings: Path) samplesheet = index_files.samplesheet // Path mappings = index_files.mappings // Path From 90e4ac1812f52455c8fc593a2095dfd68d13ac70 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 18 May 2024 16:05:43 -0500 Subject: [PATCH 09/25] Replace `def` with `fn` / `let` / `var` Signed-off-by: Ben Sherman --- main.nf | 6 +- modules/local/aspera_cli/main.nf | 10 +- modules/local/multiqc_mappings_config/main.nf | 4 +- modules/local/sra_fastq_ftp/main.nf | 6 +- modules/local/sra_ids_to_runinfo/main.nf | 8 +- modules/local/sra_runinfo_to_ftp/main.nf | 4 +- modules/local/sra_to_samplesheet/main.nf | 35 ++-- .../custom/sratoolsncbisettings/main.nf | 4 +- modules/nf-core/sratools/fasterqdump/main.nf | 18 +- modules/nf-core/sratools/prefetch/main.nf | 10 +- modules/nf-core/untar/main.nf | 10 +- .../utils_nfcore_fetchngs_pipeline/main.nf | 62 +++---- .../nf-core/utils_nextflow_pipeline/main.nf | 44 ++--- .../nf-core/utils_nfcore_pipeline/main.nf | 159 ++++++++---------- types/types.nf | 4 +- workflows/sra/main.nf | 38 ++--- 16 files changed, 202 insertions(+), 220 deletions(-) diff --git a/main.nf b/main.nf index 6e0cabd4..8ddddece 100644 --- a/main.nf +++ b/main.nf @@ -33,8 +33,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetc workflow NFCORE_FETCHNGS { take: - ids // Channel - params // ParamsMap + ids : Channel + params : ParamsMap main: @@ -70,7 +70,7 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - ids = PIPELINE_INITIALISATION ( + let ids = PIPELINE_INITIALISATION ( params.version, params.help, params.validate_params, diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 
871addbe..b35eef3e 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -8,10 +8,10 @@ process ASPERA_CLI { 'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }" input: - Map meta - List fastq - String user - String args + meta : Map + fastq : List + user : String + args : String output: meta = meta @@ -22,7 +22,7 @@ process ASPERA_CLI { tuple( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions' script: - def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" + let conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { """ $conda_prefix diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index 1fa32cdb..44a9ac76 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -7,10 +7,10 @@ process MULTIQC_MAPPINGS_CONFIG { 'biocontainers/python:3.9--1' }" input: - Path csv + csv : Path output: - Path yml = path("multiqc_config.yml") + yml : Path = path("multiqc_config.yml") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index 37357a03..93ea8fb1 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -10,9 +10,9 @@ process SRA_FASTQ_FTP { 'biocontainers/wget:1.20.1' }" input: - Map meta - List fastq - String args + meta : Map + fastq : List + args : String output: meta = meta diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 919940c5..e6743acf 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -9,17 +9,17 @@ process SRA_IDS_TO_RUNINFO { 'biocontainers/python:3.9--1' }" input: - String id - String fields + id : String + fields : String output: - Path tsv = path("*.runinfo.tsv") + tsv : Path = path("*.runinfo.tsv") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: - def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : '' + let metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' """ echo $id > id.txt sra_ids_to_runinfo.py \\ diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index f5dbe6dc..c88d0b73 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -7,10 +7,10 @@ process SRA_RUNINFO_TO_FTP { 'biocontainers/python:3.9--1' }" input: - Path runinfo + runinfo : Path output: - Path tsv = path("*.runinfo_ftp.tsv") + tsv : Path = path("*.runinfo_ftp.tsv") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index 9272d131..fa04548f 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ b/modules/local/sra_to_samplesheet/main.nf @@ -4,38 +4,38 @@ process SRA_TO_SAMPLESHEET { memory 100.MB input: - List sra_metadata - String pipeline - String strandedness - String mapping_fields + sra_metadata : List + pipeline : String + strandedness : String + mapping_fields : String output: - Path samplesheet = path("samplesheet.csv") - Path mappings = path("mappings.csv") + samplesheet : Path = path("samplesheet.csv") + mappings : Path = path("mappings.csv") exec: // // Create samplesheet containing metadata // - def records = sra_metadata.collect { meta -> + let records = sra_metadata.collect { meta -> getSraRecord(meta, pipeline, strandedness, mappings) } - def samplesheet = records + let samplesheet = records .collect { pipeline_map, mappings_map -> pipeline_map } .sort { record -> record.id } mergeCsv(samplesheet, task.workDir.resolve('samplesheet.csv')) - def mappings = records + let mappings = records .collect { pipeline_map, mappings_map -> mappings_map } .sort { record -> record.id } mergeCsv(mappings, task.workDir.resolve('id_mappings.csv')) } -def getSraRecord(Map meta, String pipeline, String strandedness, String mapping_fields) { +fn getSraRecord(meta: Map, pipeline: String, strandedness: String, mapping_fields: String) -> Tuple2 { // Remove custom keys needed to download the data - def meta_clone = meta.clone() + let meta_clone = meta.clone() meta_clone.remove("id") meta_clone.remove("fastq_1") meta_clone.remove("fastq_2") @@ -44,7 +44,7 @@ def getSraRecord(Map meta, String pipeline, String strandedness, String mapping_ meta_clone.remove("single_end") // Add relevant fields to the beginning of the map - def pipeline_map = [ + let pipeline_map = [ sample : "${meta.id.split('_')[0..-2].join('_')}", fastq_1 : meta.fastq_1, fastq_2 : meta.fastq_2 @@ -65,13 +65,12 @@ def getSraRecord(Map meta, String pipeline, String strandedness, String mapping_ // // Create sample id mappings file // - def mappings_map = pipeline_map.clone() - def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ v -> v.trim().toLowerCase() } : [] - if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") + let fields = mapping_fields ? 
['sample'] + mapping_fields.split(',').collect{ v -> v.trim().toLowerCase() } : [] + if ((pipeline_map.keySet() + fields).unique().size() != pipeline_map.keySet().size()) { + error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${pipeline_map.keySet().join(', ')}") } - mappings_map = mappings_map.subMap(fields) + let mappings_map = pipeline_map.subMap(fields) - return [ pipeline_map, mappings_map ] + return tuple( pipeline_map, mappings_map ) } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 2dbcf49f..55915523 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -8,10 +8,10 @@ process CUSTOM_SRATOOLSNCBISETTINGS { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - List ids + ids : List output: - Path ncbi_settings = path('*.mkfg') + ncbi_settings : Path = path('*.mkfg') topic: tuple( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 8fbddfb2..99b132f4 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -8,13 +8,13 @@ process SRATOOLS_FASTERQDUMP { 'quay.io/biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: - Map meta - Path sra - Path ncbi_settings - Path certificate - String fasterqdump_args = '--split-files --include-technical' - String pigz_args = '' - String prefix = '' + meta : Map + sra : Path + ncbi_settings : Path + certificate : Path + fasterqdump_args: String = '--split-files --include-technical' + pigz_args : String = '' + prefix : String = '' output: meta @@ -27,8 +27,8 @@ process SRATOOLS_FASTERQDUMP { script: if( !prefix ) prefix = "${meta.id}" - def outfile = meta.single_end ? "${prefix}.fastq" : prefix - def key_file = '' + let outfile = meta.single_end ? 
"${prefix}.fastq" : prefix + var key_file = '' if (certificate.toString().endsWith('.jwt')) { key_file += " --perm ${certificate}" } else if (certificate.toString().endsWith('.ngc')) { diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 735f1fc8..befb5014 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -8,11 +8,11 @@ process SRATOOLS_PREFETCH { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - Map meta - Path ncbi_settings - Path certificate - String prefetch_args = '' - String retry_args = '5 1 100' // + meta : Map + ncbi_settings : Path + certificate : Path + prefetch_args : String = '' + retry_args : String = '5 1 100' // output: sra = path(id) diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index ef081b97..6569b659 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -8,11 +8,11 @@ process UNTAR { 'nf-core/ubuntu:20.04' }" input: - Map meta - Path archive - String args = '' - String args2 = '' - String prefix = '' + meta : Map + archive : Path + args : String = '' + args2 : String = '' + prefix : String = '' output: meta diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 98d64346..d662ac91 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -29,14 +29,14 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' workflow PIPELINE_INITIALISATION { take: - version // boolean: Display version and exit - help // boolean: Display help text - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files - ena_metadata_fields // string: Comma-separated list of ENA metadata fields to fetch before downloading data + version : boolean // Display version and exit + help : boolean // Display help text + validate_params : boolean // Validate parameters against the schema at runtime + monochrome_logs : boolean // Do not use coloured log outputs + nextflow_cli_args : List // List of positional nextflow CLI args + outdir : String // The output directory where the results will be saved + input : String // File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files + ena_metadata_fields : String // Comma-separated list of ENA metadata fields to fetch before downloading data main: @@ -53,9 +53,9 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - pre_help_text = nfCoreLogo(monochrome_logs) - post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv --outdir " + let pre_help_text = nfCoreLogo(monochrome_logs) + let post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + let workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv --outdir " UTILS_NFVALIDATION_PLUGIN ( help, workflow_command, @@ -75,13 +75,13 @@ 
workflow PIPELINE_INITIALISATION { // // Auto-detect input id type // - input = file(input) - if (!isSraId(input)) + let inputPath = file(input) + if (!isSraId(inputPath)) error('Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!') sraCheckENAMetadataFields(ena_metadata_fields) // Read in ids from --input file - input // Path + inputPath // Path |> Channel.of // Channel |> flatMap { csv -> splitCsv(csv, header: false, schema: 'assets/schema_input.yml') @@ -102,16 +102,16 @@ workflow PIPELINE_INITIALISATION { workflow PIPELINE_COMPLETION { take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications + email : String // email address + email_on_fail : String // email address sent on pipeline failure + plaintext_email : boolean // Send plain-text email instead of HTML + outdir : Path // Path to output directory where results will be published + monochrome_logs : boolean // Disable ANSI colour codes in log output + hook_url : String // hook URL for notifications main: - summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") + let summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") // // Completion email and summary @@ -140,11 +140,11 @@ workflow PIPELINE_COMPLETION { // // Check if input ids are from the SRA // -def isSraId(input) { - def is_sra = false - def total_ids = 0 - def no_match_ids = [] - def pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/ +fn isSraId(input: Path) -> boolean { + var is_sra = false + var total_ids = 0 + let no_match_ids = [] + let pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/ input.eachLine { line -> total_ids += 1 if (!(line =~ pattern)) { @@ -152,7 +152,7 @@ def isSraId(input) { } } - def num_match = total_ids - no_match_ids.size() + let num_match = total_ids - no_match_ids.size() if (num_match > 0) { if (num_match == total_ids) { is_sra = true @@ -166,10 +166,10 @@ def isSraId(input) { // // Check and validate parameters // -def sraCheckENAMetadataFields(ena_metadata_fields) { +fn sraCheckENAMetadataFields(ena_metadata_fields) { // Check minimal ENA fields are provided to download FastQ files - def valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5'] - def actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect{ it.trim().toLowerCase() } : valid_ena_metadata_fields + let valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5'] + let actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect{ it.trim().toLowerCase() } : valid_ena_metadata_fields if (!actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)) { error("Invalid option: '${ena_metadata_fields}'. 
Minimally required fields for '--ena_metadata_fields': '${valid_ena_metadata_fields.join(',')}'") } @@ -178,7 +178,7 @@ def sraCheckENAMetadataFields(ena_metadata_fields) { // // Print a warning after pipeline has completed // -def sraCurateSamplesheetWarn() { +fn sraCurateSamplesheetWarn() { log.warn "=============================================================================\n" + " Please double-check the samplesheet that has been auto-created by the pipeline.\n\n" + " Public databases don't reliably hold information such as strandedness\n" + diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28f..0b3083a1 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -15,10 +15,10 @@ import nextflow.extension.FilesEx workflow UTILS_NEXTFLOW_PIPELINE { take: - print_version // boolean: print version - dump_parameters // boolean: dump parameters - outdir // path: base directory used to publish pipeline results - check_conda_channels // boolean: check conda channels + print_version : boolean // print version + dump_parameters : boolean // dump parameters + outdir : String // base directory used to publish pipeline results + check_conda_channels: boolean // check conda channels main: @@ -45,7 +45,7 @@ workflow UTILS_NEXTFLOW_PIPELINE { } emit: - dummy_emit = true + true } /* @@ -57,15 +57,15 @@ workflow UTILS_NEXTFLOW_PIPELINE { // // Generate version string // -def getWorkflowVersion() { - String version_string = "" +fn getWorkflowVersion() -> String { + var version_string = "" if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + let prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" } if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) + let git_shortsha = workflow.commitId.substring(0, 7) version_string += "-g${git_shortsha}" } @@ -75,11 +75,11 @@ def getWorkflowVersion() { // // Dump pipeline parameters to a JSON file // -def dumpParametersToJSON(outdir) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) +fn dumpParametersToJSON(outdir: String) { + let timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + let filename = "params_${timestamp}.json" + let temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + let jsonStr = JsonOutput.toJson(params) temp_pf.text = JsonOutput.prettyPrint(jsonStr) FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") @@ -89,11 +89,11 @@ def dumpParametersToJSON(outdir) { // // When running with -profile conda, warn if channels have not been set-up appropriately // -def checkCondaChannels() { - Yaml parser = new Yaml() - def channels = [] +fn checkCondaChannels() { + let parser = new Yaml() + var channels: Set = [] try { - def config = parser.load("conda config --show channels".execute().text) + let config = parser.load("conda config --show channels".execute().text) channels = config.channels } catch(NullPointerException | IOException e) { log.warn "Could not verify conda channel configuration." @@ -102,12 +102,12 @@ def checkCondaChannels() { // Check that all channels are present // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + let required_channels_in_order: Set = ['conda-forge', 'bioconda', 'defaults'] + let channels_missing = !(required_channels_in_order - channels).isEmpty() // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() + let channel_priority_violation = false + let n = required_channels_in_order.size() for (int i = 0; i < n - 1; i++) { channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 074d9d98..0ea3a9ff 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -11,7 +11,7 @@ workflow UTILS_NFCORE_PIPELINE { take: - nextflow_cli_args + nextflow_cli_args : List main: valid_config = checkConfigProvided() @@ -30,8 +30,7 @@ workflow UTILS_NFCORE_PIPELINE { // // Warn if a -profile or Nextflow config has not been provided to run the pipeline // -def checkConfigProvided() { - valid_config = true +fn checkConfigProvided() -> boolean { if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + @@ -39,15 +38,15 @@ def checkConfigProvided() { " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " - valid_config = false + return false } - return valid_config + return true } // // Exit pipeline if --profile contains spaces // -def checkProfileProvided(nextflow_cli_args) { +fn checkProfileProvided(nextflow_cli_args: List) { if (workflow.profile.endsWith(',')) { error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" @@ -61,7 +60,7 @@ def checkProfileProvided(nextflow_cli_args) { // // Citation string for pipeline // -def workflowCitation() { +fn workflowCitation() -> String { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + " ${workflow.manifest.doi}\n\n" + @@ -74,15 +73,15 @@ def workflowCitation() { // // Generate workflow version string // -def getWorkflowVersion() { - String version_string = "" +fn getWorkflowVersion() -> String { + var version_string = "" if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + let prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" } if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) + let git_shortsha = workflow.commitId.substring(0, 7) version_string += "-g${git_shortsha}" } @@ -92,7 +91,7 @@ def getWorkflowVersion() { // // Get workflow version for pipeline // -def workflowVersionToYAML() { +fn workflowVersionToYAML() -> String { return """ Workflow: $workflow.manifest.name: ${getWorkflowVersion()} @@ -127,10 +126,10 @@ workflow softwareVersionsToYAML { // // Get workflow summary for MultiQC // -def paramsSummaryMultiqc(summary_params) { - def summary_section = '' +fn paramsSummaryMultiqc(summary_params: Map) -> String { + var summary_section = '' for (group in summary_params.keySet()) { - def group_params = summary_params.get(group) // This gets the parameters of that particular group + let group_params = summary_params.get(group) // This gets the parameters of that particular group if (group_params) { summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n"
             summary_section += "    <dl class=\"dl-horizontal\">\n"
@@ -141,7 +140,7 @@
         }
     }

-    String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
+    var yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
     yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
     yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
     yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
@@ -155,8 +154,8 @@
 //
 // nf-core logo
 //
-def nfCoreLogo(monochrome_logs=true) {
-    Map colors = logColours(monochrome_logs)
+fn nfCoreLogo(monochrome_logs: boolean = true) -> String {
+    let colors = logColours(monochrome_logs)
     String.format(
         """\n
         ${dashedLine(monochrome_logs)}
@@ -174,16 +173,16 @@
 //
 // Return dashed line
 //
-def dashedLine(monochrome_logs=true) {
-    Map colors = logColours(monochrome_logs)
+fn dashedLine(monochrome_logs: boolean = true) -> String {
+    let colors = logColours(monochrome_logs)
     return "-${colors.dim}----------------------------------------------------${colors.reset}-"
 }

 //
 // ANSI colours used for terminal logging
 //
-def logColours(monochrome_logs=true) {
-    Map colorcodes = [:]
+fn logColours(monochrome_logs: boolean = true) -> Map {
+    let colorcodes = [:]

     // Reset / Meta
     colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
@@ -247,46 +246,29 @@
     return colorcodes
 }

-//
-// Attach the multiqc report to email
-//
-def attachMultiqcReport(multiqc_report) {
-    def mqc_report = null
-    try {
-        if (workflow.success) {
-            mqc_report = multiqc_report.getVal()
-            if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
-                if (mqc_report.size() > 1) {
-                    log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
-                }
-                mqc_report = mqc_report[0]
-            }
-        }
-    } catch (all) {
-        if (multiqc_report) {
-            log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
-        }
-    }
-    return mqc_report
-}
-
 //
 // Construct and send completion email
 //
-def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) {
+fn completionEmail(
+    summary_params: Map,
+    email: String,
+    email_on_fail: String,
+    plaintext_email: boolean,
+    outdir: String,
+    monochrome_logs: boolean = true,
+    multiqc_report: Path = null) {

     // Set up the e-mail variables
-    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
-    if (!workflow.success) {
-        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
-    }
+    let subject = workflow.success
+        ?
"[$workflow.manifest.name] Successful: $workflow.runName" + : "[$workflow.manifest.name] FAILED: $workflow.runName" - def summary = [:] + let summary = [:] for (group in summary_params.keySet()) { summary << summary_params[group] } - def misc_fields = [:] + let misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete misc_fields['Pipeline script file path'] = workflow.scriptFile @@ -298,7 +280,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi misc_fields['Nextflow Build'] = workflow.nextflow.build misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - def email_fields = [:] + let email_fields = [:] email_fields['version'] = getWorkflowVersion() email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success @@ -312,58 +294,59 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi email_fields['summary'] = summary << misc_fields // On success try attach the multiqc report - def mqc_report = attachMultiqcReport(multiqc_report) + let mqc_report = workflow.success + ? multiqc_report + : null // Check if we are only sending emails on failure - def email_address = email - if (!email && email_on_fail && !workflow.success) { - email_address = email_on_fail - } + let email_address = !email && email_on_fail && !workflow.success + ? email_on_fail + : email // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("${workflow.projectDir}/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() + let engine = new groovy.text.GStringTemplateEngine() + let tf = new File("${workflow.projectDir}/assets/email_template.txt") + let txt_template = engine.createTemplate(tf).make(email_fields) + let email_txt = txt_template.toString() // Render the HTML template - def hf = new File("${workflow.projectDir}/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() + let hf = new File("${workflow.projectDir}/assets/email_template.html") + let html_template = engine.createTemplate(hf).make(email_fields) + let email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() + let max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + let smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + let sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + let sendmail_template = engine.createTemplate(sf).make(smail_fields) + let sendmail_html = sendmail_template.toString() // Send the HTML e-mail - Map colors = logColours(monochrome_logs) + let colors = logColours(monochrome_logs) if (email_address) { try { if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + let sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + let mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] mail_cmd.execute() << email_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" } } // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + let output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } output_hf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.html"); output_hf.delete() // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + let output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } output_tf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.txt"); output_tf.delete() @@ -372,8 +355,8 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // // Print pipeline summary on completion // -def completionSummary(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) +fn completionSummary(monochrome_logs: boolean = true) { + let colors = logColours(monochrome_logs) if (workflow.success) { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" @@ -388,13 +371,13 @@ def completionSummary(monochrome_logs=true) { // // Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack // -def imNotification(summary_params, hook_url) { - def summary = [:] +fn imNotification(summary_params: Map, hook_url: String) { + let summary = [:] for (group in summary_params.keySet()) { summary << summary_params[group] } - def misc_fields = [:] + let misc_fields = [:] misc_fields['start'] = workflow.start misc_fields['complete'] = workflow.complete misc_fields['scriptfile'] = workflow.scriptFile @@ -406,7 +389,7 @@ def imNotification(summary_params, hook_url) { misc_fields['nxf_build'] = workflow.nextflow.build misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - def msg_fields = [:] + let msg_fields = [:] msg_fields['version'] = getWorkflowVersion() msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success @@ -420,21 +403,21 @@ def imNotification(summary_params, hook_url) { msg_fields['summary'] = summary << misc_fields // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() + let engine = new groovy.text.GStringTemplateEngine() // Different JSON depending on the service provider // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("${workflow.projectDir}/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() + let json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + let hf = new File("${workflow.projectDir}/assets/${json_path}") + let json_template = engine.createTemplate(hf).make(msg_fields) + let json_message = json_template.toString() // POST - def post = new URL(hook_url).openConnection(); + let post = new URL(hook_url).openConnection(); post.setRequestMethod("POST") post.setDoOutput(true) post.setRequestProperty("Content-Type", "application/json") post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); + let postRC = post.getResponseCode(); if (! 
postRC.equals(200)) {
         log.warn(post.getErrorStream().getText());
     }
 }
diff --git a/types/types.nf b/types/types.nf
index a1a38148..57722204 100644
--- a/types/types.nf
+++ b/types/types.nf
@@ -1,5 +1,5 @@
 record Sample {
-    Map meta
-    List files
+    meta : Map
+    files : List
 }
diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf
index f8e6327e..304e4be1 100644
--- a/workflows/sra/main.nf
+++ b/workflows/sra/main.nf
@@ -37,19 +37,19 @@ include { Sample } from '../../types/types'
 workflow SRA {

     take:
-    ids // List
-    ena_metadata_fields // String
-    sample_mapping_fields // String
-    nf_core_pipeline // String
-    nf_core_rnaseq_strandedness // String
-    download_method // String enum: 'aspera' | 'ftp' | 'sratools'
-    skip_fastq_download // boolean
-    dbgap_key // String
-    aspera_cli_args // String
-    sra_fastq_ftp_args // String
-    sratools_fasterqdump_args // String
-    sratools_pigz_args // String
-    outdir // String
+    ids : Channel
+    ena_metadata_fields : String
+    sample_mapping_fields : String
+    nf_core_pipeline : String
+    nf_core_rnaseq_strandedness : String
+    download_method : String // enum: 'aspera' | 'ftp' | 'sratools'
+    skip_fastq_download : boolean
+    dbgap_key : String
+    aspera_cli_args : String
+    sra_fastq_ftp_args : String
+    sratools_fasterqdump_args : String
+    sratools_pigz_args : String
+    outdir : String

     main:
     ids // Channel
@@ -83,7 +83,7 @@ workflow SRA {
             getDownloadMethod(meta, download_method) == 'ftp'
         } // Channel
         |> map { meta ->
-            def fastq = [ file(meta.fastq_1), file(meta.fastq_2) ]
+            let fastq = [ file(meta.fastq_1), file(meta.fastq_2) ]
             SRA_FASTQ_FTP ( meta, fastq, sra_fastq_ftp_args )
         } // Channel<ProcessOut(meta: Map, fastq: List<Path>, md5: List<Path>)>
         |> set { ftp_samples } // Channel<ProcessOut(meta: Map, fastq: List<Path>, md5: List<Path>)>
@@ -109,7 +109,7 @@ workflow SRA {
             getDownloadMethod(meta, download_method) == 'aspera'
         } // Channel
         |> map { meta ->
-            def fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
+            let fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
             ASPERA_CLI ( meta, fastq, 'era-fasp', aspera_cli_args )
         } // Channel<ProcessOut(meta: Map, fastq: List<Path>, md5: List<Path>)>
         |> set { aspera_samples } // Channel<ProcessOut(meta: Map, fastq: List<Path>, md5: List<Path>)>
@@ -128,8 +128,8 @@ workflow SRA {
     fastq // Channel
         |> map { sample ->
-            def reads = sample.files
-            def meta = sample.meta
+            let reads = sample.files
+            let meta = sample.meta
             meta + [
                 fastq_1: reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '',
                 fastq_2: reads[1] && !meta.single_end ?
"${outdir}/fastq/${reads[1].getName()}" : '' @@ -170,7 +170,7 @@ workflow SRA { |> softwareVersionsToYAML // Channel |> collect(sort: true) // List |> exec('SOFTWARE_VERIONS') { versions -> - def path = task.workDir.resolve('nf_core_fetchngs_software_mqc_versions.yml') + let path = task.workDir.resolve('nf_core_fetchngs_software_mqc_versions.yml') mergeText(versions, path, newLine: true) return path } // Path @@ -198,7 +198,7 @@ workflow SRA { ======================================================================================== */ -def getDownloadMethod(Map meta, String download_method) { +fn getDownloadMethod(meta: Map, download_method: String) -> String { // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' From a7bebba776fee84cfb57d7192d4770acc71a196a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sat, 18 May 2024 16:32:54 -0500 Subject: [PATCH 10/25] Omit name for single process output Signed-off-by: Ben Sherman --- modules/local/multiqc_mappings_config/main.nf | 2 +- modules/local/sra_ids_to_runinfo/main.nf | 2 +- modules/local/sra_runinfo_to_ftp/main.nf | 5 +++-- modules/nf-core/custom/sratoolsncbisettings/main.nf | 2 +- modules/nf-core/sratools/prefetch/main.nf | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index 44a9ac76..d1f1d837 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -10,7 +10,7 @@ process MULTIQC_MAPPINGS_CONFIG { csv : Path output: - yml : Path = path("multiqc_config.yml") + path("multiqc_config.yml") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index e6743acf..565d1fb5 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -13,7 +13,7 @@ process SRA_IDS_TO_RUNINFO { fields : String output: - tsv : Path = path("*.runinfo.tsv") + path("${id}.runinfo.tsv") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index c88d0b73..18687b81 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -10,15 +10,16 @@ process SRA_RUNINFO_TO_FTP { runinfo : Path output: - tsv : Path = path("*.runinfo_ftp.tsv") + path("${prefix}.runinfo_ftp.tsv") topic: tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: + prefix = runinfo.toString().tokenize(".")[0] """ sra_runinfo_to_ftp.py \\ ${runinfo.join(',')} \\ - ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv + ${prefix}.runinfo_ftp.tsv """ } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 55915523..1cca4673 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -11,7 +11,7 @@ process CUSTOM_SRATOOLSNCBISETTINGS { ids : List output: - ncbi_settings : Path = path('*.mkfg') + 
path('*.mkfg')

     topic:
     tuple( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions'
diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf
index befb5014..301a60af 100644
--- a/modules/nf-core/sratools/prefetch/main.nf
+++ b/modules/nf-core/sratools/prefetch/main.nf
@@ -15,7 +15,7 @@ process SRATOOLS_PREFETCH {
     retry_args : String = '5 1 100' // <num retries> <base delay in seconds> <max delay in seconds>

     output:
-    sra = path(id)
+    path(id)

     topic:
     tuple( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions'

From f791e1e16baad4f7e0d9e0681a3fcde48cc39c6f Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Sun, 22 Sep 2024 23:29:28 -0500
Subject: [PATCH 11/25] minor updates

Signed-off-by: Ben Sherman
---
 main.nf | 2 +-
 modules/local/aspera_cli/main.nf | 16 ++++++++--------
 modules/local/multiqc_mappings_config/main.nf | 12 ++++++------
 modules/local/sra_fastq_ftp/main.nf | 16 ++++++++--------
 modules/local/sra_ids_to_runinfo/main.nf | 12 ++++++------
 modules/local/sra_runinfo_to_ftp/main.nf | 12 ++++++------
 modules/local/sra_to_samplesheet/main.nf | 17 ++++++++++-------
 .../nf-core/custom/sratoolsncbisettings/main.nf | 10 +++++-----
 modules/nf-core/sratools/fasterqdump/main.nf | 16 ++++++++--------
 modules/nf-core/sratools/prefetch/main.nf | 12 ++++++------
 modules/nf-core/untar/main.nf | 14 +++++++-------
 .../main.nf | 2 +-
 .../nf-core/utils_nextflow_pipeline/main.nf | 12 ++++++------
 types/types.nf | 2 +-
 14 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/main.nf b/main.nf
index 8ddddece..75f50236 100644
--- a/main.nf
+++ b/main.nf
@@ -102,7 +102,7 @@ workflow {
     )
 }

-publish {
+output {
     directory params.outdir
     mode params.publish_dir_mode
 }
diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf
index b35eef3e..195cf284 100644
--- a/modules/local/aspera_cli/main.nf
+++ b/modules/local/aspera_cli/main.nf
@@ -13,14 +13,6 @@ process ASPERA_CLI {
     user : String
     args : String

-    output:
-    meta = meta
-    fastq = path("*fastq.gz")
-    md5 = path("*md5")
-
-    topic:
-    tuple( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions'
-
     script:
     let conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ?
"export CONDA_PREFIX=/usr/local" : "" if (meta.single_end) { @@ -59,4 +51,12 @@ process ASPERA_CLI { md5sum -c ${meta.id}_2.fastq.gz.md5 """ } + + output: + meta = meta + fastq = path("*fastq.gz") + md5 = path("*md5") + + topic: + ( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions' } diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf index d1f1d837..f3b9fac1 100644 --- a/modules/local/multiqc_mappings_config/main.nf +++ b/modules/local/multiqc_mappings_config/main.nf @@ -9,16 +9,16 @@ process MULTIQC_MAPPINGS_CONFIG { input: csv : Path - output: - path("multiqc_config.yml") - - topic: - tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' - script: """ multiqc_mappings_config.py \\ $csv \\ multiqc_config.yml """ + + output: + path("multiqc_config.yml") + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' } diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index 93ea8fb1..6a3cd91e 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -14,14 +14,6 @@ process SRA_FASTQ_FTP { fastq : List args : String - output: - meta = meta - fastq = path("*fastq.gz") - md5 = path("*md5") - - topic: - tuple( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions' - script: if (meta.single_end) { """ @@ -52,4 +44,12 @@ process SRA_FASTQ_FTP { md5sum -c ${meta.id}_2.fastq.gz.md5 """ } + + output: + meta = meta + fastq = path("*fastq.gz") + md5 = path("*md5") + + topic: + ( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions' } diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 565d1fb5..cab4f32c 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -12,12 +12,6 @@ process SRA_IDS_TO_RUNINFO { id : String fields : String - output: - path("${id}.runinfo.tsv") - - topic: - tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' - script: let metadata_fields = fields ? 
"--ena_metadata_fields ${fields}" : '' """ @@ -27,4 +21,10 @@ process SRA_IDS_TO_RUNINFO { ${id}.runinfo.tsv \\ $metadata_fields """ + + output: + path("${id}.runinfo.tsv") + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' } diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 18687b81..783c59ea 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -9,12 +9,6 @@ process SRA_RUNINFO_TO_FTP { input: runinfo : Path - output: - path("${prefix}.runinfo_ftp.tsv") - - topic: - tuple( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' - script: prefix = runinfo.toString().tokenize(".")[0] """ @@ -22,4 +16,10 @@ process SRA_RUNINFO_TO_FTP { ${runinfo.join(',')} \\ ${prefix}.runinfo_ftp.tsv """ + + output: + path("${prefix}.runinfo_ftp.tsv") + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' } diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf index fa04548f..a18efe53 100644 --- a/modules/local/sra_to_samplesheet/main.nf +++ b/modules/local/sra_to_samplesheet/main.nf @@ -4,22 +4,18 @@ process SRA_TO_SAMPLESHEET { memory 100.MB input: - sra_metadata : List + sra_metadata : List> pipeline : String strandedness : String mapping_fields : String - output: - samplesheet : Path = path("samplesheet.csv") - mappings : Path = path("mappings.csv") - exec: // // Create samplesheet containing metadata // let records = sra_metadata.collect { meta -> - getSraRecord(meta, pipeline, strandedness, mappings) + getSraRecord(meta, pipeline, strandedness, mapping_fields) } let samplesheet = records @@ -31,6 +27,10 @@ process SRA_TO_SAMPLESHEET { .collect { pipeline_map, mappings_map -> mappings_map } .sort { record -> record.id } mergeCsv(mappings, task.workDir.resolve('id_mappings.csv')) + + output: + samplesheet : Path = path("samplesheet.csv") + mappings : Path = path("mappings.csv") } fn getSraRecord(meta: Map, pipeline: String, strandedness: String, mapping_fields: String) -> Tuple2 { @@ -72,5 +72,8 @@ fn getSraRecord(meta: Map, pipeline: String, strandedness: String, mapping_field let mappings_map = pipeline_map.subMap(fields) - return tuple( pipeline_map, mappings_map ) + return ( pipeline_map, mappings_map ) +} + +fn mergeCsv() { } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 1cca4673..a79124b4 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -10,13 +10,13 @@ process CUSTOM_SRATOOLSNCBISETTINGS { input: ids : List + shell: + config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" + template 'detect_ncbi_settings.sh' + output: path('*.mkfg') topic: - tuple( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' - - shell: - config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - template 'detect_ncbi_settings.sh' + ( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' } diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 99b132f4..1f675d90 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ 
b/modules/nf-core/sratools/fasterqdump/main.nf @@ -16,14 +16,6 @@ process SRATOOLS_FASTERQDUMP { pigz_args : String = '' prefix : String = '' - output: - meta - fastq = path('*.fastq.gz') - - topic: - tuple( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' - tuple( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' - script: if( !prefix ) prefix = "${meta.id}" @@ -50,4 +42,12 @@ process SRATOOLS_FASTERQDUMP { --processes $task.cpus \\ *.fastq """ + + output: + meta + fastq = path('*.fastq.gz') + + topic: + ( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + ( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' } diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 301a60af..4194181c 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -14,12 +14,6 @@ process SRATOOLS_PREFETCH { prefetch_args : String = '' retry_args : String = '5 1 100' // - output: - path(id) - - topic: - tuple( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' - shell: id = meta.run_accession if (certificate) { @@ -32,4 +26,10 @@ process SRATOOLS_PREFETCH { } template 'retry_with_backoff.sh' + + output: + path(id) + + topic: + ( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' } diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 6569b659..671ee35a 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -14,13 +14,6 @@ process UNTAR { args2 : String = '' prefix : String = '' - output: - meta - untar = path("$prefix") - - topic: - tuple( task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ) >> 'versions' - script: if( !prefix ) prefix = meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") @@ -54,4 +47,11 @@ process UNTAR { mkdir $prefix touch ${prefix}/file.txt """ + + output: + meta + untar = path("$prefix") + + topic: + ( task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ) >> 'versions' } diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index f0f5c343..73ec2376 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -26,7 +26,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Prefetch sequencing reads in SRA format. // - sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) + let sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) // // Convert the SRA format into one or more compressed FASTQ files. 
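
Editorial aside, not part of patch 11: the hunk above is the payoff of patches 09-11. A process invocation now returns its outputs as a value, so SRATOOLS_PREFETCH can be bound with `let` and handed straight to the next call. Below is a minimal sketch of that calling style, assuming the experimental syntax used throughout this series; the workflow name and the `settings`/`certificate` bindings are hypothetical stand-ins for values a real caller would receive from upstream processes.

workflow EXAMPLE_SRA_FETCH {
    take:
    meta : Map   // run metadata, e.g. [id: 'SRR13055520', run_accession: 'SRR13055520', single_end: false]

    main:
    let settings = file('user-settings.mkfg')   // hypothetical; normally produced by CUSTOM_SRATOOLSNCBISETTINGS
    let certificate = []                        // no dbGaP key in this sketch

    // each call returns its output block directly instead of writing to an implicit channel
    let sra = SRATOOLS_PREFETCH ( meta, settings, certificate )
    // trailing fasterq-dump/pigz arguments fall back to the defaults declared in the module
    let out = SRATOOLS_FASTERQDUMP ( meta, sra, settings, certificate )

    emit:
    fastq = out.fastq   // the '*.fastq.gz' files declared by the module's output block
}

Because `path(id)` is prefetch's only output, it needs no name after patch 10, which is what makes the bare `let sra = ...` binding above possible.
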
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
index 0b3083a1..0b29dee7 100644
--- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
@@ -2,9 +2,9 @@
 // Subworkflow with functionality that may be useful for any Nextflow pipeline
 //

-import org.yaml.snakeyaml.Yaml
-import groovy.json.JsonOutput
-import nextflow.extension.FilesEx
+// import org.yaml.snakeyaml.Yaml
+// import groovy.json.JsonOutput
+// import nextflow.extension.FilesEx

 /*
========================================================================================
@@ -108,9 +108,9 @@ fn checkCondaChannels() {
     // Check that they are in the right order
     let channel_priority_violation = false
     let n = required_channels_in_order.size()
-    for (int i = 0; i < n - 1; i++) {
-        channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
-    }
+    // for (int i = 0; i < n - 1; i++) {
+    //     channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+    // }

     if (channels_missing | channel_priority_violation) {
         log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
diff --git a/types/types.nf b/types/types.nf
index 57722204..487afa94 100644
--- a/types/types.nf
+++ b/types/types.nf
@@ -1,5 +1,5 @@
 record Sample {
-    meta : Map
+    meta : Map<String,Object>
     files : List
 }

From 70409459ce935dccf035b593af8f3f22b29a687b Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Sun, 3 Nov 2024 00:31:11 +0100
Subject: [PATCH 12/25] Revert changes to JSON schemas

Signed-off-by: Ben Sherman
---
 assets/schema_input.json | 17 ++
 assets/schema_input.yml | 12 -
 nextflow_schema.json | 287 ++++++++++++++++++
 params.yml | 258 ----------------
 .../utils_nfcore_fetchngs_pipeline/main.nf | 6 +-
 5 files changed, 307 insertions(+), 273 deletions(-)
 create mode 100644 assets/schema_input.json
 delete mode 100644 assets/schema_input.yml
 create mode 100644 nextflow_schema.json
 delete mode 100644 params.yml

diff --git a/assets/schema_input.json b/assets/schema_input.json
new file mode 100644
index 00000000..0b41eab3
--- /dev/null
+++ b/assets/schema_input.json
@@ -0,0 +1,17 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json",
+    "title": "nf-core/fetchngs pipeline - params.input schema",
+    "description": "Schema for the file provided with params.input",
+    "type": "array",
+    "items": {
+        "type": "object",
+        "properties": {
+            "": {
+                "type": "string",
+                "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$",
+                "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier"
+            }
+        }
+    }
+}
diff --git a/assets/schema_input.yml b/assets/schema_input.yml
deleted file mode 100644
index 29760b88..00000000
--- a/assets/schema_input.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-$schema: http://json-schema.org/draft-07/schema
-$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.yml
-title: nf-core/fetchngs pipeline - params.input schema
-description: Schema for the file provided with params.input
-type: array
-items:
-  type: object
-  properties:
-    '':
-      type: string
-      pattern: ^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$
-      errorMessage: Please provide a valid SRA, ENA, DDBJ or GEO identifier
diff --git
a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..6dbce31e --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,287 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json", + "title": "nf-core/fetchngs pipeline parameters", + "description": "Pipeline to fetch metadata and raw FastQ files from public databases", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.(csv|tsv|txt)$", + "fa_icon": "fas fa-file-excel", + "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files." + }, + "ena_metadata_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", + "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." + }, + "sample_mapping_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", + "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" + }, + "nf_core_pipeline": { + "type": "string", + "fa_icon": "fab fa-apple", + "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.", + "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] + }, + "nf_core_rnaseq_strandedness": { + "type": "string", + "fa_icon": "fas fa-dna", + "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", + "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", + "default": "auto" + }, + "download_method": { + "type": "string", + "default": "ftp", + "fa_icon": "fas fa-download", + "enum": ["aspera", "ftp", "sratools"], + "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", + "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." 
+                },
+                "skip_fastq_download": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-fast-forward",
+                    "description": "Only download metadata for public database ids and don't download the FastQ files."
+                },
+                "dbgap_key": {
+                    "type": "string",
+                    "fa_icon": "fas fa-address-card",
+                    "help_text": "Path to a JWT cart file used to access protected dbGaP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.",
+                    "format": "file-path",
+                    "description": "dbGaP repository key."
+                },
+                "outdir": {
+                    "type": "string",
+                    "format": "directory-path",
+                    "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
+                    "fa_icon": "fas fa-folder-open"
+                },
+                "email": {
+                    "type": "string",
+                    "description": "Email address for completion summary.",
+                    "fa_icon": "fas fa-envelope",
+                    "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
+                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
+                }
+            }
+        },
+        "institutional_config_options": {
+            "title": "Institutional config options",
+            "type": "object",
+            "fa_icon": "fas fa-university",
+            "description": "Parameters used to describe centralised config profiles. These should not be edited.",
+            "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
+            "properties": {
+                "custom_config_version": {
+                    "type": "string",
+                    "description": "Git commit id for Institutional configs.",
+                    "default": "master",
+                    "hidden": true,
+                    "fa_icon": "fas fa-users-cog"
+                },
+                "custom_config_base": {
+                    "type": "string",
+                    "description": "Base directory for Institutional configs.",
+                    "default": "https://raw.githubusercontent.com/nf-core/configs/master",
+                    "hidden": true,
+                    "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
+                    "fa_icon": "fas fa-users-cog"
+                },
+                "config_profile_name": {
+                    "type": "string",
+                    "description": "Institutional config name.",
+                    "hidden": true,
+                    "fa_icon": "fas fa-users-cog"
+                },
+                "config_profile_description": {
+                    "type": "string",
+                    "description": "Institutional config description.",
+                    "hidden": true,
+                    "fa_icon": "fas fa-users-cog"
+                },
+                "config_profile_contact": {
+                    "type": "string",
+                    "description": "Institutional config contact information.",
+                    "hidden": true,
+                    "fa_icon": "fas fa-users-cog"
+                },
+                "config_profile_url": {
+                    "type": "string",
+                    "description": "Institutional config URL link.",
+                    "hidden": true,
+                    "fa_icon": "fas fa-users-cog"
+                }
+            }
+        },
+        "max_job_request_options": {
+            "title": "Max job request options",
+            "type": "object",
+            "fa_icon": "fab fa-acquisitions-incorporated",
+            "description": "Set the top limit for requested resources for any single job.",
+            "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
+            "properties": {
+                "max_cpus": {
+                    "type": "integer",
+                    "description": "Maximum number of CPUs that can be requested for any single job.",
+                    "default": 16,
+                    "fa_icon": "fas fa-microchip",
+                    "hidden": true,
+                    "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
+                },
+                "max_memory": {
+                    "type": "string",
+                    "description": "Maximum amount of memory that can be requested for any single job.",
+                    "default": "128.GB",
+                    "fa_icon": "fas fa-memory",
+                    "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
+                    "hidden": true,
+                    "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
+                },
+                "max_time": {
+                    "type": "string",
+                    "description": "Maximum amount of time that can be requested for any single job.",
+                    "default": "240.h",
+                    "fa_icon": "far fa-clock",
+                    "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
+                    "hidden": true,
+                    "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
+                }
+            }
+        },
+        "generic_options": {
+            "title": "Generic options",
+            "type": "object",
+            "fa_icon": "fas fa-file-import",
+            "description": "Less common options for the pipeline, typically set in a config file.",
+            "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
+            "properties": {
+                "help": {
+                    "type": "boolean",
+                    "description": "Display help text.",
+                    "fa_icon": "fas fa-question-circle",
+                    "hidden": true
+                },
+                "version": {
+                    "type": "boolean",
+                    "description": "Display version and exit.",
+                    "fa_icon": "fas fa-question-circle",
+                    "hidden": true
+                },
+                "publish_dir_mode": {
+                    "type": "string",
+                    "default": "copy",
+                    "description": "Method used to save pipeline results to output directory.",
+                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+                    "fa_icon": "fas fa-copy",
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "hidden": true
+                },
+                "email_on_fail": {
+                    "type": "string",
+                    "description": "Email address for completion summary, only when pipeline fails.",
+                    "fa_icon": "fas fa-exclamation-triangle",
+                    "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
+                    "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
+                    "hidden": true
+                },
+                "plaintext_email": {
+                    "type": "boolean",
+                    "description": "Send plain-text email instead of HTML.",
+                    "fa_icon": "fas fa-remove-format",
+                    "hidden": true
+                },
+                "monochrome_logs": {
+                    "type": "boolean",
+                    "description": "Do not use coloured log outputs.",
+                    "fa_icon": "fas fa-palette",
+                    "hidden": true
+                },
+                "hook_url": {
+                    "type": "string",
+                    "description": "Incoming hook URL for messaging service.",
+                    "fa_icon": "fas fa-people-group",
+                    "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
+                    "hidden": true
+                },
+                "validate_params": {
+                    "type": "boolean",
+                    "description": "Whether to validate parameters against the schema at runtime.",
+                    "default": true,
+                    "fa_icon": "fas fa-check-square",
+                    "hidden": true
+                },
+                "validationShowHiddenParams": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-eye-slash",
+                    "description": "Show all params when using `--help`",
+                    "hidden": true,
+                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+                },
+                "validationFailUnrecognisedParams": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters fails when an unrecognised parameter is found.",
+                    "hidden": true,
+                    "help_text": "By default, when an unrecognised parameter is found, a warning is issued."
+                },
+                "validationLenientMode": {
+                    "type": "boolean",
+                    "fa_icon": "far fa-check-circle",
+                    "description": "Validation of parameters in lenient mode.",
+                    "hidden": true,
+                    "help_text": "Allows string values that are parseable as numbers or booleans.
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + }, + "deprecated_options": { + "title": "Deprecated options", + "type": "object", + "description": "List of parameters that have been deprecated.", + "default": "", + "fa_icon": "fas fa-calendar-times", + "properties": { + "force_sratools_download": { + "type": "boolean", + "fa_icon": "fas fa-times-circle", + "description": "This parameter has been deprecated. Please use '--download_method sratools' instead.", + "enum": [false], + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/deprecated_options" + } + ] +} diff --git a/params.yml b/params.yml deleted file mode 100644 index afb19664..00000000 --- a/params.yml +++ /dev/null @@ -1,258 +0,0 @@ -$schema: http://json-schema.org/draft-07/schema -$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/params.yml -title: nf-core/fetchngs pipeline parameters -description: Pipeline to fetch metadata and raw FastQ files from public databases -type: object -definitions: - input_output_options: - title: Input/output options - type: object - fa_icon: fas fa-terminal - description: Define where the pipeline should find input data and save output data. - required: - - input - - outdir - properties: - input: - type: string - format: file-path - exists: true - schema: assets/schema_input.yml - mimetype: text/csv - pattern: ^\\S+\\.(csv|tsv|txt)$ - fa_icon: fas fa-file-excel - description: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files. - ena_metadata_fields: - type: string - fa_icon: fas fa-columns - description: Comma-separated list of ENA metadata fields to fetch before downloading data. - help_text: The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run). - sample_mapping_fields: - type: string - fa_icon: fas fa-columns - description: Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC. - default: experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description - nf_core_pipeline: - type: string - fa_icon: fab fa-apple - description: Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns. - enum: - - rnaseq - - atacseq - - viralrecon - - taxprofiler - nf_core_rnaseq_strandedness: - type: string - fa_icon: fas fa-dna - description: Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'. 
- help_text: The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution. - default: auto - download_method: - type: string - default: ftp - fa_icon: fas fa-download - enum: - - aspera - - ftp - - sratools - description: Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'. - help_text: FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ. - skip_fastq_download: - type: boolean - fa_icon: fas fa-fast-forward - description: Only download metadata for public data database ids and don't download the FastQ files. - dbgap_key: - type: string - fa_icon: fas fa-address-card - help_text: Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation. - format: file-path - description: dbGaP repository key. - aspera_cli_args: - type: string - default: -QT -l 300m -P33001 - sra_fastq_ftp_args: - type: string - default: -t 5 -nv -c -T 60 - sratools_fasterqdump_args: - type: string - default: '' - sratools_pigz_args: - type: string - default: '' - outdir: - type: string - format: directory-path - description: The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. - fa_icon: fas fa-folder-open - email: - type: string - description: Email address for completion summary. - fa_icon: fas fa-envelope - help_text: Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. - institutional_config_options: - title: Institutional config options - type: object - fa_icon: fas fa-university - description: Parameters used to describe centralised config profiles. These should not be edited. - help_text: The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline. - properties: - custom_config_version: - type: string - description: Git commit id for Institutional configs. - default: master - hidden: true - fa_icon: fas fa-users-cog - custom_config_base: - type: string - description: Base directory for Institutional configs. - default: https://raw.githubusercontent.com/nf-core/configs/master - hidden: true - help_text: If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter. - fa_icon: fas fa-users-cog - config_profile_name: - type: string - description: Institutional config name. - hidden: true - fa_icon: fas fa-users-cog - config_profile_description: - type: string - description: Institutional config description. - hidden: true - fa_icon: fas fa-users-cog - config_profile_contact: - type: string - description: Institutional config contact information. 
- hidden: true - fa_icon: fas fa-users-cog - config_profile_url: - type: string - description: Institutional config URL link. - hidden: true - fa_icon: fas fa-users-cog - max_job_request_options: - title: Max job request options - type: object - fa_icon: fab fa-acquisitions-incorporated - description: Set the top limit for requested resources for any single job. - help_text: If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details. - properties: - max_cpus: - type: integer - description: Maximum number of CPUs that can be requested for any single job. - default: 16 - fa_icon: fas fa-microchip - hidden: true - help_text: Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1` - max_memory: - type: string - description: Maximum amount of memory that can be requested for any single job. - default: 128.GB - fa_icon: fas fa-memory - pattern: ^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$ - hidden: true - help_text: Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` - max_time: - type: string - description: Maximum amount of time that can be requested for any single job. - default: 240.h - fa_icon: far fa-clock - pattern: ^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$ - hidden: true - help_text: Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` - generic_options: - title: Generic options - type: object - fa_icon: fas fa-file-import - description: Less common options for the pipeline, typically set in a config file. - help_text: These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`. - properties: - help: - type: boolean - description: Display help text. - fa_icon: fas fa-question-circle - hidden: true - version: - type: boolean - description: Display version and exit. - fa_icon: fas fa-question-circle - hidden: true - publish_dir_mode: - type: string - default: copy - description: Method used to save pipeline results to output directory. - help_text: The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. - fa_icon: fas fa-copy - enum: - - symlink - - rellink - - link - - copy - - copyNoFollow - - move - hidden: true - email_on_fail: - type: string - description: Email address for completion summary, only when pipeline fails. - fa_icon: fas fa-exclamation-triangle - help_text: An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. - hidden: true - plaintext_email: - type: boolean - description: Send plain-text email instead of HTML. 
- fa_icon: fas fa-remove-format - hidden: true - monochrome_logs: - type: boolean - description: Do not use coloured log outputs. - fa_icon: fas fa-palette - hidden: true - hook_url: - type: string - description: Incoming hook URL for messaging service - fa_icon: fas fa-people-group - help_text: Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported. - hidden: true - validate_params: - type: boolean - description: Boolean whether to validate parameters against the schema at runtime - default: true - fa_icon: fas fa-check-square - hidden: true - validationShowHiddenParams: - type: boolean - fa_icon: far fa-eye-slash - description: Show all params when using `--help` - hidden: true - help_text: By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters. - validationFailUnrecognisedParams: - type: boolean - fa_icon: far fa-check-circle - description: Validation of parameters fails when an unrecognised parameter is found. - hidden: true - help_text: By default, when an unrecognised parameter is found, it returns a warinig. - validationLenientMode: - type: boolean - fa_icon: far fa-check-circle - description: Validation of parameters in lenient more. - hidden: true - help_text: Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). - deprecated_options: - title: Deprecated options - type: object - description: List of parameters that have been deprecated. - default: '' - fa_icon: fas fa-calendar-times - properties: - force_sratools_download: - type: boolean - fa_icon: fas fa-times-circle - description: This parameter has been deprecated. Please use '--download_method sratools' instead. 
- enum: - - false - hidden: true -allOf: - - $ref: "#/definitions/input_output_options" - - $ref: "#/definitions/institutional_config_options" - - $ref: "#/definitions/max_job_request_options" - - $ref: "#/definitions/generic_options" - - $ref: "#/definitions/deprecated_options" diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index d662ac91..ba15b922 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -62,7 +62,7 @@ workflow PIPELINE_INITIALISATION { pre_help_text, post_help_text, validate_params, - "params.yml" + "nextflow_schema.json" ) // @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION { inputPath // Path |> Channel.of // Channel |> flatMap { csv -> - splitCsv(csv, header: false, schema: 'assets/schema_input.yml') + splitCsv(csv, header: false, schema: 'assets/schema_input.json') } // Channel |> unique // Channel |> set { ids } // Channel @@ -111,7 +111,7 @@ workflow PIPELINE_COMPLETION { main: - let summary_params = paramsSummaryMap(workflow, parameters_schema: "params.yml") + let summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") // // Completion email and summary From aef604bbe4347b92a998d146e505056541025a38 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 3 Nov 2024 18:29:56 +0100 Subject: [PATCH 13/25] Add SraParams type Signed-off-by: Ben Sherman --- main.nf | 34 ++++----- modules/nf-core/sratools/fasterqdump/main.nf | 2 +- modules/nf-core/sratools/prefetch/main.nf | 2 +- .../main.nf | 2 +- types/types.nf | 5 -- workflows/sra/main.nf | 70 ++++++++++--------- 6 files changed, 59 insertions(+), 56 deletions(-) delete mode 100644 types/types.nf diff --git a/main.nf b/main.nf index 75f50236..cac0b723 100644 --- a/main.nf +++ b/main.nf @@ -9,17 +9,18 @@ ---------------------------------------------------------------------------------------- */ -nextflow.preview.dsl = 3 +nextflow.preview.types = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + IMPORT FUNCTIONS / MODULES / WORKFLOWS / TYPES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { SRA } from './workflows/sra' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' +include { SraParams } from './workflows/sra' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,7 +35,7 @@ workflow NFCORE_FETCHNGS { take: ids : Channel - params : ParamsMap + params : SraParams main: @@ -43,18 +44,7 @@ workflow NFCORE_FETCHNGS { // SRA ( ids, - params.ena_metadata_fields ?: '', - params.sample_mapping_fields, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', - params.download_method, - params.skip_fastq_download, - params.dbgap_key, - params.aspera_cli_args, - params.sra_fastq_ftp_args, - params.sratools_fasterqdump_args, - params.sratools_pigz_args, - params.outdir + params ) } @@ -86,7 +76,19 @@ workflow { // NFCORE_FETCHNGS ( ids, - params, + SraParams( + params.ena_metadata_fields ?: '', + params.sample_mapping_fields, + params.nf_core_pipeline ?: '', + params.nf_core_rnaseq_strandedness ?: 'auto', + params.download_method, + params.skip_fastq_download, + params.dbgap_key, + 
params.aspera_cli_args, + params.sra_fastq_ftp_args, + params.sratools_fasterqdump_args, + params.sratools_pigz_args + ) ) // diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 1f675d90..8bfca892 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -11,7 +11,7 @@ process SRATOOLS_FASTERQDUMP { meta : Map sra : Path ncbi_settings : Path - certificate : Path + certificate : Path? fasterqdump_args: String = '--split-files --include-technical' pigz_args : String = '' prefix : String = '' diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 4194181c..6a05fbcc 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -10,7 +10,7 @@ process SRATOOLS_PREFETCH { input: meta : Map ncbi_settings : Path - certificate : Path + certificate : Path? prefetch_args : String = '' retry_args : String = '5 1 100' // diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 73ec2376..3db93757 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -8,7 +8,7 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: sra_metadata // Channel - dbgap_key // Path + dbgap_key // Path? sratools_fasterqdump_args // String sratools_pigz_args // String diff --git a/types/types.nf b/types/types.nf deleted file mode 100644 index 487afa94..00000000 --- a/types/types.nf +++ /dev/null @@ -1,5 +0,0 @@ - -record Sample { - meta : Map - files : List -} diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 304e4be1..6fbea927 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -20,14 +20,6 @@ include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcor include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT RECORD TYPES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { Sample } from '../../types/types' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -38,18 +30,7 @@ workflow SRA { take: ids : Channel - ena_metadata_fields : String - sample_mapping_fields : String - nf_core_pipeline : String - nf_core_rnaseq_strandedness : String - download_method : String // enum: 'aspera' | 'ftp' | 'sratools' - skip_fastq_download : boolean - dbgap_key : String - aspera_cli_args : String - sra_fastq_ftp_args : String - sratools_fasterqdump_args : String - sratools_pigz_args : String - outdir : String + params : SraParams main: ids // Channel @@ -57,7 +38,7 @@ workflow SRA { // MODULE: Get SRA run information for public database ids // |> map { id -> - SRA_IDS_TO_RUNINFO ( id, ena_metadata_fields ) + SRA_IDS_TO_RUNINFO ( id, params.ena_metadata_fields ) } // Channel // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] @@ -80,11 +61,11 @@ workflow SRA { // sra_metadata |> filter { meta -> - getDownloadMethod(meta, 
download_method) == 'ftp'
+            getDownloadMethod(meta, params.download_method) == 'ftp'
         }                           // Channel<Map>
         |> map { meta ->
             let fastq = [ file(meta.fastq_1), file(meta.fastq_2) ]
-            SRA_FASTQ_FTP ( meta, fastq, sra_fastq_ftp_args )
+            SRA_FASTQ_FTP ( meta, fastq, params.sra_fastq_ftp_args )
         }                           // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
         |> set { ftp_samples }      // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
@@ -93,12 +74,12 @@ workflow SRA {
     //
     sra_metadata
         |> filter { meta ->
-            getDownloadMethod(meta, download_method) == 'sratools'
+            getDownloadMethod(meta, params.download_method) == 'sratools'
         }                           // Channel<Map>
         |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
-            dbgap_key ? file(dbgap_key, checkIfExists: true) : [],
-            sratools_fasterqdump_args,
-            sratools_pigz_args )    // Channel<(meta: Map, fastq: List<Path>)>
+            params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : null,
+            params.sratools_fasterqdump_args,
+            params.sratools_pigz_args )     // Channel<(meta: Map, fastq: List<Path>)>
         |> set { sratools_samples } // Channel<(meta: Map, fastq: List<Path>)>
@@ -106,11 +87,11 @@ workflow SRA {
     //
     sra_metadata
         |> filter { meta ->
-            getDownloadMethod(meta, download_method) == 'aspera'
+            getDownloadMethod(meta, params.download_method) == 'aspera'
         }                           // Channel<Map>
         |> map { meta ->
             let fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
-            ASPERA_CLI ( meta, fastq, 'era-fasp', aspera_cli_args )
+            ASPERA_CLI ( meta, fastq, 'era-fasp', params.aspera_cli_args )
         }                           // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
         |> set { aspera_samples }   // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
@@ -146,9 +127,9 @@ workflow SRA {
         |> { sra_metadata ->
             SRA_TO_SAMPLESHEET (
                 sra_metadata,
-                nf_core_pipeline,
-                nf_core_rnaseq_strandedness,
-                sample_mapping_fields )
+                params.nf_core_pipeline,
+                params.nf_core_rnaseq_strandedness,
+                params.sample_mapping_fields )
         }                           // ProcessOut(samplesheet: Path, mappings: Path)
         |> set { index_files }      // ProcessOut(samplesheet: Path, mappings: Path)
@@ -209,6 +190,31 @@ fn getDownloadMethod(meta: Map, download_method: String) -> String {
     return 'ftp'
 }
 
+/*
+========================================================================================
+    TYPES
+========================================================================================
+*/
+
+record SraParams {
+    ena_metadata_fields         : String
+    sample_mapping_fields       : String
+    nf_core_pipeline            : String
+    nf_core_rnaseq_strandedness : String
+    download_method             : String      // enum: 'aspera' | 'ftp' | 'sratools'
+    skip_fastq_download         : boolean
+    dbgap_key                   : String?
+ aspera_cli_args : String + sra_fastq_ftp_args : String + sratools_fasterqdump_args : String + sratools_pigz_args : String +} + +record Sample { + meta : Map + files : List +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END From f1f763c0234949f3991453c0b196fad5c0d748bf Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Sun, 3 Nov 2024 20:27:14 +0100 Subject: [PATCH 14/25] Refactor workflow outputs Signed-off-by: Ben Sherman --- bin/multiqc_mappings_config.py | 13 -- main.nf | 71 ++++---- modules/local/multiqc_mappings_config/main.nf | 24 --- .../tests/main.nf.test | 24 --- .../tests/main.nf.test.snap | 31 ---- modules/local/sra_to_samplesheet/main.nf | 79 --------- .../sra_to_samplesheet/tests/main.nf.test | 27 --- .../tests/main.nf.test.snap | 117 ------------- nextflow.config | 13 +- .../nf-core/utils_nfcore_pipeline/main.nf | 60 +++---- workflows/sra/main.nf | 162 ++++++------------ 11 files changed, 123 insertions(+), 498 deletions(-) delete mode 100755 bin/multiqc_mappings_config.py delete mode 100644 modules/local/multiqc_mappings_config/main.nf delete mode 100644 modules/local/multiqc_mappings_config/tests/main.nf.test delete mode 100644 modules/local/multiqc_mappings_config/tests/main.nf.test.snap delete mode 100644 modules/local/sra_to_samplesheet/main.nf delete mode 100644 modules/local/sra_to_samplesheet/tests/main.nf.test delete mode 100644 modules/local/sra_to_samplesheet/tests/main.nf.test.snap diff --git a/bin/multiqc_mappings_config.py b/bin/multiqc_mappings_config.py deleted file mode 100755 index 3ffe35ec..00000000 --- a/bin/multiqc_mappings_config.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import sys - -with open(sys.argv[1], "r") as fin, open(sys.argv[2], "w") as fout: - header = fin.readline().split(",") - config = "sample_names_rename_buttons:\n" - config += "\n".join([" - " + x.strip('"') for x in header]) - config += "sample_names_rename:\n" - rename = [] - for line in fin: - rename.append(f" - [{', '.join(line.strip().split(','))}]") - fout.write(config + "\n".join(sorted(rename)) + "\n") diff --git a/main.nf b/main.nf index cac0b723..b1e1be5b 100644 --- a/main.nf +++ b/main.nf @@ -20,35 +20,9 @@ nextflow.preview.types = true include { SRA } from './workflows/sra' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' +include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline' include { SraParams } from './workflows/sra' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided -// -workflow NFCORE_FETCHNGS { - - take: - ids : Channel - params : SraParams - - main: - - // - // WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids - // - SRA ( - ids, - params - ) - -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -57,6 +31,7 @@ workflow NFCORE_FETCHNGS { workflow { + main: // // SUBWORKFLOW: Run initialisation tasks // @@ -74,13 +49,10 @@ workflow { // // WORKFLOW: Run primary workflows for the pipeline // - NFCORE_FETCHNGS ( + let samples = SRA ( ids, SraParams( params.ena_metadata_fields ?: '', - 
params.sample_mapping_fields, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', params.download_method, params.skip_fastq_download, params.dbgap_key, @@ -91,6 +63,11 @@ workflow { ) ) + // + // SUBWORKFLOW: Collect software versions + // + let versions = SOFTWARE_VERSIONS() + // // SUBWORKFLOW: Run completion tasks // @@ -102,11 +79,39 @@ workflow { params.monochrome_logs, params.hook_url ) + + publish: + samples >> 'samples' + versions >> 'versions' } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + WORKFLOW OUTPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + output { - directory params.outdir - mode params.publish_dir_mode + samples { + path { _sample -> + let dirs = [ + 'fastq': 'fastq', + 'md5': 'fastq/md5' + ] + return { file -> "${dirs[file.ext]}/${file.baseName}" } + } + index { + path 'samplesheet/samplesheet.json' + sort { sample -> sample.id } + } + } + + versions { + path '.' + index { + path 'nf_core_fetchngs_software_mqc_versions.yml' + } + } } /* diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf deleted file mode 100644 index f3b9fac1..00000000 --- a/modules/local/multiqc_mappings_config/main.nf +++ /dev/null @@ -1,24 +0,0 @@ - -process MULTIQC_MAPPINGS_CONFIG { - - conda "conda-forge::python=3.9.5" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" - - input: - csv : Path - - script: - """ - multiqc_mappings_config.py \\ - $csv \\ - multiqc_config.yml - """ - - output: - path("multiqc_config.yml") - - topic: - ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' -} diff --git a/modules/local/multiqc_mappings_config/tests/main.nf.test b/modules/local/multiqc_mappings_config/tests/main.nf.test deleted file mode 100644 index dbb4d74f..00000000 --- a/modules/local/multiqc_mappings_config/tests/main.nf.test +++ /dev/null @@ -1,24 +0,0 @@ -nextflow_process { - - name "Test process: MULTIQC_MAPPINGS_CONFIG" - script "../main.nf" - process "MULTIQC_MAPPINGS_CONFIG" - - test("Should run without failures") { - - when { - process { - """ - input[0] = file(params.pipelines_testdata_base_path + 'csv/SRX9626017_SRR13191702.mappings.csv', checkIfExists: true) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/local/multiqc_mappings_config/tests/main.nf.test.snap b/modules/local/multiqc_mappings_config/tests/main.nf.test.snap deleted file mode 100644 index 43e46f61..00000000 --- a/modules/local/multiqc_mappings_config/tests/main.nf.test.snap +++ /dev/null @@ -1,31 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - [ - "multiqc_config.yml:md5,7f3cb10fff83ba9eb3e8fa6862d1290a", - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ] - ], - "1": [ - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ], - "versions": [ - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ], - "yml": [ - [ - "multiqc_config.yml:md5,7f3cb10fff83ba9eb3e8fa6862d1290a", - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ] - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:52:12.65888" - } -} \ No newline at end of file diff --git 
a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf deleted file mode 100644 index a18efe53..00000000 --- a/modules/local/sra_to_samplesheet/main.nf +++ /dev/null @@ -1,79 +0,0 @@ - -process SRA_TO_SAMPLESHEET { - executor 'local' - memory 100.MB - - input: - sra_metadata : List> - pipeline : String - strandedness : String - mapping_fields : String - - exec: - // - // Create samplesheet containing metadata - // - - let records = sra_metadata.collect { meta -> - getSraRecord(meta, pipeline, strandedness, mapping_fields) - } - - let samplesheet = records - .collect { pipeline_map, mappings_map -> pipeline_map } - .sort { record -> record.id } - mergeCsv(samplesheet, task.workDir.resolve('samplesheet.csv')) - - let mappings = records - .collect { pipeline_map, mappings_map -> mappings_map } - .sort { record -> record.id } - mergeCsv(mappings, task.workDir.resolve('id_mappings.csv')) - - output: - samplesheet : Path = path("samplesheet.csv") - mappings : Path = path("mappings.csv") -} - -fn getSraRecord(meta: Map, pipeline: String, strandedness: String, mapping_fields: String) -> Tuple2 { - // Remove custom keys needed to download the data - let meta_clone = meta.clone() - meta_clone.remove("id") - meta_clone.remove("fastq_1") - meta_clone.remove("fastq_2") - meta_clone.remove("md5_1") - meta_clone.remove("md5_2") - meta_clone.remove("single_end") - - // Add relevant fields to the beginning of the map - let pipeline_map = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : meta.fastq_1, - fastq_2 : meta.fastq_2 - ] - - // Add nf-core pipeline specific entries - if (pipeline) { - if (pipeline == 'rnaseq') { - pipeline_map << [ strandedness: strandedness ] - } else if (pipeline == 'atacseq') { - pipeline_map << [ replicate: 1 ] - } else if (pipeline == 'taxprofiler') { - pipeline_map << [ fasta: '' ] - } - } - pipeline_map << meta_clone - - // - // Create sample id mappings file - // - let fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ v -> v.trim().toLowerCase() } : [] - if ((pipeline_map.keySet() + fields).unique().size() != pipeline_map.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${pipeline_map.keySet().join(', ')}") - } - - let mappings_map = pipeline_map.subMap(fields) - - return ( pipeline_map, mappings_map ) -} - -fn mergeCsv() { -} diff --git a/modules/local/sra_to_samplesheet/tests/main.nf.test b/modules/local/sra_to_samplesheet/tests/main.nf.test deleted file mode 100644 index ed765158..00000000 --- a/modules/local/sra_to_samplesheet/tests/main.nf.test +++ /dev/null @@ -1,27 +0,0 @@ -nextflow_process { - - name "Test process: SRA_TO_SAMPLESHEET" - script "../main.nf" - process "SRA_TO_SAMPLESHEET" - - test("Should run without failures") { - - when { - process { - """ - input[0] = [id:'ERX1188904_ERR1109373', run_accession:'ERR1109373', experiment_accession:'ERX1188904', sample_accession:'SAMEA3643867', experiment_alias:'ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7', run_alias:'ena-RUN-CAM-03-11-2015-17:01:52:847-7', sample_alias:'sample_56', study_alias:'ena-STUDY-CAM-02-11-2015-17:42:24:189-13', library_layout:'PAIRED', experiment_title:'Illumina HiSeq 2500 paired end sequencing', sample_title:'RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome', sample_description:'RNA-Seq reads mapped onto L. 
Boulardi Toti-like virus genome', fastq_md5:'8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9', fastq_ftp:'ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz', fastq_1:'./results/fastq/ERX1188904_ERR1109373_1.fastq.gz', fastq_2:'./results/fastq/ERX1188904_ERR1109373_2.fastq.gz', md5_1:'9fd57225d6c07a31843276d6df9b15c0', md5_2:'5a62e8f785687dce890cfb4fe3e607f9', single_end:false] - input[1] = 'rnaseq' - input[2] = 'auto' - input[3] = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description' - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/local/sra_to_samplesheet/tests/main.nf.test.snap b/modules/local/sra_to_samplesheet/tests/main.nf.test.snap deleted file mode 100644 index 568f3ea7..00000000 --- a/modules/local/sra_to_samplesheet/tests/main.nf.test.snap +++ /dev/null @@ -1,117 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.samplesheet.csv:md5,e7898191d57258e049ee7129d36f5c08" - ] - ], - "1": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. 
Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.mappings.csv:md5,d09ddb4f0709675e5dfe1eadf12c608f" - ] - ], - "mappings": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.mappings.csv:md5,d09ddb4f0709675e5dfe1eadf12c608f" - ] - ], - "samplesheet": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. 
Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.samplesheet.csv:md5,e7898191d57258e049ee7129d36f5c08" - ] - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:51:38.244046" - } -} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 74e1444a..19e05583 100644 --- a/nextflow.config +++ b/nextflow.config @@ -134,22 +134,27 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// workflow outputs +outputDir = params.outdir +workflow.output.mode = params.publish_dir_mode + // NOTE: Nextflow config should provide some constant for the start timestamp +params.trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${params.trace_timestamp}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/execution_report_${params.trace_timestamp}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_timestamp}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${timestamp.format( 'yyyy-MM-dd_HH-mm-ss')}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_timestamp}.html" } manifest { diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 0ea3a9ff..f8465855 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -21,6 +21,31 @@ workflow UTILS_NFCORE_PIPELINE { valid_config } +// +// Get channel of software versions used in pipeline +// +workflow SOFTWARE_VERSIONS { + main: + let processVersions = Channel.topic('versions', (String,String,String)) + let workflowVersions = [ + [ 'Workflow', workflow.manifest.name, getWorkflowVersion() ], + [ 'Workflow', 'Nextflow', workflow.nextflow.version ] + ] + let versions = collect(processVersions) + workflowVersions + + emit: + versions // List<(String,String,String)> + .unique() // List<(String,String,String)> + .groupBy { (process, _, _) -> process } // List<(String,Bag<(String,String,String)>)> + .collect { (process, tools) -> + let simpleName = process.tokenize(':').last() + let toolsMap = tools.inject([:]) { acc, (_, name, version) -> + acc + [ (name): version ] + } + return [ simpleName: toolsMap ] + } // List>> +} + /* ======================================================================================== FUNCTIONS @@ -88,41 +113,6 @@ fn 
getWorkflowVersion() -> String { return version_string } -// -// Get workflow version for pipeline -// -fn workflowVersionToYAML() -> String { - return """ - Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version - """.stripIndent().trim() -} - -// -// Get channel of software versions used in pipeline in YAML format -// -workflow softwareVersionsToYAML { - take: - versions - - main: - versions // Channel> - |> unique // Channel> - |> map { process, name, version -> - """ - ${process.tokenize(':').last()}: - ${name}: ${version} - """.stripIndent().trim() - } // Channel - |> unique // Channel - |> mix( workflowVersionToYAML() ) // Channel - |> set { versions_yml } - - emit: - versions_yml -} - // // Get workflow summary for MultiQC // diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 6fbea927..d5144f1d 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -9,8 +9,6 @@ include { SRA_FASTQ_FTP } from '../../modules/local/sra_fastq_ftp' include { SRA_IDS_TO_RUNINFO } from '../../modules/local/sra_ids_to_runinfo' include { SRA_RUNINFO_TO_FTP } from '../../modules/local/sra_runinfo_to_ftp' include { ASPERA_CLI } from '../../modules/local/aspera_cli' -include { SRA_TO_SAMPLESHEET } from '../../modules/local/sra_to_samplesheet' -include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,123 +52,67 @@ workflow SRA { |> unique // Channel |> set { sra_metadata } // Channel - if (!skip_fastq_download) { - - // - // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums - // - sra_metadata - |> filter { meta -> - getDownloadMethod(meta, params.download_method) == 'ftp' - } // Channel - |> map { meta -> - let fastq = [ file(meta.fastq_1), file(meta.fastq_2) ] - SRA_FASTQ_FTP ( meta, fastq, params.sra_fastq_ftp_args ) - } // Channel, md5: List)> - |> set { ftp_samples } // Channel, md5: List)> - - // - // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. - // - sra_metadata - |> filter { meta -> - getDownloadMethod(meta, params.download_method) == 'sratools' - } // Channel - |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - params.dbgap_key ? 
file(params.dbgap_key, checkIfExists: true) : null,
-                params.sratools_fasterqdump_args,
-                params.sratools_pigz_args )     // Channel<(meta: Map, fastq: List<Path>)>
-            |> set { sratools_samples }         // Channel<(meta: Map, fastq: List<Path>)>
-
-        //
-        // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums
-        //
-        sra_metadata
-            |> filter { meta ->
-                getDownloadMethod(meta, params.download_method) == 'aspera'
-            }                           // Channel<Map>
-            |> map { meta ->
-                let fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
-                ASPERA_CLI ( meta, fastq, 'era-fasp', params.aspera_cli_args )
-            }                           // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
-            |> set { aspera_samples }   // Channel<(meta: Map, fastq: List<Path>, md5: List<Path>)>
-
-        // Isolate FASTQ channel which will be added to emit block
-        fastq = mix(
-            ftp_samples |> map { out -> new Sample(out.meta, out.fastq) },
-            sratools_samples |> map { out -> new Sample(out.meta, out.fastq) },
-            aspera_samples |> map { out -> new Sample(out.meta, out.fastq) }
-        )
-
-        md5 = mix(
-            ftp_samples |> map { out -> new Sample(out.meta, out.md5) },
-            aspera_samples |> map { out -> new Sample(out.meta, out.md5) }
-        )
-
-        fastq                           // Channel<Sample>
-            |> map { sample ->
-                let reads = sample.files
-                let meta = sample.meta
-                meta + [
-                    fastq_1: reads[0] ? "${outdir}/fastq/${reads[0].getName()}" : '',
-                    fastq_2: reads[1] && !meta.single_end ? "${outdir}/fastq/${reads[1].getName()}" : ''
-                ]
-            }                           // Channel<Map>
-            |> set { sra_metadata }     // Channel<Map>
-    }
-
     //
-    // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet
+    // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
     //
-    sra_metadata                    // Channel<Map>
-        |> collect                  // List<Map>
-        |> { sra_metadata ->
-            SRA_TO_SAMPLESHEET (
-                sra_metadata,
-                params.nf_core_pipeline,
-                params.nf_core_rnaseq_strandedness,
-                params.sample_mapping_fields )
-        }                           // ProcessOut(samplesheet: Path, mappings: Path)
-        |> set { index_files }      // ProcessOut(samplesheet: Path, mappings: Path)
-
-    samplesheet = index_files.samplesheet   // Path
-    mappings = index_files.mappings         // Path
+    sra_metadata
+        |> filter { meta ->
+            !params.skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'ftp'
+        }                           // Channel<Map>
+        |> map { meta ->
+            let fastq = [ file(meta.fastq_1), file(meta.fastq_2) ]
+            let out = SRA_FASTQ_FTP ( meta, fastq, params.sra_fastq_ftp_args )
+            new Sample(out.meta, out.fastq, out.md5)
+        }                           // Channel<Sample>
+        |> set { ftp_samples }      // Channel<Sample>
 
     //
-    // MODULE: Create a MutiQC config file with sample name mappings
+    // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools.
     //
-    sample_mappings = sample_mapping_fields
-        ? MULTIQC_MAPPINGS_CONFIG ( mappings )  // Path
-        : null
+    sra_metadata
+        |> filter { meta ->
+            !params.skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'sratools'
+        }                           // Channel<Map>
+        |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
+            params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : null,
+            params.sratools_fasterqdump_args,
+            params.sratools_pigz_args )     // Channel<(meta: Map, fastq: List<Path>)>
+        |> map { out ->
+            new Sample(out.meta, out.fastq, [])
+        }                           // Channel<Sample>
+        |> set { sratools_samples } // Channel<Sample>
 
     //
     // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums
     //
+    sra_metadata
+        |> filter { meta ->
+            !params.skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'aspera'
+        }                           // Channel<Map>
+        |> map { meta ->
+            let fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
+            let out = ASPERA_CLI ( meta, fastq, 'era-fasp', params.aspera_cli_args )
+            new Sample(out.meta, out.fastq, out.md5)
+        }                           // Channel<Sample>
+        |> set { aspera_samples }   // Channel<Sample>
+
+    mix( ftp_samples, sratools_samples, aspera_samples )    // Channel<Sample>
+        |> map { sample ->
+            let meta = sample.meta
+            meta + [
+                fastq_1: sample.fastq[0],
+                fastq_2: sample.fastq[1] && !meta.single_end ? sample.fastq[1] : null,
+                md5_1: sample.md5[0],
+                md5_2: sample.md5[1] && !meta.single_end ? sample.md5[1] : null
+            ]
+        }                           // Channel<Map>
+        |> set { samples }          // Channel<Map>
 
     emit:
-    samplesheet
-    mappings
-    sample_mappings
-    sra_metadata
+    samples
 
     publish:
-    fastq >> 'fastq/'
-    md5 >> 'fastq/md5/'
-    runinfo_ftp >> 'metadata/'
-    versions_yml >> 'pipeline_info/'
-    samplesheet >> 'samplesheet/'
-    mappings >> 'samplesheet/'
-    sample_mappings >> 'samplesheet/'
+    runinfo_ftp >> 'metadata'
 }
 
 /*
@@ -198,9 +140,6 @@ fn getDownloadMethod(meta: Map, download_method: String) -> String {
 
 record SraParams {
     ena_metadata_fields         : String
-    sample_mapping_fields       : String
-    nf_core_pipeline            : String
-    nf_core_rnaseq_strandedness : String
     download_method             : String      // enum: 'aspera' | 'ftp' | 'sratools'
     skip_fastq_download         : boolean
    dbgap_key                   : String?
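[Editor's sketch, illustrative only and not part of the diff: with the typed take block above, a caller builds one SraParams value and passes it through, mirroring the positional construction in main.nf. All literal values below are placeholders, not pipeline defaults.]

    // Hypothetical driver sketch for the typed SRA workflow.
    let sra_params = SraParams(
        '',        // ena_metadata_fields
        'ftp',     // download_method: 'aspera' | 'ftp' | 'sratools'
        false,     // skip_fastq_download
        null,      // dbgap_key (path to a JWT/NGC key file, if any)
        '',        // aspera_cli_args
        '',        // sra_fastq_ftp_args
        '',        // sratools_fasterqdump_args
        ''         // sratools_pigz_args
    )
    let samples = SRA( Channel.of('SRR13191702'), sra_params )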
@@ -212,7 +151,8 @@ record SraParams {
 
 record Sample {
     meta    : Map
-    files   : List<Path>
+    fastq   : List<Path>
+    md5     : List<Path>
 }
 
 /*

From c37a3b275fc2b343fe23932e17a156d920fee38a Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Mon, 4 Nov 2024 08:53:27 +0100
Subject: [PATCH 15/25] Make Sample type more precise

Signed-off-by: Ben Sherman
---
 main.nf                                       |  5 +-
 modules/local/aspera_cli/main.nf              | 13 +++--
 modules/local/sra_fastq_ftp/main.nf           | 18 +++---
 modules/local/sra_ids_to_runinfo/main.nf      |  2 +-
 modules/local/sra_runinfo_to_ftp/main.nf      |  7 +--
 .../custom/sratoolsncbisettings/main.nf       |  4 +-
 modules/nf-core/sratools/fasterqdump/main.nf  |  9 ++-
 modules/nf-core/sratools/prefetch/main.nf     |  8 +--
 modules/nf-core/untar/environment.yml         | 11 ----
 modules/nf-core/untar/main.nf                 | 57 -------------------
 modules/nf-core/untar/meta.yml                | 46 ---------------
 modules/nf-core/untar/tests/main.nf.test      | 45 ---------------
 modules/nf-core/untar/tests/main.nf.test.snap | 42 --------------
 .../main.nf                                   | 27 +++++----
 .../nf-core/utils_nfcore_pipeline/main.nf     | 16 +++---
 workflows/sra/main.nf                         | 57 +++++++------------
 16 files changed, 79 insertions(+), 288 deletions(-)
 delete mode 100644 modules/nf-core/untar/environment.yml
 delete mode 100644 modules/nf-core/untar/main.nf
 delete mode 100644 modules/nf-core/untar/meta.yml
 delete mode 100644 modules/nf-core/untar/tests/main.nf.test
 delete mode 100644 modules/nf-core/untar/tests/main.nf.test.snap

diff --git a/main.nf b/main.nf
index b1e1be5b..b0e6e33c 100644
--- a/main.nf
+++ b/main.nf
@@ -22,6 +22,7 @@ include { SRA } from './workflows/sra'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
 include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
 include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline'
 include { SraParams } from './workflows/sra'
+include { Sample } from './workflows/sra'
 
 /*
@@ -92,7 +93,7 @@ workflow {
 */
 
 output {
-    samples {
+    samples: Sample {
         path { _sample ->
             let dirs = [
                 'fastq': 'fastq',
@@ -106,7 +107,7 @@ output {
         }
     }
 
-    versions {
+    versions: Map<String,Map<String,String>> {
         path '.'
         index {
             path 'nf_core_fetchngs_software_mqc_versions.yml'
diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf
index 195cf284..3e5d92a2 100644
--- a/modules/local/aspera_cli/main.nf
+++ b/modules/local/aspera_cli/main.nf
@@ -8,14 +8,14 @@ process ASPERA_CLI {
         'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"
 
     input:
-    meta    : Map
-    fastq   : List<Path>
+    meta    : Map<String,String>
     user    : String
    args    : String
 
     script:
     let conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
-    if (meta.single_end) {
+    let fastq = meta.fastq_aspera.tokenize(';')
+    if (meta.single_end.toBoolean()) {
         """
         $conda_prefix
 
@@ -53,9 +53,10 @@ process ASPERA_CLI {
     }
 
     output:
-    meta    = meta
-    fastq   = path("*fastq.gz")
-    md5     = path("*md5")
+    fastq_1 : Path  = file('*_1.fastq.gz')
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file('*_1.fastq.gz.md5')
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
 
     topic:
     ( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions'
diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf
index 6a3cd91e..e2264541 100644
--- a/modules/local/sra_fastq_ftp/main.nf
+++ b/modules/local/sra_fastq_ftp/main.nf
@@ -10,17 +10,16 @@ process SRA_FASTQ_FTP {
         'biocontainers/wget:1.20.1' }"
 
     input:
-    meta    : Map
-    fastq   : List<Path>
+    meta    : Map<String,String>
     args    : String
 
     script:
-    if (meta.single_end) {
+    if (meta.single_end.toBoolean()) {
         """
         wget \\
             $args \\
             -O ${meta.id}.fastq.gz \\
-            ${fastq[0]}
+            ${meta.fastq_1}
 
         echo "${meta.md5_1}  ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
         md5sum -c ${meta.id}.fastq.gz.md5
@@ -30,7 +29,7 @@ process SRA_FASTQ_FTP {
         wget \\
             $args \\
             -O ${meta.id}_1.fastq.gz \\
-            ${fastq[0]}
+            ${meta.fastq_1}
 
         echo "${meta.md5_1}  ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
         md5sum -c ${meta.id}_1.fastq.gz.md5
@@ -38,7 +37,7 @@ process SRA_FASTQ_FTP {
         wget \\
             $args \\
             -O ${meta.id}_2.fastq.gz \\
-            ${fastq[1]}
+            ${meta.fastq_2}
 
         echo "${meta.md5_2}  ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
         md5sum -c ${meta.id}_2.fastq.gz.md5
@@ -46,9 +45,10 @@ process SRA_FASTQ_FTP {
     }
 
     output:
-    meta    = meta
-    fastq   = path("*fastq.gz")
-    md5     = path("*md5")
+    fastq_1 : Path  = file('*_1.fastq.gz')
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file('*_1.fastq.gz.md5')
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
 
     topic:
     ( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions'
diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf
index cab4f32c..6f04091f 100644
--- a/modules/local/sra_ids_to_runinfo/main.nf
+++ b/modules/local/sra_ids_to_runinfo/main.nf
@@ -23,7 +23,7 @@ process SRA_IDS_TO_RUNINFO {
     """
 
     output:
-    path("${id}.runinfo.tsv")
+    file('*.runinfo.tsv')
 
     topic:
     ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions'
diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf
index 783c59ea..1842b726 100644
--- a/modules/local/sra_runinfo_to_ftp/main.nf
+++ b/modules/local/sra_runinfo_to_ftp/main.nf
@@ -10,15 +10,14 @@ process SRA_RUNINFO_TO_FTP {
     runinfo : Path
 
     script:
-    prefix = runinfo.toString().tokenize(".")[0]
     """
     sra_runinfo_to_ftp.py \\
-        ${runinfo.join(',')} \\
-        ${prefix}.runinfo_ftp.tsv
+        ${runinfo} \\
+        ${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv
     """
 
     output:
-    path("${prefix}.runinfo_ftp.tsv")
+    file('*.runinfo_ftp.tsv')
 
     topic:
     ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions'
diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf
index a79124b4..14d49007 100644
--- a/modules/nf-core/custom/sratoolsncbisettings/main.nf
+++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf
@@ -8,14 +8,14 @@ process CUSTOM_SRATOOLSNCBISETTINGS {
         'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }"
 
     input:
-    ids             : List<String>
+    ids             : Bag<String>
 
     shell:
     config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n"
     template 'detect_ncbi_settings.sh'
 
     output:
-    path('*.mkfg')
+    file('*.mkfg')
 
     topic:
     ( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions'
b/modules/nf-core/sratools/fasterqdump/main.nf @@ -8,7 +8,7 @@ process SRATOOLS_FASTERQDUMP { 'quay.io/biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: - meta : Map + meta : Map sra : Path ncbi_settings : Path certificate : Path? @@ -21,9 +21,9 @@ process SRATOOLS_FASTERQDUMP { prefix = "${meta.id}" let outfile = meta.single_end ? "${prefix}.fastq" : prefix var key_file = '' - if (certificate.toString().endsWith('.jwt')) { + if (certificate.baseName.endsWith('.jwt')) { key_file += " --perm ${certificate}" - } else if (certificate.toString().endsWith('.ngc')) { + } else if (certificate.baseName.endsWith('.ngc')) { key_file += " --ngc ${certificate}" } """ @@ -44,8 +44,7 @@ process SRATOOLS_FASTERQDUMP { """ output: - meta - fastq = path('*.fastq.gz') + files('*.fastq.gz').sort() topic: ( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 6a05fbcc..d6bbf690 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -8,7 +8,7 @@ process SRATOOLS_PREFETCH { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - meta : Map + meta : Map ncbi_settings : Path certificate : Path? prefetch_args : String = '' @@ -17,10 +17,10 @@ process SRATOOLS_PREFETCH { shell: id = meta.run_accession if (certificate) { - if (certificate.toString().endsWith('.jwt')) { + if (certificate.baseName.endsWith('.jwt')) { prefetch_args += " --perm ${certificate}" } - else if (certificate.toString().endsWith('.ngc')) { + else if (certificate.baseName.endsWith('.ngc')) { prefetch_args += " --ngc ${certificate}" } } @@ -28,7 +28,7 @@ process SRATOOLS_PREFETCH { template 'retry_with_backoff.sh' output: - path(id) + file(id) topic: ( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml deleted file mode 100644 index 0c9cbb10..00000000 --- a/modules/nf-core/untar/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: untar - -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - conda-forge::grep=3.11 - - conda-forge::sed=4.7 - - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf deleted file mode 100644 index 671ee35a..00000000 --- a/modules/nf-core/untar/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -process UNTAR { - tag "$archive" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - meta : Map - archive : Path - args : String = '' - args2 : String = '' - prefix : String = '' - - script: - if( !prefix ) - prefix = meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") - - """ - mkdir $prefix - - ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in prefix - if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then - tar \\ - -C $prefix --strip-components 1 \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - else - tar \\ - -C $prefix \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - fi - """ - - stub: - if( !prefix ) - prefix = meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "") - """ - mkdir $prefix - touch ${prefix}/file.txt - """ - - output: - meta - untar = path("$prefix") - - topic: - ( task.process, 'untar', eval("echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//'") ) >> 'versions' -} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml deleted file mode 100644 index a9a2110f..00000000 --- a/modules/nf-core/untar/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: untar -description: Extract files. -keywords: - - untar - - uncompress - - extract -tools: - - untar: - description: | - Extract tar.gz files. - documentation: https://www.gnu.org/software/tar/manual/ - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - untar: - type: directory - description: Directory containing contents of archive - pattern: "*/" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test deleted file mode 100644 index 98b769ad..00000000 --- a/modules/nf-core/untar/tests/main.nf.test +++ /dev/null @@ -1,45 +0,0 @@ -nextflow_process { - - name "Test Process UNTAR" - script "../main.nf" - process "UNTAR" - - test("test_untar") { - - when { - process { - """ - input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar") }, - ) - } - - } - - test("test_untar_onlyfiles") { - - when { - process { - """ - input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, - ) - } - - } - -} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap deleted file mode 100644 index 64550292..00000000 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ /dev/null @@ -1,42 +0,0 @@ -{ - "test_untar_onlyfiles": { - "content": [ - [ - [ - [ - - ], - [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" - ] - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:49:41.320643" - }, - "test_untar": { - "content": [ - [ - [ - [ - - ], - [ - 
"hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" - ] - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:49:33.795172" - } -} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 3db93757..eec2cb19 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -7,7 +7,7 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ // workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - sra_metadata // Channel + sra_metadata // Channel> dbgap_key // Path? sratools_fasterqdump_args // String sratools_pigz_args // String @@ -16,31 +16,36 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Detect existing NCBI user settings or create new ones. // - sra_metadata // Channel - |> collect // List - |> CUSTOM_SRATOOLSNCBISETTINGS // Path - |> set { ncbi_settings } // Path + sra_metadata // Channel> + |> collect // Bag> (future) + |> CUSTOM_SRATOOLSNCBISETTINGS // Path (future) + |> set { ncbi_settings } // Path (future) - sra_metadata // Channel + sra_metadata // Channel> |> map { meta -> // // Prefetch sequencing reads in SRA format. // - let sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) + let sra = SRATOOLS_PREFETCH ( + meta, + ncbi_settings, + dbgap_key ) // // Convert the SRA format into one or more compressed FASTQ files. // - SRATOOLS_FASTERQDUMP ( + let fastq = SRATOOLS_FASTERQDUMP ( meta, sra, ncbi_settings, dbgap_key, sratools_fasterqdump_args, sratools_pigz_args ) - } // Channel)> - |> set { reads } // Channel)> + + ( meta, fastq ) + } // Channel<(Map, List)> + |> set { reads } // Channel<(Map, List)> emit: - reads // Channel)> + reads // Channel<(Map, List)> } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index f8465855..ea00b8a8 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -27,23 +27,23 @@ workflow UTILS_NFCORE_PIPELINE { workflow SOFTWARE_VERSIONS { main: let processVersions = Channel.topic('versions', (String,String,String)) - let workflowVersions = [ + let workflowVersions = Channel.of( [ 'Workflow', workflow.manifest.name, getWorkflowVersion() ], [ 'Workflow', 'Nextflow', workflow.nextflow.version ] - ] - let versions = collect(processVersions) + workflowVersions + ) emit: - versions // List<(String,String,String)> - .unique() // List<(String,String,String)> - .groupBy { (process, _, _) -> process } // List<(String,Bag<(String,String,String)>)> - .collect { (process, tools) -> + processVersions + |> mix(workflowVersions) // Channel<(String,String,String)> + |> unique // Channel<(String,String,String)> + |> groupBy { (process, _, _) -> process } // Channel<(String,Bag<(String,String,String)>)> + |> map { (process, tools) -> let simpleName = process.tokenize(':').last() let toolsMap = tools.inject([:]) { acc, (_, name, version) -> acc + [ (name): version ] } return [ simpleName: toolsMap ] - } // List>> + } // Channel>> } /* diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index d5144f1d..747c0d41 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -45,12 +45,9 @@ 
workflow SRA {
     |> set { runinfo_ftp } // Channel
     |> flatMap { tsv ->
         splitCsv(tsv, header:true, sep:'\t')
-    } // Channel
-    |> map { meta ->
-        meta + [single_end: meta.single_end.toBoolean()]
-    } // Channel
-    |> unique // Channel
-    |> set { sra_metadata } // Channel
+    } // Channel<Map<String,String>>
+    |> unique // Channel<Map<String,String>>
+    |> set { sra_metadata } // Channel<Map<String,String>>

     //
     // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums
     //
     sra_metadata
         |> filter { meta ->
             !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'ftp'
-        } // Channel
+        } // Channel<Map<String,String>>
         |> map { meta ->
-            let fastq = [ file(meta.fastq_1), file(meta.fastq_2) ]
-            let out = SRA_FASTQ_FTP ( meta, fastq, params.sra_fastq_ftp_args )
-            new Sample(out.meta, out.fastq, out.md5)
+            let out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args )
+            new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2)
         } // Channel
         |> set { ftp_samples } // Channel

@@ -72,13 +68,15 @@ workflow SRA {
     sra_metadata
         |> filter { meta ->
             !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'sratools'
-        } // Channel
+        } // Channel<Map<String,String>>
         |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS (
             params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : null,
             params.sratools_fasterqdump_args,
-            params.sratools_pigz_args ) // Channel<(Map, List<Path>)>
-        |> map { out ->
-            new Sample(out.meta, out.fastq, [])
+            params.sratools_pigz_args ) // Channel<(Map, List)>
+        |> map { (meta, fastq) ->
+            let fastq_1 = fastq[0]
+            let fastq_2 = !meta.single_end ? fastq[1] : null
+            new Sample(meta.id, fastq_1, fastq_2, null, null)
         } // Channel
         |> set { sratools_samples } // Channel

@@ -88,28 +86,15 @@ workflow SRA {
     sra_metadata
         |> filter { meta ->
             !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'aspera'
-        } // Channel
+        } // Channel<Map<String,String>>
         |> map { meta ->
-            let fastq = meta.fastq_aspera.tokenize(';').take(2).collect { name -> file(name) }
-            let out = ASPERA_CLI ( meta, fastq, 'era-fasp', params.aspera_cli_args )
-            new Sample(out.meta, out.fastq, out.md5)
+            let out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args )
+            new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2)
         } // Channel
         |> set { aspera_samples } // Channel

-    mix( ftp_samples, sratools_samples, aspera_samples ) // Channel
-        |> map { sample ->
-            let meta = sample.meta
-            meta + [
-                fastq_1: sample.fastq[0],
-                fastq_2: sample.fastq[1] && !meta.single_end ? sample.fastq[1] : null,
-                md5_1: sample.md5[0],
-                md5_2: sample.md5[1] && !meta.single_end ? sample.md5[1] : null
-            ]
-        } // Channel
-        |> set { samples } // Channel
-
-    emit:
-    samples
+    emit:
+    mix( ftp_samples, sratools_samples, aspera_samples )

     publish:
     runinfo_ftp >> 'metadata'
@@ -121,7 +106,7 @@
 ========================================================================================
 */

-fn getDownloadMethod(meta: Map, download_method: String) -> String {
+fn getDownloadMethod(meta: Map<String,String>, download_method: String) -> String {
     // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera
     // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz'
     // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz'
@@ -150,9 +135,11 @@ record SraParams {
 }

 record Sample {
-    meta    : Map
-    fastq   : List
-    md5     : List
+    id      : String
+    fastq_1 : Path
+    fastq_2 : Path?
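+    // md5_1/md5_2 below are nullable because the sratools route publishes no checksum files (see sratools_samples above, which passes null for both)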
+ md5_1 : Path? + md5_2 : Path? } /* From 677f8386c021c58b507dfc5106cd4736f4de0df3 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 4 Nov 2024 10:33:28 +0100 Subject: [PATCH 16/25] Revert unrelated changes Signed-off-by: Ben Sherman --- assets/schema_input.json | 30 +-- nextflow_schema.json | 570 +++++++++++++++++++-------------------- 2 files changed, 300 insertions(+), 300 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 0b41eab3..db9ffc00 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,17 +1,17 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json", - "title": "nf-core/fetchngs pipeline - params.input schema", - "description": "Schema for the file provided with params.input", - "type": "array", - "items": { - "type": "object", - "properties": { - "": { - "type": "string", - "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$", - "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier" - } - } - } + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.json", + "title": "nf-core/fetchngs pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "": { + "type": "string", + "pattern": "^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\\d+)$", + "errorMessage": "Please provide a valid SRA, ENA, DDBJ or GEO identifier" + } + } + } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 6dbce31e..29f7b710 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,287 +1,287 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json", - "title": "nf-core/fetchngs pipeline parameters", - "description": "Pipeline to fetch metadata and raw FastQ files from public databases", - "type": "object", - "definitions": { - "input_output_options": { - "title": "Input/output options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], - "properties": { - "input": { - "type": "string", - "format": "file-path", - "exists": true, - "schema": "assets/schema_input.json", - "mimetype": "text/csv", - "pattern": "^\\S+\\.(csv|tsv|txt)$", - "fa_icon": "fas fa-file-excel", - "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files." - }, - "ena_metadata_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", - "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." 
- }, - "sample_mapping_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", - "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" - }, - "nf_core_pipeline": { - "type": "string", - "fa_icon": "fab fa-apple", - "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.", - "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] - }, - "nf_core_rnaseq_strandedness": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", - "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", - "default": "auto" - }, - "download_method": { - "type": "string", - "default": "ftp", - "fa_icon": "fas fa-download", - "enum": ["aspera", "ftp", "sratools"], - "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", - "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." - }, - "skip_fastq_download": { - "type": "boolean", - "fa_icon": "fas fa-fast-forward", - "description": "Only download metadata for public data database ids and don't download the FastQ files." - }, - "dbgap_key": { - "type": "string", - "fa_icon": "fas fa-address-card", - "help_text": "Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.", - "format": "file-path", - "description": "dbGaP repository key." - }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } - }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" - } - } - }, - "generic_options": { - "title": "Generic options", - "type": "object", - "fa_icon": "fas fa-file-import", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", - "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "version": { - "type": "boolean", - "description": "Display version and exit.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format", - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - }, - "deprecated_options": { - "title": "Deprecated options", - "type": "object", - "description": "List of parameters that have been deprecated.", - "default": "", - "fa_icon": "fas fa-calendar-times", - "properties": { - "force_sratools_download": { - "type": "boolean", - "fa_icon": "fas fa-times-circle", - "description": "This parameter has been deprecated. Please use '--download_method sratools' instead.", - "enum": [false], - "hidden": true - } - } - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" - }, - { - "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/deprecated_options" - } - ] + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/fetchngs/master/nextflow_schema.json", + "title": "nf-core/fetchngs pipeline parameters", + "description": "Pipeline to fetch metadata and raw FastQ files from public databases", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.(csv|tsv|txt)$", + "fa_icon": "fas fa-file-excel", + "description": "File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files." + }, + "ena_metadata_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", + "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." + }, + "sample_mapping_fields": { + "type": "string", + "fa_icon": "fas fa-columns", + "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", + "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" + }, + "nf_core_pipeline": { + "type": "string", + "fa_icon": "fab fa-apple", + "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. 
A samplesheet for direct use with the pipeline will be created with the appropriate columns.", + "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] + }, + "nf_core_rnaseq_strandedness": { + "type": "string", + "fa_icon": "fas fa-dna", + "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", + "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", + "default": "auto" + }, + "download_method": { + "type": "string", + "default": "ftp", + "fa_icon": "fas fa-download", + "enum": ["aspera", "ftp", "sratools"], + "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", + "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." + }, + "skip_fastq_download": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Only download metadata for public data database ids and don't download the FastQ files." + }, + "dbgap_key": { + "type": "string", + "fa_icon": "fas fa-address-card", + "help_text": "Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.", + "format": "file-path", + "description": "dbGaP repository key." + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + } + } + }, + "deprecated_options": { + "title": "Deprecated options", + "type": "object", + "description": "List of parameters that have been deprecated.", + "default": "", + "fa_icon": "fas fa-calendar-times", + "properties": { + "force_sratools_download": { + "type": "boolean", + "fa_icon": "fas fa-times-circle", + "description": "This parameter has been deprecated. Please use '--download_method sratools' instead.", + "enum": [false], + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + }, + { + "$ref": "#/definitions/deprecated_options" + } + ] } From e4c956f90771ebfbe24ffdc745d4e02f4490c2d8 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 5 Nov 2024 10:24:16 +0100 Subject: [PATCH 17/25] Remove old params, remove script params from config Signed-off-by: Ben Sherman --- main.nf | 76 +++++++++++++-- nextflow.config | 37 +++++-- nextflow_schema.json | 97 +------------------ .../utils_nfcore_fetchngs_pipeline/main.nf | 24 +---- workflows/sra/main.nf | 30 +++--- 5 files changed, 121 insertions(+), 143 deletions(-) diff --git a/main.nf b/main.nf index b0e6e33c..b4628ff6 100644 --- a/main.nf +++ b/main.nf @@ -21,9 +21,73 @@ include { SRA } from './workflows/sra' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline' +include { DownloadMethod } from './workflows/sra' include { SraParams } from './workflows/sra' include { Sample } from './workflows/sra' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + WORKFLOW INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params { + + // TODO: declare as Set and construct SraId with isSraId() + input: Set { + description 'Set of SRA/ENA/GEO/DDBJ identifiers to download their associated metadata and FastQ files' + } + + // TODO: declare as EnaMetadataFields and construct with sraCheckENAMetadataFields() + ena_metadata_fields: String { + description "Comma-separated list of ENA metadata fields to fetch before downloading data." + help "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." + icon 'fas fa-columns' + defaultValue '' + } + + download_method: DownloadMethod { + description "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'." + help 'FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ.' 
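+        // DownloadMethod is the enum (ASPERA | FTP | SRATOOLS) declared alongside SraParams in workflows/sra/main.nf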
+ icon 'fas fa-download' + defaultValue 'ftp' + } + + skip_fastq_download: boolean { + description "Only download metadata for public data database ids and don't download the FastQ files." + icon 'fas fa-fast-forward' + } + + dbgap_key: Path? { + description 'dbGaP repository key.' + help 'Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.' + icon 'fas fa-address-card' + } + + aspera_cli_args: String { + description 'Command-line arguments for Aspera CLI' + defaultValue '' + } + + sra_fastq_ftp_args: String { + description 'Command-line arguments for wget when downloading fastq files via FTP' + defaultValue '' + } + + sratools_fasterqdump_args: String { + description 'Command-line arguments for sratools fasterqdump' + defaultValue '' + } + + sratools_pigz_args: String { + description 'Command-line arguments for sratools pigz' + defaultValue '' + } + + // TODO: ... + +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -36,24 +100,22 @@ workflow { // // SUBWORKFLOW: Run initialisation tasks // - let ids = PIPELINE_INITIALISATION ( + PIPELINE_INITIALISATION ( params.version, params.help, params.validate_params, params.monochrome_logs, args, - params.outdir, - params.input, - params.ena_metadata_fields + workflow.outputDir ) // // WORKFLOW: Run primary workflows for the pipeline // let samples = SRA ( - ids, + Channel.fromList(params.input), SraParams( - params.ena_metadata_fields ?: '', + params.ena_metadata_fields, params.download_method, params.skip_fastq_download, params.dbgap_key, @@ -76,7 +138,7 @@ workflow { params.email, params.email_on_fail, params.plaintext_email, - params.outdir, + workflow.outputDir, params.monochrome_logs, params.hook_url ) diff --git a/nextflow.config b/nextflow.config index 19e05583..0897d9c9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,11 +6,33 @@ ---------------------------------------------------------------------------------------- */ +// Config params +params { + + // Institutional config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs" + config_profile_contact = null + config_profile_url = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + + // Report options + trace_suffix = "_${new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')}" + +} + // Load base.config by default for all pipelines includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions -includeConfig "${params.custom_config_base}/nfcore_custom.config" +includeConfig "${params.custom_config_base}/${params.custom_config_version}/nfcore_custom.config" profiles { debug { @@ -135,26 +157,23 @@ env { process.shell = ['/bin/bash', '-euo', 'pipefail'] // workflow outputs -outputDir = params.outdir -workflow.output.mode = params.publish_dir_mode +workflow.output.mode = 'copy' -// NOTE: Nextflow config should provide some constant for the start timestamp -params.trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = 
"${params.outdir}/pipeline_info/execution_timeline_${params.trace_timestamp}.html" + file = "${outputDir}/pipeline_info/execution_timeline${params.trace_suffix}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${params.trace_timestamp}.html" + file = "${outputDir}/pipeline_info/execution_report${params.trace_suffix}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${params.trace_timestamp}.txt" + file = "${outputDir}/pipeline_info/execution_trace${params.trace_suffix}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${params.trace_timestamp}.html" + file = "${outputDir}/pipeline_info/pipeline_dag${params.trace_suffix}.html" } manifest { diff --git a/nextflow_schema.json b/nextflow_schema.json index 29f7b710..dbac4c00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,12 +5,12 @@ "description": "Pipeline to fetch metadata and raw FastQ files from public databases", "type": "object", "definitions": { - "input_output_options": { - "title": "Input/output options", + "input_options": { + "title": "Input options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input"], "properties": { "input": { "type": "string", @@ -28,25 +28,6 @@ "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." }, - "sample_mapping_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", - "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" - }, - "nf_core_pipeline": { - "type": "string", - "fa_icon": "fab fa-apple", - "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.", - "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] - }, - "nf_core_rnaseq_strandedness": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", - "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", - "default": "auto" - }, "download_method": { "type": "string", "default": "ftp", @@ -67,12 +48,6 @@ "format": "file-path", "description": "dbGaP repository key." }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -99,7 +74,7 @@ "custom_config_base": { "type": "string", "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "default": "https://raw.githubusercontent.com/nf-core/configs", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", "fa_icon": "fas fa-users-cog" @@ -184,15 +159,6 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", @@ -219,69 +185,16 @@ "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - }, - "deprecated_options": { - "title": "Deprecated options", - "type": "object", - "description": "List of parameters that have been deprecated.", - "default": "", - "fa_icon": "fas fa-calendar-times", - "properties": { - "force_sratools_download": { - "type": "boolean", - "fa_icon": "fas fa-times-circle", - "description": "This parameter has been deprecated. 
Please use '--download_method sratools' instead.", - "enum": [false], - "hidden": true } } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/definitions/input_options" }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/deprecated_options" } ] } diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index ba15b922..eb72d35e 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -35,8 +35,6 @@ workflow PIPELINE_INITIALISATION { monochrome_logs : boolean // Do not use coloured log outputs nextflow_cli_args : List // List of positional nextflow CLI args outdir : String // The output directory where the results will be saved - input : String // File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files - ena_metadata_fields : String // Comma-separated list of ENA metadata fields to fetch before downloading data main: @@ -55,7 +53,7 @@ workflow PIPELINE_INITIALISATION { // let pre_help_text = nfCoreLogo(monochrome_logs) let post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - let workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv --outdir " + let workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv -output-dir " UTILS_NFVALIDATION_PLUGIN ( help, workflow_command, @@ -71,26 +69,6 @@ workflow PIPELINE_INITIALISATION { UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) - - // - // Auto-detect input id type - // - let inputPath = file(input) - if (!isSraId(inputPath)) - error('Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!') - sraCheckENAMetadataFields(ena_metadata_fields) - - // Read in ids from --input file - inputPath // Path - |> Channel.of // Channel - |> flatMap { csv -> - splitCsv(csv, header: false, schema: 'assets/schema_input.json') - } // Channel - |> unique // Channel - |> set { ids } // Channel - - emit: - ids // Channel } /* diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 747c0d41..651c09dd 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -54,7 +54,7 @@ workflow SRA { // sra_metadata |> filter { meta -> - !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'ftp' + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> |> map { meta -> let out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) @@ -67,10 +67,10 @@ workflow SRA { // sra_metadata |> filter { meta -> - !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'sratools' + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS } // Channel> |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - params.dbgap_key ? 
file(params.dbgap_key, checkIfExists: true) : null, + params.dbgap_key, params.sratools_fasterqdump_args, params.sratools_pigz_args ) // Channel<(Map, List)> |> map { (meta, fastq) -> @@ -85,7 +85,7 @@ workflow SRA { // sra_metadata |> filter { meta -> - !skip_fastq_download && getDownloadMethod(meta, params.download_method) == 'aspera' + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> |> map { meta -> let out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) @@ -106,15 +106,15 @@ workflow SRA { ======================================================================================== */ -fn getDownloadMethod(meta: Map, download_method: String) -> String { +fn getDownloadMethod(meta: Map, userMethod: DownloadMethod) -> DownloadMethod { // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' - if (meta.fastq_aspera && download_method == 'aspera') - return 'aspera' - if ((!meta.fastq_aspera && !meta.fastq_1) || download_method == 'sratools') - return 'sratools' - return 'ftp' + if (meta.fastq_aspera && userMethod == DownloadMethod.ASPERA) + return DownloadMethod.ASPERA + if ((!meta.fastq_aspera && !meta.fastq_1) || userMethod == DownloadMethod.SRATOOLS) + return DownloadMethod.SRATOOLS + return DownloadMethod.FTP } /* @@ -125,15 +125,21 @@ fn getDownloadMethod(meta: Map, download_method: String) -> Strin record SraParams { ena_metadata_fields : String - download_method : String // enum: 'aspera' | 'ftp' | 'sratools' + download_method : DownloadMethod skip_fastq_download : boolean - dbgap_key : String? + dbgap_key : Path? aspera_cli_args : String sra_fastq_ftp_args : String sratools_fasterqdump_args : String sratools_pigz_args : String } +enum DownloadMethod { + ASPERA, + FTP, + SRATOOLS +} + record Sample { id : String fastq_1 : Path From e4761ce81fae2cea866334727ca828e7301e3753 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 8 Nov 2024 17:51:15 +0100 Subject: [PATCH 18/25] Replace set operator with assignment Signed-off-by: Ben Sherman --- .../main.nf | 47 +++++++++---------- workflows/sra/main.nf | 15 +++--- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index eec2cb19..e6fa8d52 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -16,35 +16,30 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Detect existing NCBI user settings or create new ones. // - sra_metadata // Channel> - |> collect // Bag> (future) - |> CUSTOM_SRATOOLSNCBISETTINGS // Path (future) - |> set { ncbi_settings } // Path (future) + let ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( collect(sra_metadata) ) - sra_metadata // Channel> - |> map { meta -> - // - // Prefetch sequencing reads in SRA format. - // - let sra = SRATOOLS_PREFETCH ( - meta, - ncbi_settings, - dbgap_key ) + let reads = sra_metadata |> map { meta -> + // + // Prefetch sequencing reads in SRA format. 
+ // + let sra = SRATOOLS_PREFETCH ( + meta, + ncbi_settings, + dbgap_key ) - // - // Convert the SRA format into one or more compressed FASTQ files. - // - let fastq = SRATOOLS_FASTERQDUMP ( - meta, - sra, - ncbi_settings, - dbgap_key, - sratools_fasterqdump_args, - sratools_pigz_args ) + // + // Convert the SRA format into one or more compressed FASTQ files. + // + let fastq = SRATOOLS_FASTERQDUMP ( + meta, + sra, + ncbi_settings, + dbgap_key, + sratools_fasterqdump_args, + sratools_pigz_args ) - ( meta, fastq ) - } // Channel<(Map, List)> - |> set { reads } // Channel<(Map, List)> + ( meta, fastq ) + } // Channel<(Map, List)> emit: reads // Channel<(Map, List)> diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 651c09dd..009bb6c0 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -31,7 +31,7 @@ workflow SRA { params : SraParams main: - ids // Channel + let runinfo_ftp = ids // // MODULE: Get SRA run information for public database ids // @@ -42,17 +42,17 @@ workflow SRA { // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] // |> map(SRA_RUNINFO_TO_FTP) // Channel - |> set { runinfo_ftp } // Channel + + let sra_metadata = runinfo_ftp |> flatMap { tsv -> splitCsv(tsv, header:true, sep:'\t') } // Channel> |> unique // Channel> - |> set { sra_metadata } // Channel> // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // - sra_metadata + let ftp_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> @@ -60,12 +60,11 @@ workflow SRA { let out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel - |> set { ftp_samples } // Channel // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - sra_metadata + let sratools_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS } // Channel> @@ -78,12 +77,11 @@ workflow SRA { let fastq_2 = !meta.single_end ? 
fastq[1] : null new Sample(meta.id, fastq_1, fastq_2, null, null) } // Channel - |> set { sratools_samples } // Channel // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // - sra_metadata + let aspera_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> @@ -91,7 +89,6 @@ workflow SRA { let out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel - |> set { aspera_samples } // Channel emit: mix( ftp_samples, sratools_samples, aspera_samples ) From b2d817ef4ae47558f3bc73a9a07105283d8f9920 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 8 Nov 2024 18:24:00 +0100 Subject: [PATCH 19/25] Update operators Signed-off-by: Ben Sherman --- subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 11 ++++++----- workflows/sra/main.nf | 12 +++++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index ea00b8a8..17d3490b 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -28,18 +28,19 @@ workflow SOFTWARE_VERSIONS { main: let processVersions = Channel.topic('versions', (String,String,String)) let workflowVersions = Channel.of( - [ 'Workflow', workflow.manifest.name, getWorkflowVersion() ], - [ 'Workflow', 'Nextflow', workflow.nextflow.version ] + ( 'Workflow', workflow.manifest.name, getWorkflowVersion() ), + ( 'Workflow', 'Nextflow', workflow.nextflow.version ) ) emit: processVersions |> mix(workflowVersions) // Channel<(String,String,String)> - |> unique // Channel<(String,String,String)> - |> groupBy { (process, _, _) -> process } // Channel<(String,Bag<(String,String,String)>)> + |> gather { (process, name, version) -> + (process, -1, (name, version)) + } // Channel<(String,Bag<(String,String)>)> |> map { (process, tools) -> let simpleName = process.tokenize(':').last() - let toolsMap = tools.inject([:]) { acc, (_, name, version) -> + let toolsMap = tools.unique().inject([:]) { acc, (name, version) -> acc + [ (name): version ] } return [ simpleName: toolsMap ] diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 009bb6c0..560434c1 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -37,17 +37,15 @@ workflow SRA { // |> map { id -> SRA_IDS_TO_RUNINFO ( id, params.ena_metadata_fields ) - } // Channel + } // Channel // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] // - |> map(SRA_RUNINFO_TO_FTP) // Channel + |> map(SRA_RUNINFO_TO_FTP) // Channel - let sra_metadata = runinfo_ftp - |> flatMap { tsv -> - splitCsv(tsv, header:true, sep:'\t') - } // Channel> - |> unique // Channel> + let sra_metadata = runinfo_ftp |> scatter { tsv -> + tsv.splitCsv(header:true, sep:'\t').unique() + } // Channel> // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums From 2b471669bf1ad45978043a73aecbb0f3290c7e2a Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 8 Nov 2024 18:30:08 +0100 Subject: [PATCH 20/25] Revert `fn` / `let` / `var` with `def` Signed-off-by: Ben Sherman --- main.nf | 6 +- modules/local/aspera_cli/main.nf | 4 +- modules/local/sra_ids_to_runinfo/main.nf | 2 +- 
modules/nf-core/sratools/fasterqdump/main.nf | 4 +- .../utils_nfcore_fetchngs_pipeline/main.nf | 28 ++--- .../main.nf | 8 +- .../nf-core/utils_nextflow_pipeline/main.nf | 34 ++--- .../nf-core/utils_nfcore_pipeline/main.nf | 116 +++++++++--------- workflows/sra/main.nf | 20 +-- 9 files changed, 111 insertions(+), 111 deletions(-) diff --git a/main.nf b/main.nf index b4628ff6..064b58a8 100644 --- a/main.nf +++ b/main.nf @@ -112,7 +112,7 @@ workflow { // // WORKFLOW: Run primary workflows for the pipeline // - let samples = SRA ( + def samples = SRA ( Channel.fromList(params.input), SraParams( params.ena_metadata_fields, @@ -129,7 +129,7 @@ workflow { // // SUBWORKFLOW: Collect software versions // - let versions = SOFTWARE_VERSIONS() + def versions = SOFTWARE_VERSIONS() // // SUBWORKFLOW: Run completion tasks @@ -157,7 +157,7 @@ workflow { output { samples: Sample { path { _sample -> - let dirs = [ + def dirs = [ 'fastq': 'fastq', 'md5': 'fastq/md5' ] diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 3e5d92a2..3701e3f8 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -13,8 +13,8 @@ process ASPERA_CLI { args : String script: - let conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" - let fastq = meta.fastq_aspera.tokenize(';') + def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" + def fastq = meta.fastq_aspera.tokenize(';') if (meta.single_end.toBoolean()) { """ $conda_prefix diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 6f04091f..85f0655c 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -13,7 +13,7 @@ process SRA_IDS_TO_RUNINFO { fields : String script: - let metadata_fields = fields ? "--ena_metadata_fields ${fields}" : '' + def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : '' """ echo $id > id.txt sra_ids_to_runinfo.py \\ diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 6c8c27c5..839ef80c 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -19,8 +19,8 @@ process SRATOOLS_FASTERQDUMP { script: if( !prefix ) prefix = "${meta.id}" - let outfile = meta.single_end ? "${prefix}.fastq" : prefix - var key_file = '' + def outfile = meta.single_end ? 
"${prefix}.fastq" : prefix + def key_file = '' if (certificate.baseName.endsWith('.jwt')) { key_file += " --perm ${certificate}" } else if (certificate.baseName.endsWith('.ngc')) { diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index eb72d35e..82c3a256 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -51,9 +51,9 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - let pre_help_text = nfCoreLogo(monochrome_logs) - let post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - let workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv -output-dir " + def pre_help_text = nfCoreLogo(monochrome_logs) + def post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv -output-dir " UTILS_NFVALIDATION_PLUGIN ( help, workflow_command, @@ -89,7 +89,7 @@ workflow PIPELINE_COMPLETION { main: - let summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") // // Completion email and summary @@ -118,11 +118,11 @@ workflow PIPELINE_COMPLETION { // // Check if input ids are from the SRA // -fn isSraId(input: Path) -> boolean { - var is_sra = false - var total_ids = 0 - let no_match_ids = [] - let pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/ +def isSraId(input: Path) -> boolean { + def is_sra = false + def total_ids = 0 + def no_match_ids = [] + def pattern = /^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$/ input.eachLine { line -> total_ids += 1 if (!(line =~ pattern)) { @@ -130,7 +130,7 @@ fn isSraId(input: Path) -> boolean { } } - let num_match = total_ids - no_match_ids.size() + def num_match = total_ids - no_match_ids.size() if (num_match > 0) { if (num_match == total_ids) { is_sra = true @@ -144,10 +144,10 @@ fn isSraId(input: Path) -> boolean { // // Check and validate parameters // -fn sraCheckENAMetadataFields(ena_metadata_fields) { +def sraCheckENAMetadataFields(ena_metadata_fields) { // Check minimal ENA fields are provided to download FastQ files - let valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5'] - let actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect{ it.trim().toLowerCase() } : valid_ena_metadata_fields + def valid_ena_metadata_fields = ['run_accession', 'experiment_accession', 'library_layout', 'fastq_ftp', 'fastq_md5'] + def actual_ena_metadata_fields = ena_metadata_fields ? ena_metadata_fields.split(',').collect{ it.trim().toLowerCase() } : valid_ena_metadata_fields if (!actual_ena_metadata_fields.containsAll(valid_ena_metadata_fields)) { error("Invalid option: '${ena_metadata_fields}'. 
Minimally required fields for '--ena_metadata_fields': '${valid_ena_metadata_fields.join(',')}'") } @@ -156,7 +156,7 @@ fn sraCheckENAMetadataFields(ena_metadata_fields) { // // Print a warning after pipeline has completed // -fn sraCurateSamplesheetWarn() { +def sraCurateSamplesheetWarn() { log.warn "=============================================================================\n" + " Please double-check the samplesheet that has been auto-created by the pipeline.\n\n" + " Public databases don't reliably hold information such as strandedness\n" + diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index e6fa8d52..b12ba9a0 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -16,13 +16,13 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Detect existing NCBI user settings or create new ones. // - let ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( collect(sra_metadata) ) + def ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( collect(sra_metadata) ) - let reads = sra_metadata |> map { meta -> + def reads = sra_metadata |> map { meta -> // // Prefetch sequencing reads in SRA format. // - let sra = SRATOOLS_PREFETCH ( + def sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) @@ -30,7 +30,7 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Convert the SRA format into one or more compressed FASTQ files. // - let fastq = SRATOOLS_FASTERQDUMP ( + def fastq = SRATOOLS_FASTERQDUMP ( meta, sra, ncbi_settings, diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index 0b29dee7..6e4db179 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -57,15 +57,15 @@ workflow UTILS_NEXTFLOW_PIPELINE { // // Generate version string // -fn getWorkflowVersion() -> String { - var version_string = "" +def getWorkflowVersion() -> String { + def version_string = "" if (workflow.manifest.version) { - let prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" } if (workflow.commitId) { - let git_shortsha = workflow.commitId.substring(0, 7) + def git_shortsha = workflow.commitId.substring(0, 7) version_string += "-g${git_shortsha}" } @@ -75,11 +75,11 @@ fn getWorkflowVersion() -> String { // // Dump pipeline parameters to a JSON file // -fn dumpParametersToJSON(outdir: String) { - let timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - let filename = "params_${timestamp}.json" - let temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - let jsonStr = JsonOutput.toJson(params) +def dumpParametersToJSON(outdir: String) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) temp_pf.text = JsonOutput.prettyPrint(jsonStr) FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") @@ -89,11 +89,11 @@ fn dumpParametersToJSON(outdir: String) { // // When running with -profile conda, warn if channels have not been set-up appropriately // -fn checkCondaChannels() { - let parser = new Yaml() - var channels: Set = [] +def checkCondaChannels() { + def parser = new Yaml() + def channels: Set = [] try { - let config = parser.load("conda config --show channels".execute().text) + def config = parser.load("conda config --show channels".execute().text) channels = config.channels } catch(NullPointerException | IOException e) { log.warn "Could not verify conda channel configuration." @@ -102,12 +102,12 @@ fn checkCondaChannels() { // Check that all channels are present // This channel list is ordered by required channel priority. - let required_channels_in_order: Set = ['conda-forge', 'bioconda', 'defaults'] - let channels_missing = !(required_channels_in_order - channels).isEmpty() + def required_channels_in_order: Set = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = !(required_channels_in_order - channels).isEmpty() // Check that they are in the right order - let channel_priority_violation = false - let n = required_channels_in_order.size() + def channel_priority_violation = false + def n = required_channels_in_order.size() // for (int i = 0; i < n - 1; i++) { // channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) // } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 17d3490b..fdf49828 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -26,8 +26,8 @@ workflow UTILS_NFCORE_PIPELINE { // workflow SOFTWARE_VERSIONS { main: - let processVersions = Channel.topic('versions', (String,String,String)) - let workflowVersions = Channel.of( + def processVersions = Channel.topic('versions', (String,String,String)) + def workflowVersions = Channel.of( ( 'Workflow', workflow.manifest.name, getWorkflowVersion() ), ( 'Workflow', 'Nextflow', workflow.nextflow.version ) ) @@ -39,8 +39,8 @@ workflow SOFTWARE_VERSIONS { (process, -1, (name, version)) } // Channel<(String,Bag<(String,String)>)> |> map { (process, tools) -> - let simpleName = process.tokenize(':').last() - let toolsMap = tools.unique().inject([:]) { acc, (name, version) -> + def simpleName = process.tokenize(':').last() + def toolsMap = tools.unique().inject([:]) { acc, (name, version) 
-> acc + [ (name): version ] } return [ simpleName: toolsMap ] @@ -56,7 +56,7 @@ workflow SOFTWARE_VERSIONS { // // Warn if a -profile or Nextflow config has not been provided to run the pipeline // -fn checkConfigProvided() -> boolean { +def checkConfigProvided() -> boolean { if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + @@ -72,7 +72,7 @@ fn checkConfigProvided() -> boolean { // // Exit pipeline if --profile contains spaces // -fn checkProfileProvided(nextflow_cli_args: List) { +def checkProfileProvided(nextflow_cli_args: List) { if (workflow.profile.endsWith(',')) { error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" @@ -86,7 +86,7 @@ fn checkProfileProvided(nextflow_cli_args: List) { // // Citation string for pipeline // -fn workflowCitation() -> String { +def workflowCitation() -> String { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + " ${workflow.manifest.doi}\n\n" + @@ -99,15 +99,15 @@ fn workflowCitation() -> String { // // Generate workflow version string // -fn getWorkflowVersion() -> String { - var version_string = "" +def getWorkflowVersion() -> String { + def version_string = "" if (workflow.manifest.version) { - let prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" } if (workflow.commitId) { - let git_shortsha = workflow.commitId.substring(0, 7) + def git_shortsha = workflow.commitId.substring(0, 7) version_string += "-g${git_shortsha}" } @@ -117,10 +117,10 @@ fn getWorkflowVersion() -> String { // // Get workflow summary for MultiQC // -fn paramsSummaryMultiqc(summary_params: Map) -> String { - var summary_section = '' +def paramsSummaryMultiqc(summary_params: Map) -> String { + def summary_section = '' for (group in summary_params.keySet()) { - let group_params = summary_params.get(group) // This gets the parameters of that particular group + def group_params = summary_params.get(group) // This gets the parameters of that particular group if (group_params) { summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n"
             summary_section += "    <dl class=\"dl-horizontal\">
\n" @@ -131,7 +131,7 @@ fn paramsSummaryMultiqc(summary_params: Map) -> String { } } - var yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + def yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" @@ -145,8 +145,8 @@ fn paramsSummaryMultiqc(summary_params: Map) -> String { // // nf-core logo // -fn nfCoreLogo(monochrome_logs: boolean = true) -> String { - let colors = logColours(monochrome_logs) +def nfCoreLogo(monochrome_logs: boolean = true) -> String { + def colors = logColours(monochrome_logs) String.format( """\n ${dashedLine(monochrome_logs)} @@ -164,16 +164,16 @@ fn nfCoreLogo(monochrome_logs: boolean = true) -> String { // // Return dashed line // -fn dashedLine(monochrome_logs: boolean = true) -> String { - let colors = logColours(monochrome_logs) +def dashedLine(monochrome_logs: boolean = true) -> String { + def colors = logColours(monochrome_logs) return "-${colors.dim}----------------------------------------------------${colors.reset}-" } // // ANSII colours used for terminal logging // -fn logColours(monochrome_logs: boolean = true) -> Map { - let colorcodes = [:] +def logColours(monochrome_logs: boolean = true) -> Map { + def colorcodes = [:] // Reset / Meta colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" @@ -240,7 +240,7 @@ fn logColours(monochrome_logs: boolean = true) -> Map { // // Construct and send completion email // -fn completionEmail( +def completionEmail( summary_params: Map, email: String, email_on_fail: String, @@ -250,16 +250,16 @@ fn completionEmail( multiqc_report: Path = null) { // Set up the e-mail variables - let subject = workflow.success + def subject = workflow.success ? "[$workflow.manifest.name] Successful: $workflow.runName" : "[$workflow.manifest.name] FAILED: $workflow.runName" - let summary = [:] + def summary = [:] for (group in summary_params.keySet()) { summary << summary_params[group] } - let misc_fields = [:] + def misc_fields = [:] misc_fields['Date Started'] = workflow.start misc_fields['Date Completed'] = workflow.complete misc_fields['Pipeline script file path'] = workflow.scriptFile @@ -271,7 +271,7 @@ fn completionEmail( misc_fields['Nextflow Build'] = workflow.nextflow.build misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - let email_fields = [:] + def email_fields = [:] email_fields['version'] = getWorkflowVersion() email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success @@ -285,59 +285,59 @@ fn completionEmail( email_fields['summary'] = summary << misc_fields // On success try attach the multiqc report - let mqc_report = workflow.success + def mqc_report = workflow.success ? multiqc_report : null // Check if we are only sending emails on failure - let email_address = !email && email_on_fail && !workflow.success + def email_address = !email && email_on_fail && !workflow.success ? 
email_on_fail : email // Render the TXT template - let engine = new groovy.text.GStringTemplateEngine() - let tf = new File("${workflow.projectDir}/assets/email_template.txt") - let txt_template = engine.createTemplate(tf).make(email_fields) - let email_txt = txt_template.toString() + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() // Render the HTML template - let hf = new File("${workflow.projectDir}/assets/email_template.html") - let html_template = engine.createTemplate(hf).make(email_fields) - let email_html = html_template.toString() + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() // Render the sendmail template - let max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - let smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - let sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") - let sendmail_template = engine.createTemplate(sf).make(smail_fields) - let sendmail_html = sendmail_template.toString() + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - let colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) if (email_address) { try { if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail - let sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext - let mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] mail_cmd.execute() << email_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" } } // Write summary e-mail HTML to a file - let output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } output_hf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.html"); output_hf.delete() // Write summary e-mail TXT to a file - let output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + def 
output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } output_tf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.txt"); output_tf.delete() @@ -346,8 +346,8 @@ fn completionEmail( // // Print pipeline summary on completion // -fn completionSummary(monochrome_logs: boolean = true) { - let colors = logColours(monochrome_logs) +def completionSummary(monochrome_logs: boolean = true) { + def colors = logColours(monochrome_logs) if (workflow.success) { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" @@ -362,13 +362,13 @@ fn completionSummary(monochrome_logs: boolean = true) { // // Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack // -fn imNotification(summary_params: Map, hook_url: String) { - let summary = [:] +def imNotification(summary_params: Map, hook_url: String) { + def summary = [:] for (group in summary_params.keySet()) { summary << summary_params[group] } - let misc_fields = [:] + def misc_fields = [:] misc_fields['start'] = workflow.start misc_fields['complete'] = workflow.complete misc_fields['scriptfile'] = workflow.scriptFile @@ -380,7 +380,7 @@ fn imNotification(summary_params: Map, hook_url: String) { misc_fields['nxf_build'] = workflow.nextflow.build misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - let msg_fields = [:] + def msg_fields = [:] msg_fields['version'] = getWorkflowVersion() msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success @@ -394,21 +394,21 @@ fn imNotification(summary_params: Map, hook_url: String) { msg_fields['summary'] = summary << misc_fields // Render the JSON template - let engine = new groovy.text.GStringTemplateEngine() + def engine = new groovy.text.GStringTemplateEngine() // Different JSON depending on the service provider // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - let json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - let hf = new File("${workflow.projectDir}/assets/${json_path}") - let json_template = engine.createTemplate(hf).make(msg_fields) - let json_message = json_template.toString() + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() // POST - let post = new URL(hook_url).openConnection(); + def post = new URL(hook_url).openConnection(); post.setRequestMethod("POST") post.setDoOutput(true) post.setRequestProperty("Content-Type", "application/json") post.getOutputStream().write(json_message.getBytes("UTF-8")); - let postRC = post.getResponseCode(); + def postRC = post.getResponseCode(); if (! 
postRC.equals(200)) { log.warn(post.getErrorStream().getText()); } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 560434c1..2fbe528a 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -31,7 +31,7 @@ workflow SRA { params : SraParams main: - let runinfo_ftp = ids + def runinfo_ftp = ids // // MODULE: Get SRA run information for public database ids // @@ -43,26 +43,26 @@ workflow SRA { // |> map(SRA_RUNINFO_TO_FTP) // Channel - let sra_metadata = runinfo_ftp |> scatter { tsv -> + def sra_metadata = runinfo_ftp |> scatter { tsv -> tsv.splitCsv(header:true, sep:'\t').unique() } // Channel> // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // - let ftp_samples = sra_metadata + def ftp_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> |> map { meta -> - let out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) + def out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - let sratools_samples = sra_metadata + def sratools_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS } // Channel> @@ -71,20 +71,20 @@ workflow SRA { params.sratools_fasterqdump_args, params.sratools_pigz_args ) // Channel<(Map, List)> |> map { (meta, fastq) -> - let fastq_1 = fastq[0] - let fastq_2 = !meta.single_end ? fastq[1] : null + def fastq_1 = fastq[0] + def fastq_2 = !meta.single_end ? fastq[1] : null new Sample(meta.id, fastq_1, fastq_2, null, null) } // Channel // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // - let aspera_samples = sra_metadata + def aspera_samples = sra_metadata |> filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> |> map { meta -> - let out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) + def out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel @@ -101,7 +101,7 @@ workflow SRA { ======================================================================================== */ -fn getDownloadMethod(meta: Map, userMethod: DownloadMethod) -> DownloadMethod { +def getDownloadMethod(meta: Map, userMethod: DownloadMethod) -> DownloadMethod { // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' From e3f052a17b457d26426092020b1e3c3259a0e96e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 8 Nov 2024 18:54:57 +0100 Subject: [PATCH 21/25] Revert pipe (`|>`) to dot for operators Signed-off-by: Ben Sherman --- .../main.nf | 16 +++--- .../nf-core/utils_nfcore_pipeline/main.nf | 6 +-- workflows/sra/main.nf | 51 ++++++++++--------- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf 
b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index b12ba9a0..edb14bb4 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -7,18 +7,18 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ // workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - sra_metadata // Channel> - dbgap_key // Path? - sratools_fasterqdump_args // String - sratools_pigz_args // String + sra_metadata : Channel> + dbgap_key : Path? + sratools_fasterqdump_args : String + sratools_pigz_args : String main: // // Detect existing NCBI user settings or create new ones. // - def ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( collect(sra_metadata) ) + def ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( sra_metadata.collect() ) - def reads = sra_metadata |> map { meta -> + def reads = sra_metadata.map { meta -> // // Prefetch sequencing reads in SRA format. // @@ -39,8 +39,8 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { sratools_pigz_args ) ( meta, fastq ) - } // Channel<(Map, List)> + } emit: - reads // Channel<(Map, List)> + reads : Channel<(Map, List)> } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index fdf49828..2a63fb85 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -34,11 +34,11 @@ workflow SOFTWARE_VERSIONS { emit: processVersions - |> mix(workflowVersions) // Channel<(String,String,String)> - |> gather { (process, name, version) -> + .mix(workflowVersions) // Channel<(String,String,String)> + .gather { (process, name, version) -> (process, -1, (name, version)) } // Channel<(String,Bag<(String,String)>)> - |> map { (process, tools) -> + .map { (process, tools) -> def simpleName = process.tokenize(':').last() def toolsMap = tools.unique().inject([:]) { acc, (name, version) -> acc + [ (name): version ] diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 2fbe528a..43350719 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -27,23 +27,23 @@ include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflow workflow SRA { take: - ids : Channel - params : SraParams + ids : Channel + params : SraParams main: def runinfo_ftp = ids // // MODULE: Get SRA run information for public database ids // - |> map { id -> + .map { id -> SRA_IDS_TO_RUNINFO ( id, params.ena_metadata_fields ) } // Channel // // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] // - |> map(SRA_RUNINFO_TO_FTP) // Channel + .map(SRA_RUNINFO_TO_FTP) // Channel - def sra_metadata = runinfo_ftp |> scatter { tsv -> + def sra_metadata = runinfo_ftp.scatter { tsv -> tsv.splitCsv(header:true, sep:'\t').unique() } // Channel> @@ -51,10 +51,10 @@ workflow SRA { // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // def ftp_samples = sra_metadata - |> filter { meta -> + .filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> - |> map { meta -> + .map { meta -> def out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel @@ -62,34 +62,39 @@ workflow SRA { // // SUBWORKFLOW: Download sequencing reads without FTP links using 
sra-tools. // - def sratools_samples = sra_metadata - |> filter { meta -> - !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS - } // Channel> - |> FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - params.dbgap_key, - params.sratools_fasterqdump_args, - params.sratools_pigz_args ) // Channel<(Map, List)> - |> map { (meta, fastq) -> - def fastq_1 = fastq[0] - def fastq_2 = !meta.single_end ? fastq[1] : null - new Sample(meta.id, fastq_1, fastq_2, null, null) - } // Channel + def sratools_metadata = sra_metadata.filter { meta -> + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS + } // Channel> + + def sratools_reads = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( + sratools_metadata, + params.dbgap_key, + params.sratools_fasterqdump_args, + params.sratools_pigz_args + ) // Channel<(Map, List)> + + def sratools_samples = sra_metadata.map { (meta, fastq) -> + def fastq_1 = fastq[0] + def fastq_2 = !meta.single_end ? fastq[1] : null + new Sample(meta.id, fastq_1, fastq_2, null, null) + } // Channel // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // def aspera_samples = sra_metadata - |> filter { meta -> + .filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> - |> map { meta -> + .map { meta -> def out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel emit: - mix( ftp_samples, sratools_samples, aspera_samples ) + ftp_samples + .mix(sratools_samples) + .mix(aspera_samples) publish: runinfo_ftp >> 'metadata' From 6598091398187d4d9ddec2372bdb1f79494fa46b Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 19 Nov 2024 22:30:04 -0600 Subject: [PATCH 22/25] Revert unrelated changes Signed-off-by: Ben Sherman --- main.nf | 4 ++-- .../local/utils_nfcore_fetchngs_pipeline/main.nf | 8 ++++---- .../main.nf | 4 ++-- subworkflows/nf-core/utils_nfcore_pipeline/main.nf | 4 ++-- workflows/sra/main.nf | 14 +++++++------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index 064b58a8..c62e4a7f 100644 --- a/main.nf +++ b/main.nf @@ -112,7 +112,7 @@ workflow { // // WORKFLOW: Run primary workflows for the pipeline // - def samples = SRA ( + samples = SRA ( Channel.fromList(params.input), SraParams( params.ena_metadata_fields, @@ -129,7 +129,7 @@ workflow { // // SUBWORKFLOW: Collect software versions // - def versions = SOFTWARE_VERSIONS() + versions = SOFTWARE_VERSIONS() // // SUBWORKFLOW: Run completion tasks diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 82c3a256..3f4dbe51 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -51,9 +51,9 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - def pre_help_text = nfCoreLogo(monochrome_logs) - def post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv -output-dir " + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + workflow_command = "nextflow run ${workflow.manifest.name} 
-profile --input ids.csv -output-dir " UTILS_NFVALIDATION_PLUGIN ( help, workflow_command, @@ -89,7 +89,7 @@ workflow PIPELINE_COMPLETION { main: - def summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") // // Completion email and summary diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index edb14bb4..0be4d1a9 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -16,9 +16,9 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Detect existing NCBI user settings or create new ones. // - def ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( sra_metadata.collect() ) + ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( sra_metadata.collect() ) - def reads = sra_metadata.map { meta -> + reads = sra_metadata.map { meta -> // // Prefetch sequencing reads in SRA format. // diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 2a63fb85..06969ea4 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -26,8 +26,8 @@ workflow UTILS_NFCORE_PIPELINE { // workflow SOFTWARE_VERSIONS { main: - def processVersions = Channel.topic('versions', (String,String,String)) - def workflowVersions = Channel.of( + processVersions = Channel.topic('versions', (String,String,String)) + workflowVersions = Channel.of( ( 'Workflow', workflow.manifest.name, getWorkflowVersion() ), ( 'Workflow', 'Nextflow', workflow.nextflow.version ) ) diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 43350719..353db63e 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -31,7 +31,7 @@ workflow SRA { params : SraParams main: - def runinfo_ftp = ids + runinfo_ftp = ids // // MODULE: Get SRA run information for public database ids // @@ -43,14 +43,14 @@ workflow SRA { // .map(SRA_RUNINFO_TO_FTP) // Channel - def sra_metadata = runinfo_ftp.scatter { tsv -> + sra_metadata = runinfo_ftp.scatter { tsv -> tsv.splitCsv(header:true, sep:'\t').unique() } // Channel> // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // - def ftp_samples = sra_metadata + ftp_samples = sra_metadata .filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> @@ -62,18 +62,18 @@ workflow SRA { // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - def sratools_metadata = sra_metadata.filter { meta -> + sratools_metadata = sra_metadata.filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS } // Channel> - def sratools_reads = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( + sratools_reads = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( sratools_metadata, params.dbgap_key, params.sratools_fasterqdump_args, params.sratools_pigz_args ) // Channel<(Map, List)> - def sratools_samples = sra_metadata.map { (meta, fastq) -> + sratools_samples = sra_metadata.map { (meta, fastq) -> def fastq_1 = fastq[0] def fastq_2 = !meta.single_end ? 
fastq[1] : null new Sample(meta.id, fastq_1, fastq_2, null, null) @@ -82,7 +82,7 @@ workflow SRA { // // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // - def aspera_samples = sra_metadata + aspera_samples = sra_metadata .filter { meta -> !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> From d55592234e0ee11c1f9d94b1f9db15cde20ece9e Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 19 Nov 2024 22:36:10 -0600 Subject: [PATCH 23/25] Restore ext config Signed-off-by: Ben Sherman --- main.nf | 26 +------------------ modules/local/aspera_cli/main.nf | 2 +- modules/local/aspera_cli/nextflow.config | 5 ++++ modules/local/sra_fastq_ftp/main.nf | 2 +- modules/local/sra_fastq_ftp/nextflow.config | 5 ++++ modules/nf-core/sratools/fasterqdump/main.nf | 12 ++++----- .../sratools/fasterqdump/nextflow.config | 5 ++++ modules/nf-core/sratools/prefetch/main.nf | 8 +++--- .../prefetch/templates/retry_with_backoff.sh | 4 +-- .../main.nf | 15 ++--------- workflows/sra/main.nf | 12 +++------ 11 files changed, 34 insertions(+), 62 deletions(-) create mode 100644 modules/local/aspera_cli/nextflow.config create mode 100644 modules/local/sra_fastq_ftp/nextflow.config create mode 100644 modules/nf-core/sratools/fasterqdump/nextflow.config diff --git a/main.nf b/main.nf index c62e4a7f..1b85ba11 100644 --- a/main.nf +++ b/main.nf @@ -64,26 +64,6 @@ params { icon 'fas fa-address-card' } - aspera_cli_args: String { - description 'Command-line arguments for Aspera CLI' - defaultValue '' - } - - sra_fastq_ftp_args: String { - description 'Command-line arguments for wget when downloading fastq files via FTP' - defaultValue '' - } - - sratools_fasterqdump_args: String { - description 'Command-line arguments for sratools fasterqdump' - defaultValue '' - } - - sratools_pigz_args: String { - description 'Command-line arguments for sratools pigz' - defaultValue '' - } - // TODO: ... } @@ -118,11 +98,7 @@ workflow { params.ena_metadata_fields, params.download_method, params.skip_fastq_download, - params.dbgap_key, - params.aspera_cli_args, - params.sra_fastq_ftp_args, - params.sratools_fasterqdump_args, - params.sratools_pigz_args + params.dbgap_key ) ) diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index 3701e3f8..8b884422 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -10,9 +10,9 @@ process ASPERA_CLI { input: meta : Map user : String - args : String script: + def args = task.ext.args ?: '' def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? 
"export CONDA_PREFIX=/usr/local" : "" def fastq = meta.fastq_aspera.tokenize(';') if (meta.single_end.toBoolean()) { diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config new file mode 100644 index 00000000..9a808242 --- /dev/null +++ b/modules/local/aspera_cli/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'ASPERA_CLI' { + ext.args = '-QT -l 300m -P33001' + } +} diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index e2264541..4ccb4735 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -11,9 +11,9 @@ process SRA_FASTQ_FTP { input: meta : Map - args : String script: + def args = task.ext.args ?: '' if (meta.single_end.toBoolean()) { """ wget \\ diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config new file mode 100644 index 00000000..26261f26 --- /dev/null +++ b/modules/local/sra_fastq_ftp/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'SRA_FASTQ_FTP' { + ext.args = '-t 5 -nv -c -T 60' + } +} diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 839ef80c..6f33bb15 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -12,13 +12,11 @@ process SRATOOLS_FASTERQDUMP { sra : Path ncbi_settings : Path certificate : Path? - fasterqdump_args: String = '--split-files --include-technical' - pigz_args : String = '' - prefix : String = '' script: - if( !prefix ) - prefix = "${meta.id}" + def args_fasterqdump = task.ext.args_fasterqdump ?: '' + def args_pigz = task.ext.args_pigz ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def outfile = meta.single_end ? "${prefix}.fastq" : prefix def key_file = '' if (certificate.baseName.endsWith('.jwt')) { @@ -30,14 +28,14 @@ process SRATOOLS_FASTERQDUMP { export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ - $fasterqdump_args \\ + $args_fasterqdump \\ --threads $task.cpus \\ --outfile $outfile \\ ${key_file} \\ ${sra} pigz \\ - $pigz_args \\ + $args_pigz \\ --no-name \\ --processes $task.cpus \\ *.fastq diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config new file mode 100644 index 00000000..6b14b7ba --- /dev/null +++ b/modules/nf-core/sratools/fasterqdump/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SRATOOLS_FASTERQDUMP { + ext.args_fasterqdump = '--split-files --include-technical' + } +} \ No newline at end of file diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index d6bbf690..d3927ff2 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -11,17 +11,17 @@ process SRATOOLS_PREFETCH { meta : Map ncbi_settings : Path certificate : Path? 
- prefetch_args : String = '' - retry_args : String = '5 1 100' // shell: + args_prefetch = task.ext.args_prefetch ?: '' + args_retry = task.ext.args_retry ?: '5 1 100' // id = meta.run_accession if (certificate) { if (certificate.baseName.endsWith('.jwt')) { - prefetch_args += " --perm ${certificate}" + args_prefetch += " --perm ${certificate}" } else if (certificate.baseName.endsWith('.ngc')) { - prefetch_args += " --ngc ${certificate}" + args_prefetch += " --ngc ${certificate}" } } diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index 7643c651..c097198c 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -42,9 +42,9 @@ retry_with_backoff() { export NCBI_SETTINGS="$PWD/!{ncbi_settings}" -retry_with_backoff !{retry_args} \ +retry_with_backoff !{args_retry} \ prefetch \ - !{prefetch_args} \ + !{args_prefetch} \ !{id} [ -f !{id}.sralite ] && vdb-validate !{id}.sralite || vdb-validate !{id} diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 0be4d1a9..ea3292ba 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -9,8 +9,6 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: sra_metadata : Channel> dbgap_key : Path? - sratools_fasterqdump_args : String - sratools_pigz_args : String main: // @@ -22,21 +20,12 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // // Prefetch sequencing reads in SRA format. // - def sra = SRATOOLS_PREFETCH ( - meta, - ncbi_settings, - dbgap_key ) + def sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) // // Convert the SRA format into one or more compressed FASTQ files. // - def fastq = SRATOOLS_FASTERQDUMP ( - meta, - sra, - ncbi_settings, - dbgap_key, - sratools_fasterqdump_args, - sratools_pigz_args ) + def fastq = SRATOOLS_FASTERQDUMP ( meta, sra, ncbi_settings, dbgap_key ) ( meta, fastq ) } diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 353db63e..7b1d1b7f 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -55,7 +55,7 @@ workflow SRA { !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP } // Channel> .map { meta -> - def out = SRA_FASTQ_FTP ( meta, params.sra_fastq_ftp_args ) + def out = SRA_FASTQ_FTP ( meta ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel @@ -68,9 +68,7 @@ workflow SRA { sratools_reads = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( sratools_metadata, - params.dbgap_key, - params.sratools_fasterqdump_args, - params.sratools_pigz_args + params.dbgap_key ) // Channel<(Map, List)> sratools_samples = sra_metadata.map { (meta, fastq) -> @@ -87,7 +85,7 @@ workflow SRA { !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA } // Channel> .map { meta -> - def out = ASPERA_CLI ( meta, 'era-fasp', params.aspera_cli_args ) + def out = ASPERA_CLI ( meta, 'era-fasp' ) new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) } // Channel @@ -128,10 +126,6 @@ record SraParams { download_method : DownloadMethod skip_fastq_download : boolean dbgap_key : Path? 
-    aspera_cli_args           : String
-    sra_fastq_ftp_args        : String
-    sratools_fasterqdump_args : String
-    sratools_pigz_args        : String
 }
 
 enum DownloadMethod {

From adc45a4de3425c9fa33c797d06a3fc90a7091310 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Tue, 19 Nov 2024 22:43:39 -0600
Subject: [PATCH 24/25] Restore missing configs

Signed-off-by: Ben Sherman
---
 modules/nf-core/sratools/fasterqdump/tests/nextflow.config | 5 +++++
 nextflow.config                                            | 3 +++
 .../nextflow.config                                        | 1 +
 workflows/sra/nextflow.config                              | 3 +++
 4 files changed, 12 insertions(+)
 create mode 100644 modules/nf-core/sratools/fasterqdump/tests/nextflow.config
 create mode 100644 subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
 create mode 100644 workflows/sra/nextflow.config

diff --git a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config
new file mode 100644
index 00000000..e62eb6ec
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: SRATOOLS_FASTERQDUMP {
+        ext.args_fasterqdump = ''
+    }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 0897d9c9..7efb1ceb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -34,6 +34,9 @@ includeConfig 'conf/base.config'
 // Load nf-core custom profiles from different Institutions
 includeConfig "${params.custom_config_base}/${params.custom_config_version}/nfcore_custom.config"
 
+// Workflow specific configs
+includeConfig './workflows/sra/nextflow.config'
+
 profiles {
     debug {
         dumpHashes = true
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
new file mode 100644
index 00000000..187faf6d
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config
@@ -0,0 +1 @@
+includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config'
diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config
new file mode 100644
index 00000000..522b05b8
--- /dev/null
+++ b/workflows/sra/nextflow.config
@@ -0,0 +1,3 @@
+includeConfig "../../modules/local/aspera_cli/nextflow.config"
+includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config"
+includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config"

From 0b52c8153173abdcf7aea14b703aa20244284da7 Mon Sep 17 00:00:00 2001
From: Ben Sherman
Date: Tue, 19 Nov 2024 22:49:57 -0600
Subject: [PATCH 25/25] Revert unrelated changes

Signed-off-by: Ben Sherman
---
 modules/local/aspera_cli/main.nf             | 18 +++++++++---------
 modules/local/sra_fastq_ftp/main.nf          | 18 +++++++++---------
 modules/local/sra_ids_to_runinfo/main.nf     | 12 ++++++------
 modules/local/sra_runinfo_to_ftp/main.nf     | 12 ++++++------
 .../custom/sratoolsncbisettings/main.nf      |  8 ++++----
 modules/nf-core/sratools/fasterqdump/main.nf | 14 +++++++-------
 modules/nf-core/sratools/prefetch/main.nf    | 12 ++++++------
 7 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf
index 8b884422..956f1cec 100644
--- a/modules/local/aspera_cli/main.nf
+++ b/modules/local/aspera_cli/main.nf
@@ -11,6 +11,15 @@ process ASPERA_CLI {
     meta : Map
     user : String
 
+    output:
+    fastq_1 : Path  = file('*_1.fastq.gz')
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file('*_1.fastq.gz.md5')
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
+
+    topic:
+    ( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions'
+
     script:
     def args = task.ext.args ?: ''
     def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
@@ -51,13 +60,4 @@ process ASPERA_CLI {
         md5sum -c ${meta.id}_2.fastq.gz.md5
         """
     }
-
-    output:
-    fastq_1 : Path  = file('*_1.fastq.gz')
-    fastq_2 : Path? = file('*_2.fastq.gz')
-    md5_1   : Path  = file('*_1.fastq.gz.md5')
-    md5_2   : Path? = file('*_2.fastq.gz.md5')
-
-    topic:
-    ( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions'
 }
diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf
index 4ccb4735..a5817833 100644
--- a/modules/local/sra_fastq_ftp/main.nf
+++ b/modules/local/sra_fastq_ftp/main.nf
@@ -12,6 +12,15 @@ process SRA_FASTQ_FTP {
     input:
     meta : Map
 
+    output:
+    fastq_1 : Path  = file('*_1.fastq.gz')
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file('*_1.fastq.gz.md5')
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
+
+    topic:
+    ( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions'
+
     script:
     def args = task.ext.args ?: ''
     if (meta.single_end.toBoolean()) {
@@ -43,13 +52,4 @@ process SRA_FASTQ_FTP {
         md5sum -c ${meta.id}_2.fastq.gz.md5
         """
     }
-
-    output:
-    fastq_1 : Path  = file('*_1.fastq.gz')
-    fastq_2 : Path? = file('*_2.fastq.gz')
-    md5_1   : Path  = file('*_1.fastq.gz.md5')
-    md5_2   : Path? = file('*_2.fastq.gz.md5')
-
-    topic:
-    ( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions'
 }
diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf
index 85f0655c..7d644b18 100644
--- a/modules/local/sra_ids_to_runinfo/main.nf
+++ b/modules/local/sra_ids_to_runinfo/main.nf
@@ -12,6 +12,12 @@ process SRA_IDS_TO_RUNINFO {
     id     : String
     fields : String
 
+    output:
+    file('*.runinfo.tsv')
+
+    topic:
+    ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions'
+
     script:
     def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''
"--ena_metadata_fields ${fields}" : '' """ @@ -21,10 +27,4 @@ process SRA_IDS_TO_RUNINFO { ${id}.runinfo.tsv \\ $metadata_fields """ - - output: - file('*.runinfo.tsv') - - topic: - ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' } diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 1842b726..1c56c336 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -9,16 +9,16 @@ process SRA_RUNINFO_TO_FTP { input: runinfo : Path + output: + file('*.runinfo_ftp.tsv') + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' + script: """ sra_runinfo_to_ftp.py \\ ${runinfo} \\ ${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv """ - - output: - file('*.runinfo_ftp.tsv') - - topic: - ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' } diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 14d49007..4180beee 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -10,13 +10,13 @@ process CUSTOM_SRATOOLSNCBISETTINGS { input: ids : Bag> - shell: - config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" - template 'detect_ncbi_settings.sh' - output: file('*.mkfg') topic: ( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + + shell: + config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" + template 'detect_ncbi_settings.sh' } diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 6f33bb15..330a3b85 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -13,6 +13,13 @@ process SRATOOLS_FASTERQDUMP { ncbi_settings : Path certificate : Path? + output: + files('*.fastq.gz').sort() + + topic: + ( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + ( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' + script: def args_fasterqdump = task.ext.args_fasterqdump ?: '' def args_pigz = task.ext.args_pigz ?: '' @@ -40,11 +47,4 @@ process SRATOOLS_FASTERQDUMP { --processes $task.cpus \\ *.fastq """ - - output: - files('*.fastq.gz').sort() - - topic: - ( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' - ( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' } diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index d3927ff2..38ab2728 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -12,6 +12,12 @@ process SRATOOLS_PREFETCH { ncbi_settings : Path certificate : Path? + output: + file(id) + + topic: + ( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + shell: args_prefetch = task.ext.args_prefetch ?: '' args_retry = task.ext.args_retry ?: '5 1 100' // @@ -26,10 +32,4 @@ process SRATOOLS_PREFETCH { } template 'retry_with_backoff.sh' - - output: - file(id) - - topic: - ( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' }