Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proposal: Static types #309

Draft
wants to merge 25 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f531c5d
Replace ext/publishDir with params/publish definition
bentsherman Apr 27, 2024
836ace2
Update config to comply with strict parser
bentsherman Apr 27, 2024
25a1fb5
Use param schemas as source of truth, convert to YAML
bentsherman Apr 27, 2024
505806a
Use eval output, topic channels to collect tool versions
bentsherman Apr 27, 2024
5ae1562
Use static types, record types
bentsherman Apr 27, 2024
b2f563d
Refactor params as inputs for SRA workflow
bentsherman Apr 27, 2024
24b34cf
New dataflow syntax
bentsherman Apr 30, 2024
2771765
Simplify process inputs/outputs
bentsherman May 18, 2024
90e4ac1
Replace `def` with `fn` / `let` / `var`
bentsherman May 18, 2024
a7bebba
Omit name for single process output
bentsherman May 18, 2024
f791e1e
minor updates
bentsherman Sep 23, 2024
7040945
Revert changes to JSON schemas
bentsherman Nov 2, 2024
aef604b
Add SraParams type
bentsherman Nov 3, 2024
f1f763c
Refactor workflow outputs
bentsherman Nov 3, 2024
c37a3b2
Make Sample type more precise
bentsherman Nov 4, 2024
677f838
Revert unrelated changes
bentsherman Nov 4, 2024
e4c956f
Remove old params, remove script params from config
bentsherman Nov 5, 2024
e4761ce
Replace set operator with assignment
bentsherman Nov 8, 2024
b2d817e
Update operators
bentsherman Nov 8, 2024
2b47166
Revert `fn` / `let` / `var` to `def`
bentsherman Nov 8, 2024
e3f052a
Revert pipe (`|>`) to dot for operators
bentsherman Nov 8, 2024
6598091
Revert unrelated changes
bentsherman Nov 20, 2024
d555922
Restore ext config
bentsherman Nov 20, 2024
adc45a4
Restore missing configs
bentsherman Nov 20, 2024
0b52c81
Revert unrelated changes
bentsherman Nov 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions assets/schema_input.json

This file was deleted.

12 changes: 12 additions & 0 deletions assets/schema_input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
$schema: http://json-schema.org/draft-07/schema
mahesh-panchal marked this conversation as resolved.
Show resolved Hide resolved
$id: https://raw.githubusercontent.com/nf-core/fetchngs/master/assets/schema_input.yml
title: nf-core/fetchngs pipeline - params.input schema
description: Schema for the file provided with params.input
# The input file is validated as an array of objects (one object per row).
type: array
items:
  type: object
  properties:
    # The identifier column is keyed by the empty string.
    '':
      type: string
      # A database prefix followed by one or more digits.
      # NOTE: YAML plain and single-quoted scalars do not process backslash
      # escapes, so the digit class must be written `\d` here. The JSON form
      # `\\d` would require a literal backslash and reject every valid id.
      pattern: '^(((SR|ER|DR)[APRSX])|(SAM(N|EA|D))|(PRJ(NA|EB|DB))|(GS[EM]))(\d+)$'
      errorMessage: Please provide a valid SRA, ENA, DDBJ or GEO identifier
44 changes: 22 additions & 22 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@

process {

cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }

publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
resourceLimits = [
cpus: params.max_cpus,
memory: params.max_memory,
time: params.max_time
]

cpus = { 1 * task.attempt }
memory = { 6.GB * task.attempt }
time = { 4.h * task.attempt }

errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'
Expand All @@ -31,30 +31,30 @@ process {
// adding in your local modules too.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
cpus = { 1 }
memory = { 6.GB * task.attempt }
time = { 4.h * task.attempt }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
cpus = { 2 * task.attempt }
memory = { 12.GB * task.attempt }
time = { 4.h * task.attempt }
}
withLabel:process_medium {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
cpus = { 6 * task.attempt }
memory = { 36.GB * task.attempt }
time = { 8.h * task.attempt }
}
withLabel:process_high {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
cpus = { 12 * task.attempt }
memory = { 72.GB * task.attempt }
time = { 16.h * task.attempt }
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
time = { 20.h * task.attempt }
}
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
memory = { 200.GB * task.attempt }
}
withLabel:error_ignore {
errorStrategy = 'ignore'
Expand Down
31 changes: 26 additions & 5 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
----------------------------------------------------------------------------------------
*/

nextflow.enable.dsl = 2
nextflow.preview.dsl = 3

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -33,14 +33,29 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetc
workflow NFCORE_FETCHNGS {

take:
ids // channel: database ids read in from --input
ids // Channel<String>
params // ParamsMap

main:

//
// WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids
//
SRA ( ids )
SRA (
ids,
params.ena_metadata_fields ?: '',
params.sample_mapping_fields,
params.nf_core_pipeline ?: '',
params.nf_core_rnaseq_strandedness ?: 'auto',
mahesh-panchal marked this conversation as resolved.
Show resolved Hide resolved
params.download_method,
params.skip_fastq_download,
params.dbgap_key,
params.aspera_cli_args,
params.sra_fastq_ftp_args,
params.sratools_fasterqdump_args,
params.sratools_pigz_args,
params.outdir
)
bentsherman marked this conversation as resolved.
Show resolved Hide resolved

}

Expand All @@ -55,7 +70,7 @@ workflow {
//
// SUBWORKFLOW: Run initialisation tasks
//
PIPELINE_INITIALISATION (
ids = PIPELINE_INITIALISATION (
params.version,
params.help,
params.validate_params,
Expand All @@ -70,7 +85,8 @@ workflow {
// WORKFLOW: Run primary workflows for the pipeline
//
NFCORE_FETCHNGS (
PIPELINE_INITIALISATION.out.ids
ids,
params,
)

//
Expand All @@ -86,6 +102,11 @@ workflow {
)
}

publish {
directory params.outdir
bentsherman marked this conversation as resolved.
Show resolved Hide resolved
mode params.publish_dir_mode
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
Expand Down
29 changes: 13 additions & 16 deletions modules/local/aspera_cli/main.nf
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include { Sample } from '../../types/types'

process ASPERA_CLI {
tag "$meta.id"
label 'process_medium'
Expand All @@ -8,16 +10,21 @@ process ASPERA_CLI {
'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"

input:
tuple val(meta), val(fastq)
val user
Sample input
String user
String args

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
path "versions.yml" , emit: versions
Sample fastq = new Sample(meta, path("*fastq.gz"))
Sample md5 = new Sample(meta, path("*md5"))
bentsherman marked this conversation as resolved.
Show resolved Hide resolved

topic:
[ task.process, 'aspera_cli', eval('ascli --version') ] >> 'versions'
bentsherman marked this conversation as resolved.
Show resolved Hide resolved

script:
def args = task.ext.args ?: ''
meta = input.meta
fastq = input.files

def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
if (meta.single_end) {
"""
Expand All @@ -31,11 +38,6 @@ process ASPERA_CLI {

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5

cat <<-END_VERSIONS > versions.yml
"${task.process}":
aspera_cli: \$(ascli --version)
END_VERSIONS
"""
} else {
"""
Expand All @@ -58,11 +60,6 @@ process ASPERA_CLI {

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5

cat <<-END_VERSIONS > versions.yml
"${task.process}":
aspera_cli: \$(ascli --version)
END_VERSIONS
"""
}
}
17 changes: 0 additions & 17 deletions modules/local/aspera_cli/nextflow.config

This file was deleted.

13 changes: 5 additions & 8 deletions modules/local/multiqc_mappings_config/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,18 @@ process MULTIQC_MAPPINGS_CONFIG {
'biocontainers/python:3.9--1' }"

input:
path csv
Path csv

output:
path "*yml" , emit: yml
path "versions.yml", emit: versions
Path yml = path("multiqc_config.yml")

topic:
[ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions'

script:
"""
multiqc_mappings_config.py \\
$csv \\
multiqc_config.yml

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
}
9 changes: 0 additions & 9 deletions modules/local/multiqc_mappings_config/nextflow.config

This file was deleted.

26 changes: 11 additions & 15 deletions modules/local/sra_fastq_ftp/main.nf
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include { Sample } from '../../types/types'

process SRA_FASTQ_FTP {
tag "$meta.id"
Expand All @@ -10,15 +11,20 @@ process SRA_FASTQ_FTP {
'biocontainers/wget:1.20.1' }"

input:
tuple val(meta), val(fastq)
Sample input
String args

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
path "versions.yml" , emit: versions
Sample fastq = new Sample(meta, path("*fastq.gz"))
Sample md5 = new Sample(meta, path("*md5"))
bentsherman marked this conversation as resolved.
Show resolved Hide resolved

topic:
[ task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ] >> 'versions'

script:
def args = task.ext.args ?: ''
meta = input.meta
fastq = input.files

if (meta.single_end) {
"""
wget \\
Expand All @@ -28,11 +34,6 @@ process SRA_FASTQ_FTP {

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5

cat <<-END_VERSIONS > versions.yml
"${task.process}":
wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//'))
END_VERSIONS
"""
} else {
"""
Expand All @@ -51,11 +52,6 @@ process SRA_FASTQ_FTP {

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5

cat <<-END_VERSIONS > versions.yml
"${task.process}":
wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//'))
END_VERSIONS
"""
}
}
17 changes: 0 additions & 17 deletions modules/local/sra_fastq_ftp/nextflow.config

This file was deleted.

15 changes: 6 additions & 9 deletions modules/local/sra_ids_to_runinfo/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ process SRA_IDS_TO_RUNINFO {
'biocontainers/python:3.9--1' }"

input:
val id
val fields
String id
String fields

output:
path "*.tsv" , emit: tsv
path "versions.yml", emit: versions
Path tsv = path("*.runinfo.tsv")

topic:
[ task.process, 'python', eval("python --version | sed 's/Python //g'") ] >> 'versions'

script:
def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''
Expand All @@ -24,10 +26,5 @@ process SRA_IDS_TO_RUNINFO {
id.txt \\
${id}.runinfo.tsv \\
$metadata_fields

cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
END_VERSIONS
"""
}
8 changes: 0 additions & 8 deletions modules/local/sra_ids_to_runinfo/nextflow.config

This file was deleted.

Loading
Loading