From 19a762102ec70dbc244f652614277115c27311e6 Mon Sep 17 00:00:00 2001
From: Chris Fields
Date: Sun, 26 Jan 2025 16:55:49 -0600
Subject: [PATCH] add updates to modules and workflows, and example params (NYI) for tweaking options

---
Review notes (text between the '---' line and the first 'diff --git' line is
not part of the commit message and is skipped when the patch is applied):

  * Layout: this copy was rebuilt from a whitespace-collapsed version of the
    patch. Line counts per hunk match the '@@' headers, but indentation and
    the position of blank context lines are reconstructed; applying may need
    'git apply --ignore-whitespace'.
  * VARIABLEFILTER: write.csv() referenced an undefined object 'out3'; it now
    writes 'out', the value returned by filterAndTrim().
  * VARIABLEFILTER / VARIABLE_TRIM: removed the DSL1 'from ...' / 'into ...'
    channel clauses. The cutadapt log is now exposed as
    'emit: cutadaptToMultiQC'.
  * VARIABLE_TRIM: 'outr2' and 'p2' are declared with 'def' so they are local
    to each task.
  * VARIABLEFILTER stub: the version line queried samtools; it now reports the
    dada2 package version.
  * Still open: VARIABLEFILTER has an empty 'container'; the 'optional true'
    spelling and the unquoted params.rmPhiX interpolation were left as written
    and should be checked against the Nextflow version and config in use.
  * The blob ids on the 'index' lines are those of the original commit and do
    not describe the edited content.

 modules/local/pacbio_cutadapt.nf      |  1 -
 modules/local/variablefilter.nf       | 55 +++++++++++++++++++++++++++
 modules/local/variabletrim.nf         | 39 +++++++++++++++++++
 nextflow.config                       |  4 +-
 subworkflows/local/filter_and_trim.nf | 11 ++++++
 5 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 modules/local/variablefilter.nf
 create mode 100644 modules/local/variabletrim.nf

diff --git a/modules/local/pacbio_cutadapt.nf b/modules/local/pacbio_cutadapt.nf
index bf1b59c..a3e0e28 100644
--- a/modules/local/pacbio_cutadapt.nf
+++ b/modules/local/pacbio_cutadapt.nf
@@ -3,7 +3,6 @@
 process PACBIO_CUTADAPT {
     tag "${meta.id}"

-
     container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'

     input:
diff --git a/modules/local/variablefilter.nf b/modules/local/variablefilter.nf
new file mode 100644
index 0000000..b3a4519
--- /dev/null
+++ b/modules/local/variablefilter.nf
@@ -0,0 +1,55 @@
+process VARIABLEFILTER {
+    tag "$meta.id"
+    label 'process_medium'
+    container "" // TODO(review): no image set; the script loads dada2, ShortRead and Biostrings
+
+    input:
+    tuple val(meta), file(reads), file(trimming) // 'trimming' is staged but not referenced by the script
+
+    output:
+    tuple val(meta), file("${meta.id}.R1.filtered.fastq.gz") optional true, emit: filteredReadsR1
+    tuple val(meta), file("${meta.id}.R2.filtered.fastq.gz") optional true, emit: filteredReadsR2
+    tuple val(meta), file("${meta.id}.R[12].filtered.fastq.gz") optional true, emit: reads
+    file "*.trimmed.txt", emit: read_tracking
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    suppressPackageStartupMessages(library(dada2))
+    suppressPackageStartupMessages(library(ShortRead))
+    suppressPackageStartupMessages(library(Biostrings))
+
+    out <- filterAndTrim(fwd = paste0("${meta.id}",".R1.cutadapt.fastq.gz"),
+                        filt = paste0("${meta.id}", ".R1.filtered.fastq.gz"),
+                        rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}",".R2.cutadapt.fastq.gz"),
+                        filt.rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}", ".R2.filtered.fastq.gz"),
+                        maxEE = if("${reads[1]}" == "null") ${params.maxEEFor} else c(${params.maxEEFor}, ${params.maxEERev}),
+                        truncQ = ${params.truncQ},
+                        rm.phix = as.logical(${params.rmPhiX}),
+                        maxLen = ${params.max_read_len},
+                        minLen = ${params.min_read_len},
+                        compress = TRUE,
+                        verbose = TRUE,
+                        multithread = ${task.cpus})
+    # Relabel the two read-count columns returned by filterAndTrim (reads in, reads out)
+    colnames(out) <- c('cutadapt', 'filtered')
+    write.csv(out, paste0("${meta.id}", ".trimmed.txt"))
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${meta.id}.trimmed.txt"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dada2: \$(Rscript -e "cat(as.character(packageVersion('dada2')))")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/variabletrim.nf b/modules/local/variabletrim.nf
new file mode 100644
index 0000000..c4ad84d
--- /dev/null
+++ b/modules/local/variabletrim.nf
@@ -0,0 +1,39 @@
+process VARIABLE_TRIM {
+    tag "$meta.id"
+    label 'process_medium'
+
+    container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(for_primer), val(rev_primer)
+    tuple val(for_primer_rc), val(rev_primer_rc)
+
+    output:
+    tuple val(meta), file("${meta.id}.R[12].cutadapt.fastq.gz") optional true, emit: trimmed_reads
+    file "*.cutadapt.out", emit: cutadaptToMultiQC
+    // path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def outr2 = meta.single_end ? '' : "-p ${meta.id}.R2.cutadapt.fastq.gz"
+    def p2 = meta.single_end ? '' : "-G ${rev_primer} -A ${rev_primer_rc}"
+    """
+    cutadapt -g ${for_primer} -a ${for_primer_rc} ${p2} \\
+        --cores ${task.cpus} \\
+        --max-N ${params.maxN} \\
+        -n 2 \\
+        -o ${meta.id}.R1.cutadapt.fastq.gz ${outr2} \\
+        ${reads} > ${meta.id}.cutadapt.out
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo "" | gzip > "${meta.id}.R1.cutadapt.fastq.gz"
+    touch ${meta.id}.cutadapt.out
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index 01f2ca8..3265723 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -34,8 +34,8 @@ params {
     // if quality_binning is true and error_function is set to 'makeBinnedQualErrfun', this is required to be set
     quality_bins = ""

-    amplicon_type = "overlapping"
-    platform = "illumina" // illumina, pacbio; 454 and others could be added
+    amplicon_type = "overlapping" // "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
+    platform = "illumina" // "illumina", "pacbio"; ONT, 454, Element, others could be added

     // QC
     skip_FASTQC = false // set to run this step by default, this can fail with large sample #'s
diff --git a/subworkflows/local/filter_and_trim.nf b/subworkflows/local/filter_and_trim.nf
index a6cb15d..1cd8b7f 100644
--- a/subworkflows/local/filter_and_trim.nf
+++ b/subworkflows/local/filter_and_trim.nf
@@ -23,6 +23,11 @@ workflow FILTER_AND_TRIM {
     ch_trimmed_R1 = Channel.empty()
     ch_trimmed_R2 = Channel.empty()

+    // TODO: we're probably going to move to requiring the primer sequences to
+    //       make the workflow more flexible re: trimming options, esp. since
+    //       the current version assumes the presence of primer sequences and
+    //       does a hard trim. This also allows for passing in cutadapt anchors
+    //       and primer options (would need to parse these out)
     for_primer = params.for_primer
     for_primer_rc = ""
     rev_primer = params.rev_primer
@@ -90,6 +95,12 @@ workflow FILTER_AND_TRIM {
     trimmed_infer = ch_trimmed_infer
 }

+// def clean_primers(primer) {
+//     // returns a clean primer string, IUPAC codes
+//     // w/o any metadata or anchors. Assumes cutadapt
+//     // filtering
+// }
+
 def reverse_complement(primer) {
     // returns the revcomp, handles IUPAC ambig codes
     // tr "[ATGCUNYRSWKMBDHV]" "[TACGANRYSWMKVHDB]"