
Commit

add updates to modules and workflows, and example params (NYI) for tweaking options
cjfields committed Jan 26, 2025
1 parent 11dcf3c commit 19a7621
Showing 5 changed files with 107 additions and 3 deletions.
1 change: 0 additions & 1 deletion modules/local/pacbio_cutadapt.nf
@@ -3,7 +3,6 @@
process PACBIO_CUTADAPT {
tag "${meta.id}"


container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'

input:
55 changes: 55 additions & 0 deletions modules/local/variablefilter.nf
@@ -0,0 +1,55 @@
process VARIABLEFILTER {
tag "$meta.id"
label 'process_medium'
container ""

input:
tuple val(meta), path(reads), path(trimming) // joined upstream in the calling workflow (formerly: from itsStep3.join(itsStep3Trimming))

output:
tuple val(meta), file("${meta.id}.R1.filtered.fastq.gz") optional true, emit: filteredReadsR1
tuple val(meta), file("${meta.id}.R2.filtered.fastq.gz") optional true, emit: filteredReadsR2
tuple val(meta), file("${meta.id}.R[12].filtered.fastq.gz") optional true, emit: reads
file "*.trimmed.txt", emit: read_tracking

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(ShortRead))
suppressPackageStartupMessages(library(Biostrings))
out <- filterAndTrim(fwd = paste0("${meta.id}",".R1.cutadapt.fastq.gz"),
filt = paste0("${meta.id}", ".R1.filtered.fastq.gz"),
rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}",".R2.cutadapt.fastq.gz"),
filt.rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}", ".R2.filtered.fastq.gz"),
maxEE = if("${reads[1]}" == "null") ${params.maxEEFor} else c(${params.maxEEFor}, ${params.maxEERev}),
truncQ = ${params.truncQ},
rm.phix = as.logical(${params.rmPhiX}),
maxLen = ${params.max_read_len},
minLen = ${params.min_read_len},
compress = TRUE,
verbose = TRUE,
multithread = ${task.cpus})
# Change input read counts to actual raw read counts
colnames(out) <- c('cutadapt', 'filtered')
write.csv(out, paste0("${meta.id}", ".trimmed.txt"))
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch "${meta.id}.trimmed.txt"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
    variablefilter: \$(Rscript -e 'cat(as.character(packageVersion("dada2")))')
END_VERSIONS
"""
}
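
Since the DSL1-style "from itsStep3.join(itsStep3Trimming)" no longer belongs in the input block, the join has to move into the calling workflow. A minimal sketch of that wiring, assuming itsStep3 emits [ meta, reads ] tuples and itsStep3Trimming emits [ meta, trimming ] tuples as implied by the original input line (the include path and channel contents are illustrative, not part of the commit):

include { VARIABLEFILTER } from './modules/local/variablefilter'  // path is illustrative

workflow TEST_VARIABLEFILTER {
    // Placeholder channels standing in for itsStep3 / itsStep3Trimming
    itsStep3         = Channel.empty()   // [ meta, reads ]
    itsStep3Trimming = Channel.empty()   // [ meta, trimming ]

    // The join formerly embedded in the process input now happens here
    VARIABLEFILTER( itsStep3.join(itsStep3Trimming) )

    // Named outputs are then available downstream, e.g.:
    //   VARIABLEFILTER.out.reads          // [ meta, *.R[12].filtered.fastq.gz ]
    //   VARIABLEFILTER.out.read_tracking  // *.trimmed.txt
}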
39 changes: 39 additions & 0 deletions modules/local/variabletrim.nf
@@ -0,0 +1,39 @@
process VARIABLE_TRIM {
tag "$meta.id"
label 'process_medium'

container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'

input:
tuple val(meta), path(reads)
tuple val(for_primer), val(rev_primer)
tuple val(for_primer_rc), val(rev_primer_rc)

output:
tuple val(meta), file("${meta.id}.R[12].cutadapt.fastq.gz") optional true, emit: trimmed_reads
file("*.cutadapt.out") into cutadaptToMultiQC
// path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def outr2 = meta.single_end ? '' : "-p ${meta.id}.R2.cutadapt.fastq.gz"
def p2 = meta.single_end ? '' : "-G ${rev_primer} -A ${rev_primer_rc}"
"""
cutadapt -g ${for_primer} -a ${for_primer_rc} ${p2} \\
--cores ${task.cpus} \\
--max-N ${params.maxN} \\
-n 2 \\
-o ${meta.id}.R1.cutadapt.fastq.gz ${outr2} \\
${reads} > ${meta.id}.cutadapt.out
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
echo "" | gzip > "${meta.id}.R1.cutadapt.fastq.gz"
touch ${meta.id}.cutadapt.out
"""
}
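
For reference, a rough sketch of how VARIABLE_TRIM might be called from the FILTER_AND_TRIM subworkflow, assuming the reverse complements come from the existing reverse_complement() helper; ch_reads and the exact assignments are illustrative, not taken from the commit:

// Illustrative wiring only (ch_reads is a placeholder [ meta, reads ] channel)
for_primer    = params.for_primer
rev_primer    = params.rev_primer
for_primer_rc = reverse_complement(for_primer)
rev_primer_rc = reverse_complement(rev_primer)

VARIABLE_TRIM(
    ch_reads,                          // [ meta, reads ]
    [ for_primer, rev_primer ],        // forward/reverse primers as a value tuple
    [ for_primer_rc, rev_primer_rc ]   // their reverse complements
)

ch_multiqc_logs  = VARIABLE_TRIM.out.cutadapt_log    // per-sample cutadapt reports
ch_trimmed_infer = VARIABLE_TRIM.out.trimmed_reads   // [ meta, *.R[12].cutadapt.fastq.gz ]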
4 changes: 2 additions & 2 deletions nextflow.config
@@ -34,8 +34,8 @@ params {
// if quality_binning is true and error_function is set to 'makeBinnedQualErrfun', this is required to be set

quality_bins = ""
amplicon_type = "overlapping"
platform = "illumina" // illumina, pacbio; 454 and others could be added
amplicon_type = "overlapping" // "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
platform = "illumina" // "illumina", "pacbio"; ONT, 454, Element, others could be added

// QC
skip_FASTQC = false // run this step by default; it can fail with large sample numbers
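
The commit message flags these as example params that are not yet implemented (NYI), so the snippet below is only an illustrative override, e.g. a custom config supplied with -c, for a hypothetical PacBio full-length run:

// custom.config (illustrative only; the new option values are marked NYI)
params {
    platform      = "pacbio"        // currently supported: "illumina", "pacbio"
    amplicon_type = "full_length"   // e.g. "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
}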
11 changes: 11 additions & 0 deletions subworkflows/local/filter_and_trim.nf
@@ -23,6 +23,11 @@ workflow FILTER_AND_TRIM {
ch_trimmed_R1 = Channel.empty()
ch_trimmed_R2 = Channel.empty()

// TODO: we're probably going to move to requiring the primer sequences to
// make the workflow more flexible re: trimming options, esp. since
// the current version assumes the presence of primer sequences and
// does a hard trim. This also allows for passing in cutadapt anchors
// and primer options (would need to parse these out)
for_primer = params.for_primer
for_primer_rc = ""
rev_primer = params.rev_primer
@@ -90,6 +95,12 @@
trimmed_infer = ch_trimmed_infer
}

// def clean_primers(primer) {
// // returns a clean primer string, IUPAC codes
// // w/o any metadata or anchors. Assumes cutadapt
// // filtering
// }

def reverse_complement(primer) {
// returns the revcomp, handles IUPAC ambig codes
// tr "[ATGCUNYRSWKMBDHV]" "[TACGANRYSWMKVHDB]"
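
As one possible shape for the commented-out clean_primers() stub above, here is a rough sketch; it is an assumption, not part of the commit, and the exact cutadapt syntax to strip (anchors, name= labels, ;-separated options) would need to match whatever adapter notation the pipeline decides to accept:

def clean_primers(primer) {
    // Illustrative sketch only: reduce a cutadapt-style adapter spec to bare IUPAC bases
    def p = primer
    p = p.contains('=') ? p.tokenize('=').last() : p      // drop a leading "name=" label
    p = p.tokenize(';').first()                           // drop ";e=0.2"-style options
    p = p.replaceAll('[\\^\\$]', '')                      // drop 5'/3' anchor characters
    return p.toUpperCase().replaceAll('[^ACGTUNRYSWKMBDHV]', '')
}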
