
Commit

add updates to modules and workflows, and example params (NYI) for tweaking options
cjfields committed Jan 26, 2025
1 parent 11dcf3c commit 19a7621
Showing 5 changed files with 107 additions and 3 deletions.
1 change: 0 additions & 1 deletion modules/local/pacbio_cutadapt.nf
@@ -3,7 +3,6 @@
process PACBIO_CUTADAPT {
tag "${meta.id}"


container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'

input:
55 changes: 55 additions & 0 deletions modules/local/variablefilter.nf
@@ -0,0 +1,55 @@
process VARIABLEFILTER {
tag "$meta.id"
label 'process_medium'
container ""

input:
tuple val(meta), path(reads), path(trimming) // joined upstream in the calling workflow (formerly: from itsStep3.join(itsStep3Trimming))

output:
tuple val(meta), file("${meta.id}.R1.filtered.fastq.gz") optional true, emit: filteredReadsR1
tuple val(meta), file("${meta.id}.R2.filtered.fastq.gz") optional true, emit: filteredReadsR2
tuple val(meta), file("${meta.id}.R[12].filtered.fastq.gz") optional true, emit: reads
file "*.trimmed.txt", emit: read_tracking

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(ShortRead))
suppressPackageStartupMessages(library(Biostrings))
out <- filterAndTrim(fwd = paste0("${meta.id}",".R1.cutadapt.fastq.gz"),
filt = paste0("${meta.id}", ".R1.filtered.fastq.gz"),
rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}",".R2.cutadapt.fastq.gz"),
filt.rev = if("${reads[1]}" == "null") NULL else paste0("${meta.id}", ".R2.filtered.fastq.gz"),
maxEE = if("${reads[1]}" == "null") ${params.maxEEFor} else c(${params.maxEEFor}, ${params.maxEERev}),
truncQ = ${params.truncQ},
rm.phix = as.logical(${params.rmPhiX}),
maxLen = ${params.max_read_len},
minLen = ${params.min_read_len},
compress = TRUE,
verbose = TRUE,
multithread = ${task.cpus})
# Change input read counts to actual raw read counts
colnames(out) <- c('cutadapt', 'filtered')
write.csv(out, paste0("${meta.id}", ".trimmed.txt"))
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch "${meta.id}.trimmed.txt"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
    variablefilter: \$(Rscript -e 'cat(as.character(packageVersion("dada2")))')
END_VERSIONS
"""
}
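
Since the DSL1-style "from itsStep3.join(itsStep3Trimming)" no longer belongs in the input block, the join has to move into the calling workflow. A minimal sketch of that wiring, assuming itsStep3 emits [ meta, reads ] tuples and itsStep3Trimming emits [ meta, trimming ] tuples as implied by the original input line (the include path and channel contents are illustrative, not part of the commit):

include { VARIABLEFILTER } from './modules/local/variablefilter'  // path is illustrative

workflow TEST_VARIABLEFILTER {
    // Placeholder channels standing in for itsStep3 / itsStep3Trimming
    itsStep3         = Channel.empty()   // [ meta, reads ]
    itsStep3Trimming = Channel.empty()   // [ meta, trimming ]

    // The join formerly embedded in the process input now happens here
    VARIABLEFILTER( itsStep3.join(itsStep3Trimming) )

    // Named outputs are then available downstream, e.g.:
    //   VARIABLEFILTER.out.reads          // [ meta, *.R[12].filtered.fastq.gz ]
    //   VARIABLEFILTER.out.read_tracking  // *.trimmed.txt
}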
39 changes: 39 additions & 0 deletions modules/local/variabletrim.nf
@@ -0,0 +1,39 @@
process VARIABLE_TRIM {
tag "$meta.id"
label 'process_medium'

container 'quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1'

input:
tuple val(meta), path(reads)
tuple val(for_primer), val(rev_primer)
tuple val(for_primer_rc), val(rev_primer_rc)

output:
tuple val(meta), file("${meta.id}.R[12].cutadapt.fastq.gz") optional true, emit: trimmed_reads
file("*.cutadapt.out") into cutadaptToMultiQC
// path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def outr2 = meta.single_end ? '' : "-p ${meta.id}.R2.cutadapt.fastq.gz"
def p2 = meta.single_end ? '' : "-G ${rev_primer} -A ${rev_primer_rc}"
"""
cutadapt -g ${for_primer} -a ${for_primer_rc} ${p2} \\
--cores ${task.cpus} \\
--max-N ${params.maxN} \\
-n 2 \\
-o ${meta.id}.R1.cutadapt.fastq.gz ${outr2} \\
${reads} > ${meta.id}.cutadapt.out
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
echo "" | gzip > "${meta.id}.R1.cutadapt.fastq.gz"
touch ${meta.id}.cutadapt.out
"""
}
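
For reference, a rough sketch of how VARIABLE_TRIM might be called from the FILTER_AND_TRIM subworkflow, assuming the reverse complements come from the existing reverse_complement() helper; ch_reads and the exact assignments are illustrative, not taken from the commit:

// Illustrative wiring only (ch_reads is a placeholder [ meta, reads ] channel)
for_primer    = params.for_primer
rev_primer    = params.rev_primer
for_primer_rc = reverse_complement(for_primer)
rev_primer_rc = reverse_complement(rev_primer)

VARIABLE_TRIM(
    ch_reads,                          // [ meta, reads ]
    [ for_primer, rev_primer ],        // forward/reverse primers as a value tuple
    [ for_primer_rc, rev_primer_rc ]   // their reverse complements
)

ch_multiqc_logs  = VARIABLE_TRIM.out.cutadapt_log    // per-sample cutadapt reports
ch_trimmed_infer = VARIABLE_TRIM.out.trimmed_reads   // [ meta, *.R[12].cutadapt.fastq.gz ]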
4 changes: 2 additions & 2 deletions nextflow.config
@@ -34,8 +34,8 @@ params {
// if quality_binning is true and error_function is set to 'makeBinnedQualErrfun', this is required to be set

quality_bins = ""
amplicon_type = "overlapping"
platform = "illumina" // illumina, pacbio; 454 and others could be added
amplicon_type = "overlapping" // "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
platform = "illumina" // "illumina", "pacbio"; ONT, 454, Element, others could be added

// QC
skip_FASTQC = false // run this step by default; it can fail with large sample numbers
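
The commit message flags these as example params that are not yet implemented (NYI), so the snippet below is only an illustrative override, e.g. a custom config supplied with -c, for a hypothetical PacBio full-length run:

// custom.config (illustrative only; the new option values are marked NYI)
params {
    platform      = "pacbio"        // currently supported: "illumina", "pacbio"
    amplicon_type = "full_length"   // e.g. "full_length", "overlapping", "dovetail", "mix", "nonoverlapping"
}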
11 changes: 11 additions & 0 deletions subworkflows/local/filter_and_trim.nf
@@ -23,6 +23,11 @@ workflow FILTER_AND_TRIM {
ch_trimmed_R1 = Channel.empty()
ch_trimmed_R2 = Channel.empty()

// TODO: we're probably going to move to requiring the primer sequences to
// make the workflow more flexible re: trimming options, esp. since
// the current version assumes the presence of primer sequences and
// does a hard trim. This also allows for passing in cutadapt anchors
// and primer options (would need to parse these out)
for_primer = params.for_primer
for_primer_rc = ""
rev_primer = params.rev_primer
@@ -90,6 +95,12 @@
trimmed_infer = ch_trimmed_infer
}

// def clean_primers(primer) {
// // returns a clean primer string, IUPAC codes
// // w/o any metadata or anchors. Assumes cutadapt
// // filtering
// }

def reverse_complement(primer) {
// returns the revcomp, handles IUPAC ambig codes
// tr "[ATGCUNYRSWKMBDHV]" "[TACGANRYSWMKVHDB]"
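
As one possible shape for the commented-out clean_primers() stub above, here is a rough sketch; it is an assumption, not part of the commit, and the exact cutadapt syntax to strip (anchors, name= labels, ;-separated options) would need to match whatever adapter notation the pipeline decides to accept:

def clean_primers(primer) {
    // Illustrative sketch only: reduce a cutadapt-style adapter spec to bare IUPAC bases
    def p = primer
    p = p.contains('=') ? p.tokenize('=').last() : p      // drop a leading "name=" label
    p = p.tokenize(';').first()                           // drop ";e=0.2"-style options
    p = p.replaceAll('[\\^\\$]', '')                      // drop 5'/3' anchor characters
    return p.toUpperCase().replaceAll('[^ACGTUNRYSWKMBDHV]', '')
}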
