Skip to content

Commit

Permalink
Merge branch 'edge_v3' into nf_composition
Browse files Browse the repository at this point in the history
  • Loading branch information
aw-watson authored Dec 23, 2024
2 parents 44f80bf + 02cd579 commit d7f92b3
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 168 deletions.
18 changes: 8 additions & 10 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,27 @@ include {READSTOCONTIGS} from './modules/runReadsToContig/runReadsToContig.nf'
workflow {

//input specification

pairedFiles = channel.fromPath(params.pairedFiles, checkIfExists:true)
unpairedFiles = channel.fromPath(params.unpairedFiles, checkIfExists:true)

fastqFiles = channel.fromPath(params.shared.inputFastq, checkIfExists:true)
contigs = channel.empty()
if(params.r2c.useAssembledContigs) {
contigs = channel.fromPath(params.inputContigs, checkIfExists:true)
contigs = channel.fromPath(params.shared.inputContigs, checkIfExists:true)
}


if(params.modules.sra2fastq) {
SRA2FASTQ(params.sra2fastq.plus(params.shared))
pairedFiles = pairedFiles.concat(SRA2FASTQ.out.paired).flatten()
unpairedFiles = unpairedFiles.concat(SRA2FASTQ.out.unpaired).flatten()
fastqFiles = fastqFiles.concat(SRA2FASTQ.out.fastq).flatten()
}

COUNTFASTQ(pairedFiles.collect(), unpairedFiles.collect())
COUNTFASTQ(params.shared, fastqFiles.collect())

avgLen = COUNTFASTQ.out.avgReadLen
paired = COUNTFASTQ.out.paired.ifEmpty(params.pairedFiles)
unpaired = COUNTFASTQ.out.unpaired.ifEmpty(params.unpairedFiles)
fastqFiles = COUNTFASTQ.out.fastqFiles


if(params.modules.faqcs) {
FAQCS(params.faqcs.plus(params.shared),paired,unpaired,avgLen)
FAQCS(params.faqcs.plus(params.shared), fastqFiles,avgLen)
paired = FAQCS.out.paired.ifEmpty(params.pairedFiles)
unpaired = FAQCS.out.unpaired.ifEmpty(params.unpairedFiles)
}
Expand Down
40 changes: 13 additions & 27 deletions modules/countFastq/countFastq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,26 @@ process countFastq {
label "countFastq"

input:
path paired
path unpaired
val settings
path fastq

output:
path "fastqCount.txt", emit: counts
path "all.{1,2}.fastq", emit: allPaired, optional:true
path "all.se.fastq", emit: allUnpaired, optional:true
path "all.*.fastq", emit: allFiles

script:

if(paired.size() > 1 && paired[0] =~ /NO_FILE/) {
paired = paired.tail().join(" ")
file_list = ""
if(settings["pairedFile"]) {
file_list = "-p $fastq"
}
else {
paired = paired.join(" ")
file_list = "-u $fastq"
}
if(unpaired.size() > 1 && unpaired[0] =~ /NO_FILE/) {
unpaired = unpaired.tail().join(" ")
}
else {
unpaired = unpaired.join(" ")
}


paired_list = paired.startsWith("NO_FILE") ? "" : "-p ${paired}"
unpaired_list = unpaired.startsWith("NO_FILE2") ? "" : "-u ${unpaired}"

"""
getAvgLen.pl\
$paired_list\
$unpaired_list\
$file_list\
-d .
"""
}
Expand Down Expand Up @@ -70,19 +59,16 @@ process avgLen {
//calculates average read length and concatenates input files
workflow COUNTFASTQ {
take:
pairedFiles
unpairedFiles
settings
inputFastq

main:

countFastq(pairedFiles, unpairedFiles)
countFastq(settings, inputFastq)
avgReadLen = avgLen(countFastq.out.counts)
paired = countFastq.out.allPaired
unpaired = countFastq.out.allUnpaired

fastqFiles = countFastq.out.allFiles

emit:
avgReadLen
paired
unpaired
fastqFiles
}
115 changes: 25 additions & 90 deletions modules/runFaQCs/runFaQCs.nf
Original file line number Diff line number Diff line change
@@ -1,48 +1,5 @@
#!/usr/bin/env nextflow

//plotting for trimmed reads from ONT
process nanoplot {
label "qc"
publishDir(
path: "${settings["outDir"]}/QcReads",
mode: 'copy'
)
input:
val settings
path unpaired

output:
path "*" //lots of output plots

script:
"""
NanoPlot --fastq $unpaired --N50 --loglength -t ${settings["cpus"]} -f pdf --outdir . 2>/dev/null
"""

}


//Porechop for removing adapters from ONT or PacBio reads
process porechop {
label "qc"
publishDir(
path: "${settings["outDir"]}/QcReads",
mode: 'copy'
)


input:
val settings
path trimmed
path log
output:
path "*.porechop.fastq", emit: porechopped

script:
"""
porechop -i $trimmed -o ./QC.unpaired.porechop.fastq -t ${settings["cpus"]} > $log
"""
}

//double-checks that any provided adapter file is in FASTA format
process adapterFileCheck {
Expand All @@ -60,8 +17,6 @@ process adapterFileCheck {
}

//main QC process. puts parameters together and runs FaQCs.
//EDGE currently uses a custom script (illumina_fastq_QC.pl) to handle QC for long reads,
//but it was unable to create report files when I attempted using it. For now, all input reads go through FaQCs.
process qc {
label "qc"
publishDir(
Expand All @@ -71,8 +26,7 @@ process qc {

input:
val settings
path paired
path unpaired
path fastq
val validAdapter
path adapter
val avgLen
Expand All @@ -86,84 +40,65 @@ process qc {

script:
//adjust minLength
def min = settings["minLength"]
if(settings["minLength"] < 1) {
min = Math.abs(settings["minLength"] * avgLen.toInteger())
def min = settings["minLen"]
if(settings["minLen"] < 1) {
min = Math.abs(settings["minLen"] * avgLen.toInteger())
}

def qcSoftware = "FaQCs"
// if(params.ontFlag || params.pacbioFlag) {
// qcSoftware = "illumina_fastq_QC.pl"
// }
def pairedArg = paired.name != "NO_FILE" ? "-1 ${paired[0]} -2 ${paired[1]}" : ""
// if(pairedArg != "" && (params.ontFlag || params.pacbioFlag)) {
// pairedArg = "-p $paired"
// }
def unpairedArg = unpaired.name != "NO_FILE2" ? "-u $unpaired" : ""


def inputArg = settings["pairedFile"] ? "-1 ${fastq[0]} -2 ${fastq[1]}" : "-u $fastq"

def adapterArg = ""
if(adapter.name != "NO_FILE3" && validAdapter == "Yes"){
adapterArg = "--adapter --artifactFile $adapter"
}

def polyA = settings["polyA"] ? "--polyA" : ""
def trim = ""
// if(params.ontFlag || params.pacbioFlag) {
// trim = "--trim_only"
// }
def ascii = settings["phredOffset"] != null ? "--ascii ${settings["phredOffset"]}" : ""
def phiX = settings["filtPhiX"] ? "--phiX" : ""

"""
$qcSoftware $pairedArg $unpairedArg \
-q ${settings["qualityCutoff"]} --min_L $min --avg_q ${settings["avgQuality"]} \
-n ${settings["numN"]} --lc ${settings["lowComplexity"]} --5end ${settings["cut5end"]} --3end ${settings["cut3end"]} \
--split_size ${settings["splitSize"]} -d . -t ${settings["cpus"]} \
$qcSoftware $inputArg \
-q ${settings["trimQual"]} --min_L $min --avg_q ${settings["avgQual"]} \
-n ${settings["numN"]} --lc ${settings["filtLC"]} --5end ${settings["trim5end"]} --3end ${settings["trim3end"]} \
--split_size 1000000 -d . -t ${settings["cpus"]} \
$polyA \
$trim \
$adapterArg \
$ascii \
$phiX
1>QC.log 2>&1
"""
}

workflow FAQCS {
take:
settings
paired
unpaired
fastq
avgLen


main:

//adapter setup
adapter_ch = channel.fromPath(settings["adapter"], checkIfExists:true)
adapter_ch = channel.fromPath(settings["artifactFile"], checkIfExists:true)
//checks to see if the provided adapter file is a valid FASTA
adapterFileCheck(adapter_ch)

//main QC process
qc(settings, paired, unpaired, adapterFileCheck.out, adapter_ch, avgLen)
qc(settings, fastq, adapterFileCheck.out, adapter_ch, avgLen)


trimmed = channel.empty()
if(settings["pairedFile"]) {
trimmed = qc.out.pairedQC
}
else {
trimmed = qc.out.unpairedQC
}
paired = qc.out.pairedQC
unpaired = qc.out.unpairedQC

//long read trimming and plotting
if(settings["ontFlag"]) {
nanoplot_ch = channel.empty()
if(settings["porechop"]) {
porechop(settings, unpaired, qc.out.log)
nanoplot(settings, porechop.out.porechopped)
unpaired = porechop.out.porechopped
}
else {
nanoplot(settings, unpaired_ch)
unpaired = porechop.out.porechopped

}
}

emit:
paired
unpaired
trimmed

}
17 changes: 7 additions & 10 deletions modules/sra2fastq/sra2fastq.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env nextflow
//to run: nextflow [OPT: -log /path/to/log file] run sra2fastq.nf -params-file [JSON parameter file]
//to run: nextflow run sra2fastq.nf -params-file [JSON parameter file]
//not supporting filesize or run count restrictions


Expand All @@ -9,7 +9,7 @@ process sraDownload {
tag "$accession"
publishDir "${settings["outDir"]}/SRA_Download", mode: 'copy'

//retries download in case of transient failure, then completes any processes that didn't fail
//retries download in case of transient failure, then completes any downloads that didn't fail
maxRetries 3
errorStrategy { (task.attempt <= maxRetries) ? 'retry' : 'finish' }

Expand All @@ -19,15 +19,14 @@ process sraDownload {
val settings

output:
path "$accession/${accession}.fastq.gz", emit: unpairedSRA, optional:true
path "$accession/${accession}_{1,2}.fastq.gz", emit: pairedSRA, optional:true
path "$accession/${accession}*.fastq.gz", emit: files
path "$accession/${accession}_metadata.txt"
path "$accession/sra2fastq_temp/*", optional: true //needed output?

script:
//conditionally create command-line options based on non-empty parameters, for use in the command below
def clean = settings["clean"] != null ? "--clean True" : ""
def platform_restrict = settings["platformRestrict"] != null ? "--platform_restrict ${settings["platformRestrict"]}" : ""
def clean = settings["clean"] ? "--clean True" : ""
def platform_restrict = settings["fastqSource"] != null ? "--platform_restrict ${settings["fastqSource"]}" : ""

//invoke sra2fastq.py with those options
"""
Expand All @@ -46,10 +45,8 @@ workflow SRA2FASTQ {
accessions_ch = channel.of(settings["accessions"])
sraDownload(accessions_ch.flatten().unique(), settings)

paired = sraDownload.out.pairedSRA
unpaired = sraDownload.out.unpairedSRA
fastq = sraDownload.out.files

emit:
paired
unpaired
fastq
}
Loading

0 comments on commit d7f92b3

Please sign in to comment.