diff --git a/main.nf b/main.nf index 2576f00..b1c6293 100644 --- a/main.nf +++ b/main.nf @@ -3,36 +3,26 @@ include {SRA2FASTQ} from './modules/sra2fastq/sra2fastq.nf' include {COUNTFASTQ} from './modules/countFastq/countFastq.nf' include {FAQCS} from './modules/runFaQCs/runFaQCs.nf' -include {HOSTREMOVAL} from './modules/hostRemoval/hostRemoval.nf' workflow { //input specification - pairedFiles = channel.fromPath(params.pairedFiles, checkIfExists:true) - unpairedFiles = channel.fromPath(params.unpairedFiles, checkIfExists:true) + fastqFiles = channel.fromPath(params.shared.inputFastq, checkIfExists:true) if(params.modules.sra2fastq) { SRA2FASTQ(params.sra2fastq.plus(params.shared)) - pairedFiles = pairedFiles.concat(SRA2FASTQ.out.paired).flatten() - unpairedFiles = unpairedFiles.concat(SRA2FASTQ.out.unpaired).flatten() + fastqFiles = fastqFiles.concat(SRA2FASTQ.out.fastq).flatten() } - COUNTFASTQ(pairedFiles.collect(), unpairedFiles.collect()) + COUNTFASTQ(params.shared, fastqFiles.collect()) avgLen = COUNTFASTQ.out.avgReadLen - paired = COUNTFASTQ.out.paired.ifEmpty(params.pairedFiles) - unpaired = COUNTFASTQ.out.unpaired.ifEmpty(params.unpairedFiles) + fastqFiles = COUNTFASTQ.out.fastqFiles if(params.modules.faqcs) { - FAQCS(params.faqcs.plus(params.shared),paired,unpaired,avgLen) - paired = FAQCS.out.paired.ifEmpty(params.pairedFiles) - unpaired = FAQCS.out.unpaired.ifEmpty(params.unpairedFiles) - } - - if(params.modules.hostRemoval) { - HOSTREMOVAL(params.hostRemoval.plus(params.shared),paired,unpaired) + FAQCS(params.faqcs.plus(params.shared), fastqFiles,avgLen) } } \ No newline at end of file diff --git a/modules/countFastq/countFastq.nf b/modules/countFastq/countFastq.nf index 3eb4f80..2c177d1 100644 --- a/modules/countFastq/countFastq.nf +++ b/modules/countFastq/countFastq.nf @@ -3,37 +3,26 @@ process countFastq { label "countFastq" input: - path paired - path unpaired + val settings + path fastq output: path "fastqCount.txt", emit: counts - path "all.{1,2}.fastq", emit: allPaired, optional:true - path "all.se.fastq", emit: allUnpaired, optional:true + path "all.*.fastq", emit: allFiles script: - if(paired.size() > 1 && paired[0] =~ /NO_FILE/) { - paired = paired.tail().join(" ") + file_list = "" + if(settings["pairedFile"]) { + file_list = "-p $fastq" } else { - paired = paired.join(" ") + file_list = "-u $fastq" } - if(unpaired.size() > 1 && unpaired[0] =~ /NO_FILE/) { - unpaired = unpaired.tail().join(" ") - } - else { - unpaired = unpaired.join(" ") - } - - - paired_list = paired.startsWith("NO_FILE") ? "" : "-p ${paired}" - unpaired_list = unpaired.startsWith("NO_FILE2") ? "" : "-u ${unpaired}" """ getAvgLen.pl\ - $paired_list\ - $unpaired_list\ + $file_list\ -d . """ } @@ -70,19 +59,16 @@ process avgLen { //calculates average read length and concatenates input files workflow COUNTFASTQ { take: - pairedFiles - unpairedFiles + settings + inputFastq main: - countFastq(pairedFiles, unpairedFiles) + countFastq(settings, inputFastq) avgReadLen = avgLen(countFastq.out.counts) - paired = countFastq.out.allPaired - unpaired = countFastq.out.allUnpaired - + fastqFiles = countFastq.out.allFiles emit: avgReadLen - paired - unpaired + fastqFiles } \ No newline at end of file diff --git a/modules/runFaQCs/runFaQCs.nf b/modules/runFaQCs/runFaQCs.nf index e0e6fb9..949858e 100644 --- a/modules/runFaQCs/runFaQCs.nf +++ b/modules/runFaQCs/runFaQCs.nf @@ -1,48 +1,5 @@ #!/usr/bin/env nextflow -//plotting for trimmed reads from ONT -process nanoplot { - label "qc" - publishDir( - path: "${settings["outDir"]}/QcReads", - mode: 'copy' - ) - input: - val settings - path unpaired - - output: - path "*" //lots of output plots - - script: - """ - NanoPlot --fastq $unpaired --N50 --loglength -t ${settings["cpus"]} -f pdf --outdir . 2>/dev/null - """ - -} - - -//Porechop for removing adapters from ONT or PacBio reads -process porechop { - label "qc" - publishDir( - path: "${settings["outDir"]}/QcReads", - mode: 'copy' - ) - - - input: - val settings - path trimmed - path log - output: - path "*.porechop.fastq", emit: porechopped - - script: - """ - porechop -i $trimmed -o ./QC.unpaired.porechop.fastq -t ${settings["cpus"]} > $log - """ -} //double-checks that any provided adapter file is in FASTA format process adapterFileCheck { @@ -60,8 +17,6 @@ process adapterFileCheck { } //main QC process. puts parameters together and runs FaQCs. -//EDGE currently uses a custom script (illumina_fastq_QC.pl) to handle QC for long reads, -//but it was unable to create report files when I attempted using it. For now, all input reads go through FaQCs. process qc { label "qc" publishDir( @@ -71,8 +26,7 @@ process qc { input: val settings - path paired - path unpaired + path fastq val validAdapter path adapter val avgLen @@ -86,20 +40,15 @@ process qc { script: //adjust minLength - def min = settings["minLength"] - if(settings["minLength"] < 1) { - min = Math.abs(settings["minLength"] * avgLen.toInteger()) + def min = settings["minLen"] + if(settings["minLen"] < 1) { + min = Math.abs(settings["minLen"] * avgLen.toInteger()) } def qcSoftware = "FaQCs" - // if(params.ontFlag || params.pacbioFlag) { - // qcSoftware = "illumina_fastq_QC.pl" - // } - def pairedArg = paired.name != "NO_FILE" ? "-1 ${paired[0]} -2 ${paired[1]}" : "" - // if(pairedArg != "" && (params.ontFlag || params.pacbioFlag)) { - // pairedArg = "-p $paired" - // } - def unpairedArg = unpaired.name != "NO_FILE2" ? "-u $unpaired" : "" + + + def inputArg = settings["pairedFile"] ? "-1 ${fastq[0]} -2 ${fastq[1]}" : "-u $fastq" def adapterArg = "" if(adapter.name != "NO_FILE3" && validAdapter == "Yes"){ @@ -107,21 +56,16 @@ process qc { } def polyA = settings["polyA"] ? "--polyA" : "" - def trim = "" - // if(params.ontFlag || params.pacbioFlag) { - // trim = "--trim_only" - // } - def ascii = settings["phredOffset"] != null ? "--ascii ${settings["phredOffset"]}" : "" + def phiX = settings["filtPhiX"] ? "--phiX" : "" """ - $qcSoftware $pairedArg $unpairedArg \ - -q ${settings["qualityCutoff"]} --min_L $min --avg_q ${settings["avgQuality"]} \ - -n ${settings["numN"]} --lc ${settings["lowComplexity"]} --5end ${settings["cut5end"]} --3end ${settings["cut3end"]} \ - --split_size ${settings["splitSize"]} -d . -t ${settings["cpus"]} \ + $qcSoftware $inputArg \ + -q ${settings["trimQual"]} --min_L $min --avg_q ${settings["avgQual"]} \ + -n ${settings["numN"]} --lc ${settings["filtLC"]} --5end ${settings["trim5end"]} --3end ${settings["trim3end"]} \ + --split_size 1000000 -d . -t ${settings["cpus"]} \ $polyA \ - $trim \ $adapterArg \ - $ascii \ + $phiX 1>QC.log 2>&1 """ } @@ -129,41 +73,32 @@ process qc { workflow FAQCS { take: settings - paired - unpaired + fastq avgLen main: //adapter setup - adapter_ch = channel.fromPath(settings["adapter"], checkIfExists:true) + adapter_ch = channel.fromPath(settings["artifactFile"], checkIfExists:true) //checks to see if the provided adapter file is a valid FASTA adapterFileCheck(adapter_ch) //main QC process - qc(settings, paired, unpaired, adapterFileCheck.out, adapter_ch, avgLen) + qc(settings, fastq, adapterFileCheck.out, adapter_ch, avgLen) + + trimmed = channel.empty() + if(settings["pairedFile"]) { + trimmed = qc.out.pairedQC + } + else { + trimmed = qc.out.unpairedQC + } paired = qc.out.pairedQC unpaired = qc.out.unpairedQC - //long read trimming and plotting - if(settings["ontFlag"]) { - nanoplot_ch = channel.empty() - if(settings["porechop"]) { - porechop(settings, unpaired, qc.out.log) - nanoplot(settings, porechop.out.porechopped) - unpaired = porechop.out.porechopped - } - else { - nanoplot(settings, unpaired_ch) - unpaired = porechop.out.porechopped - - } - } - emit: - paired - unpaired + trimmed } \ No newline at end of file diff --git a/modules/sra2fastq/sra2fastq.nf b/modules/sra2fastq/sra2fastq.nf index cac3000..0cc3e76 100644 --- a/modules/sra2fastq/sra2fastq.nf +++ b/modules/sra2fastq/sra2fastq.nf @@ -1,5 +1,5 @@ #!/usr/bin/env nextflow -//to run: nextflow [OPT: -log /path/to/log file] run sra2fastq.nf -params-file [JSON parameter file] +//to run: nextflow run sra2fastq.nf -params-file [JSON parameter file] //not supporting filesize or run count restrictions @@ -9,7 +9,7 @@ process sraDownload { tag "$accession" publishDir "${settings["outDir"]}/SRA_Download", mode: 'copy' - //retries download in case of transient failure, then completes any processes that didn't fail + //retries download in case of transient failure, then completes any downloads that didn't fail maxRetries 3 errorStrategy { (task.attempt <= maxRetries) ? 'retry' : 'finish' } @@ -19,15 +19,14 @@ process sraDownload { val settings output: - path "$accession/${accession}.fastq.gz", emit: unpairedSRA, optional:true - path "$accession/${accession}_{1,2}.fastq.gz", emit: pairedSRA, optional:true + path "$accession/${accession}*.fastq.gz", emit: files path "$accession/${accession}_metadata.txt" path "$accession/sra2fastq_temp/*", optional: true //needed output? script: //conditionally create command-line options based on non-empty parameters, for use in the command below - def clean = settings["clean"] != null ? "--clean True" : "" - def platform_restrict = settings["platformRestrict"] != null ? "--platform_restrict ${settings["platformRestrict"]}" : "" + def clean = settings["clean"] ? "--clean True" : "" + def platform_restrict = settings["fastqSource"] != null ? "--platform_restrict ${settings["fastqSource"]}" : "" //invoke sra2fastq.py with those options """ @@ -46,10 +45,8 @@ workflow SRA2FASTQ { accessions_ch = channel.of(settings["accessions"]) sraDownload(accessions_ch.flatten().unique(), settings) - paired = sraDownload.out.pairedSRA - unpaired = sraDownload.out.unpairedSRA + fastq = sraDownload.out.files emit: - paired - unpaired + fastq } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 818ea0b..bec3704 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,52 +1,42 @@ params { - //input - pairedFiles = ["${projectDir}/nf_assets/NO_FILE"] - unpairedFiles = ["${projectDir}/nf_assets/NO_FILE2"] - //which modules are run modules { sra2fastq = false faqcs = false - hostRemoval = true + hostRemoval = false annotation = false } //module parameters -- passed directly into subworkflows according to best practices //these are the default values shared { - outDir = "./testing_output" - projName = "PROJECT" + inputFastq = null + pairedFile = false + outDir = "EDGE_output" + projName = "Project" cpus = 8 - fastqSource = "Illumina" + fastqSource = null } sra2fastq { - clean = null - platformRestrict = null + clean = false accessions = [] } faqcs{ - adapter = "${projectDir}/nf_assets/NO_FILE3" - - qualityCutoff = 5 - minLength = 50 - avgQuality = 0 - numN = 10 - lowComplexity = 0.85 - cut3end = 0 - cut5end = 0 - splitSize = 1000000 - - ontFlag = false - pacbioFlag = false - polyA = false - - phredOffset = null - - porechop = false + trimQual = 20 + trim5end = 0 + trim3end = 0 + trimAdapter = false + trimPolyA = false + artifactFile = "${projectDir}/nf_assets/NO_FILE3" + minLen = 50 + avgQual = 0 + numN = 2 + filtLC = 0.85 + filtPhiX = false } hostRemoval { @@ -70,7 +60,7 @@ params { } //container settings -singularity { +apptainer { enabled = true runOptions = "--compat" } @@ -97,5 +87,6 @@ executor { submitRateLimit = '1/5sec' } + //cleanup -cleanup = false +cleanup = true \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config index 7e7ba26..8431ecb 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -4,4 +4,4 @@ ======================================================================================== */ -cleanup = false \ No newline at end of file +cleanup = true \ No newline at end of file