diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml new file mode 100644 index 0000000..3a049c4 --- /dev/null +++ b/.github/workflows/ci-tests.yml @@ -0,0 +1,39 @@ +name: CI Tests + +on: [push] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + shard: [1, 2, 3, 4] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 17 + uses: actions/setup-java@v2 + with: + java-version: '17' + distribution: 'adopt' + + - name: Set up Apptainer 1.3.0 + uses: eWaterCycle/setup-apptainer@v2 + with: + apptainer-version: 1.3.0 + + + - name: Setup Nextflow 24.10.1 + uses: nf-core/setup-nextflow@v1 + with: + version: "24.10.1" + + - name: Install nf-test + run: | + wget -qO- https://get.nf-test.com | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run Tests (Shard ${{ matrix.shard }}/${{ strategy.job-total }}) + run: nf-test test --ci --shard ${{ matrix.shard }}/${{ strategy.job-total }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1acbcec..7289e52 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ **/__pycache__/* .nextflow* -**/work** +**/work/** */logs/* **/ec_info/* .nf-test* \ No newline at end of file diff --git a/main.nf b/main.nf index b1c6293..2ec5b67 100644 --- a/main.nf +++ b/main.nf @@ -3,12 +3,20 @@ include {SRA2FASTQ} from './modules/sra2fastq/sra2fastq.nf' include {COUNTFASTQ} from './modules/countFastq/countFastq.nf' include {FAQCS} from './modules/runFaQCs/runFaQCs.nf' +include {HOSTREMOVAL} from './modules/hostRemoval/hostRemoval.nf' +include {ASSEMBLY} from './modules/runAssembly/runAssembly.nf' +include {READSTOCONTIGS} from './modules/runReadsToContig/runReadsToContig.nf' workflow { //input specification - + fastqFiles = channel.fromPath(params.shared.inputFastq, checkIfExists:true) + contigs = channel.empty() + if(params.r2c.useAssembledContigs) { + contigs = channel.fromPath(params.shared.inputContigs, checkIfExists:true) + } + if(params.modules.sra2fastq) { SRA2FASTQ(params.sra2fastq.plus(params.shared)) @@ -23,6 +31,21 @@ workflow { if(params.modules.faqcs) { FAQCS(params.faqcs.plus(params.shared), fastqFiles,avgLen) + paired = FAQCS.out.paired.ifEmpty(params.pairedFiles) + unpaired = FAQCS.out.unpaired.ifEmpty(params.unpairedFiles) } + if(params.modules.hostRemoval) { + HOSTREMOVAL(params.hostRemoval.plus(params.shared),paired,unpaired) + paired = HOSTREMOVAL.out.paired.ifEmpty(params.pairedFiles) + unpaired = HOSTREMOVAL.out.unpaired.ifEmpty(params.unpairedFiles) + } + + if(params.modules.runAssembly && !params.r2c.useAssembledContigs) { + ASSEMBLY(params.assembly.plus(params.shared), paired, unpaired, avgLen) + contigs = ASSEMBLY.out.outContigs + READSTOCONTIGS(params.r2c.plus(params.shared), paired, unpaired, contigs) + } + + } \ No newline at end of file diff --git a/modules/hostRemoval/hostRemoval.nf b/modules/hostRemoval/hostRemoval.nf index 5493c22..3b3083f 100644 --- a/modules/hostRemoval/hostRemoval.nf +++ b/modules/hostRemoval/hostRemoval.nf @@ -73,7 +73,7 @@ process collectCleanPairedReads { path(hostFiles, stageAs: 'host?.fastq') output: - path "hostclean.{1,2}.fastq" + path "hostclean.{1,2}.fastq", emit: paired path "merged_host_unique.fastq", emit: hostMerged script: @@ -97,7 +97,7 @@ process collectCleanPairedReadsOneHost { path cleanedFiles output: - path "hostclean.{1,2}.fastq" + path "hostclean.{1,2}.fastq", emit:paired script: @@ -119,7 +119,7 @@ process collectCleanSingleReads 
{ path remainingUnpairedReads output: - path "hostclean.unpaired.fastq" + path "hostclean.unpaired.fastq", emit:unpaired script: """ @@ -158,7 +158,6 @@ workflow HOSTREMOVAL{ unpaired main: - providedRef = channel.fromPath(settings["host"], checkIfExists:true) //remove host reads in parallel @@ -166,23 +165,26 @@ workflow HOSTREMOVAL{ cleaned1_ch = hostRemoval.out.cleaned1.collect() cleaned2_ch = hostRemoval.out.cleaned2.collect() - //more than one host - if (settings["host"].size() > 1) { + if (([] + settings["host"]).size() > 1) { //merge clean paired-end reads (intersection) collectCleanPairedReads(settings, cleaned1_ch, cleaned2_ch, hostRemoval.out.hostReads.collect()) + paired = collectCleanPairedReads.out.paired //calculate overall stats and create PDF hostRemovalStats(settings, hostRemoval.out.cleanstats.collect(), collectCleanPairedReads.out.hostMerged) } else { //no need to merge if only reads from one host were removed - collectCleanPairedReadsOneHost(settings, cleaned1_ch.concat(cleaned2_ch)) + paired = collectCleanPairedReadsOneHost(settings, cleaned1_ch.concat(cleaned2_ch)).collect() //calculate overall stats and create PDF hostRemovalStats(settings, hostRemoval.out.cleanstats.collect(), hostRemoval.out.hostReads) } - //merge clean unpaired reads (removing any duplicates by read name) - collectCleanSingleReads(settings, hostRemoval.out.cleanedSingleton.collect()) + unpaired = collectCleanSingleReads(settings, hostRemoval.out.cleanedSingleton.collect()) + + emit: + paired + unpaired } \ No newline at end of file diff --git a/modules/hostRemoval/test_files/parameters/hostRemoval_basic.json b/modules/hostRemoval/test_files/parameters/hostRemoval_basic.json deleted file mode 100644 index dd8f514..0000000 --- a/modules/hostRemoval/test_files/parameters/hostRemoval_basic.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "outDir":"test_out", - "inputFiles": ["test_files/SRR28835063_1.fastq", "test_files/SRR28835063_2.fastq"], - "host": ["test_files/Gallus_gallus.bGalGal1.mat.broiler.GRCg7b.dna.toplevel.fa"], - "cpus": 4 -} \ No newline at end of file diff --git a/modules/hostRemoval/test_files/parameters/hostRemoval_options.json b/modules/hostRemoval/test_files/parameters/hostRemoval_options.json deleted file mode 100644 index e3c806a..0000000 --- a/modules/hostRemoval/test_files/parameters/hostRemoval_options.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "outDir":"test_out", - "inputFiles": ["test_files/SRR28835063_1.fastq", "test_files/SRR28835063_2.fastq"], - "host": "test_files/Gallus_gallus.bGalGal1.mat.broiler.GRCg7b.dna.toplevel.fa", - "similarity": 85, - "cpus": 5 -} \ No newline at end of file diff --git a/modules/hostRemoval/test_files/parameters/hostRemoval_unpaired.json b/modules/hostRemoval/test_files/parameters/hostRemoval_unpaired.json deleted file mode 100644 index 1b3cffd..0000000 --- a/modules/hostRemoval/test_files/parameters/hostRemoval_unpaired.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "outDir":"test_out", - "inputFiles": ["test_files/SRR28835063_1.fastq"], - "host": "test_files/Gallus_gallus.bGalGal1.mat.broiler.GRCg7b.dna.toplevel.fa", - "cpus": 4 -} \ No newline at end of file diff --git a/runAssembly/Dockerfile b/modules/runAssembly/Dockerfile similarity index 89% rename from runAssembly/Dockerfile rename to modules/runAssembly/Dockerfile index 0a9832c..25cd9af 100644 --- a/runAssembly/Dockerfile +++ b/modules/runAssembly/Dockerfile @@ -31,7 +31,6 @@ RUN conda install -n assembly git RUN conda install -c conda-forge conda-pack ADD bin/extractLongReads.pl 
/opt/conda/envs/assembly/bin -ADD bin/getAvgLen.pl /opt/conda/envs/assembly/bin ADD bin/renameFilterFasta.pl /opt/conda/envs/assembly/bin RUN conda-pack -n assembly -o /tmp/env.tar && \ @@ -43,12 +42,12 @@ RUN /venv/bin/conda-unpack FROM debian:latest AS runtime COPY --from=build /venv /venv -ENV PERL5LIB=/venv/lib/perl5/core_perl +RUN /venv/bin/git clone --depth 1 https://gitlab.com/chienchi/long_read_assembly.git +RUN apt-get update && apt-get install procps -y && apt-get clean -ENV PATH="/venv/bin:$PATH" -RUN git clone --depth 1 https://gitlab.com/chienchi/long_read_assembly.git -ENV PATH="/long_read_assembly:$PATH" +ENV PATH="/venv/bin:/long_read_assembly:$PATH" +ENV PERL5LIB=/venv/lib/perl5/core_perl SHELL ["/bin/bash", "-c"] CMD /bin/bash \ No newline at end of file diff --git a/runAssembly/bin/extractLongReads.pl b/modules/runAssembly/bin/extractLongReads.pl similarity index 100% rename from runAssembly/bin/extractLongReads.pl rename to modules/runAssembly/bin/extractLongReads.pl diff --git a/runAssembly/bin/renameFilterFasta.pl b/modules/runAssembly/bin/renameFilterFasta.pl similarity index 100% rename from runAssembly/bin/renameFilterFasta.pl rename to modules/runAssembly/bin/renameFilterFasta.pl diff --git a/runAssembly/runAssembly.nf b/modules/runAssembly/runAssembly.nf similarity index 61% rename from runAssembly/runAssembly.nf rename to modules/runAssembly/runAssembly.nf index e9e005d..bd50786 100644 --- a/runAssembly/runAssembly.nf +++ b/modules/runAssembly/runAssembly.nf @@ -6,8 +6,9 @@ //main process for assembly with IDBA process idbaUD { + label "assembly" publishDir ( - path:"$params.outDir/AssemblyBasedAnalysis", + path:"${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', saveAs: { filename -> @@ -24,6 +25,7 @@ process idbaUD { ) input: + val settings path short_paired path short_single path long_reads @@ -50,19 +52,19 @@ process idbaUD { def longReadsFile = long_reads.name != "NO_FILE3" ? "-l $long_reads" : "" def maxK = 121 def maxK_option = "--maxk $maxK " - if (params.idba.maxK == null || params.idba.maxK > avg_len) { + if (settings["idba"]["maxK"] == null || settings["idba"]["maxK"] > avg_len) { if(avg_len > 0 && avg_len <= 151) { maxK = avg_len - 1 maxK_option = "--maxk ${avg_len - 1}" } } - def minK = params.idba.minK != null ? "--mink $params.idba.minK " : "" - def step = params.idba.step != null ? "--step $params.idba.step " : "" - def minLen = params.minContigSize != null ? "--min_contig $params.minContigSize " : "" + minK = settings["idba"]["minK"] != null ? "--mink ${settings["idba"]["minK"]} " : "" + step = settings["idba"]["step"] != null ? "--step ${settings["idba"]["step"]} " : "" + minLen = settings["minContigSize"] != null ? "--min_contig ${settings["minContigSize"]} " : "" - def memLimit = params.memLimit != null ? "ulimit -v $params.memLimit 2>/dev/null;" : "" + memLimit = settings["memLimit"] != null ? "ulimit -v ${settings["memLimit"]} 2>/dev/null;" : "" """ - ${memLimit}idba_ud --pre_correction -o . --num_threads $params.threads\ + ${memLimit}idba_ud --pre_correction -o . 
--num_threads ${settings["cpus"]}\ $runFlag\ $longReadsFile\ $maxK_option\ @@ -77,6 +79,8 @@ process idbaUD { //prep for idba process idbaExtractLong { + label "assembly" + input: path paired path unpaired @@ -99,6 +103,7 @@ process idbaExtractLong { //prep for idba process idbaPrepReads { + label "assembly" input: path paired path unpaired @@ -118,60 +123,13 @@ process idbaPrepReads { } -//prep for idba -process idbaReadFastq { - input: - path paired - path unpaired - - output: - path "fastqCount.txt" - - script: - def paired_list = paired.name != "NO_FILE" ? "-p ${paired}" : "" - def unpaired_list = unpaired.name != "NO_FILE2" ? "-u ${unpaired}" : "" - - """ - getAvgLen.pl\ - $paired_list\ - $unpaired_list\ - -d . - """ -} - -//prep for idba -process idbaAvgLen { - input: - - path countFastq - - output: - stdout - - shell: - ''' - #!/usr/bin/env perl - my $fastq_count_file = "./!{countFastq}"; - my $total_count = 0; - my $total_len = 0; - open (my $fh, "<", $fastq_count_file) or die "Cannot open $fastq_count_file\n"; - while(<$fh>){ - chomp; - my ($name,$count,$len,$avg) = split /\t/,$_; - $total_count += $count; - $total_len += $len; - } - close $fh; - my $avg_len = ($total_count > 0)? $total_len/$total_count : 0; - print "$avg_len"; - ''' -} //assemble using spades process spades { + label "assembly" publishDir ( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', saveAs: { filename -> @@ -182,7 +140,7 @@ process spades { "scaffold.fa" } else if(filename ==~ /assembly_graph\.fastg/) { - "${params.projName}_contigs.fastg" + "${settings["projName"]}_contigs.fastg" } else if(filename ==~ /assembly_graph_with_scaffolds\.gfa/) { "assembly_graph_with_scaffolds.gfa" @@ -200,6 +158,7 @@ process spades { ) input: + val settings path paired path unpaired path pacbio @@ -220,19 +179,19 @@ process spades { def unpaired = unpaired.name != "NO_FILE2" ? "--s1 $unpaired " : "" def pacbio_file = pacbio.name != "NO_FILE3" ? "--pacbio $pacbio " : "" def nanopore_file = nanopore.name != "NO_FILE4" ? "--nanopore $nanopore " : "" - def meta_flag = (paired != "" && params.spades.algorithm == "metagenome") ? "--meta " : "" - def sc_flag = params.spades.algorithm == "singlecell" ? "--sc " : "" - def rna_flag = params.spades.algorithm == "rna" ? "--rna " : "" - def plasmid_flag = params.spades.algorithm == "plasmid" ? "--plasmid " : "" - def bio_flag = params.spades.algorithm == "bio" ? "--bio " : "" - def corona_flag = params.spades.algorithm == "corona" ? "--corona " : "" - def metaviral_flag = params.spades.algorithm == "metaviral" ? "--metaviral " : "" - def metaplasmid_flag = params.spades.algorithm == "metaplasmid" ? "--metaplasmid " : "" - def rnaviral_flag = params.spades.algorithm == "rnaviral" ? "--rnaviral " : "" - def memLimit = params.memLimit != null ? "-m ${params.memLimit/1024*1024}" : "" + def meta_flag = (paired != "" && settings["spades"]["algorithm"] == "metagenome") ? "--meta " : "" + def sc_flag = settings["spades"]["algorithm"] == "singlecell" ? "--sc " : "" + def rna_flag = settings["spades"]["algorithm"] == "rna" ? "--rna " : "" + def plasmid_flag = settings["spades"]["algorithm"] == "plasmid" ? "--plasmid " : "" + def bio_flag = settings["spades"]["algorithm"] == "bio" ? "--bio " : "" + def corona_flag = settings["spades"]["algorithm"] == "corona" ? "--corona " : "" + def metaviral_flag = settings["spades"]["algorithm"] == "metaviral" ? 
"--metaviral " : "" + def metaplasmid_flag = settings["spades"]["algorithm"] == "metaplasmid" ? "--metaplasmid " : "" + def rnaviral_flag = settings["spades"]["algorithm"] == "rnaviral" ? "--rnaviral " : "" + def memLimit = settings["memLimit"] != null ? "-m ${settings["memLimit"]}" : "" """ - spades.py -o . -t $params.threads\ + spades.py -o . -t ${settings["cpus"]}\ $paired\ $meta_flag\ $sc_flag\ @@ -253,8 +212,9 @@ process spades { //assemble using megahit process megahit { + label "assembly" publishDir( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', saveAs: { filename -> @@ -277,6 +237,7 @@ process megahit { ) input: + val settings path paired path unpaired @@ -284,16 +245,16 @@ process megahit { path "megahit/log" path "megahit/scaffold*.{fa,fasta}", optional:true //I don't believe this is a normal output of megahit, but just in case path "megahit/final.contigs.fa", emit: contigs, optional:true - path "${params.projName}_contigs.fastg" + path "${settings["projName"]}_contigs.fastg" path "megahit/{contig-*,*contigs.fa,K*/final_contigs.fasta}", emit: intContigs script: def paired = paired.name != "NO_FILE" ? "-1 ${paired[0]} -2 ${paired[1]} " : "" def unpaired = unpaired.name != "NO_FILE2" ? "-r $unpaired " : "" - def megahit_preset = params.megahit.preset != null ? "--presets $params.megahit.preset " : "" + def megahit_preset = settings["megahit"]["preset"] != null ? "--presets ${settings["megahit"]["preset"]} " : "" """ - megahit -o ./megahit -t $params.threads\ + megahit -o ./megahit -t ${settings["cpus"]}\ $megahit_preset\ $paired\ $unpaired\ @@ -301,7 +262,7 @@ process megahit { LARGESTKMER=\$(head -n 1 megahit/final.contigs.fa | perl -ne '/^>k(\\d+)\\_/; print \$1;') - megahit_toolkit contig2fastg \$LARGESTKMER megahit/intermediate_contigs/k\${LARGESTKMER}.contigs.fa > ${params.projName}_contigs.fastg + megahit_toolkit contig2fastg \$LARGESTKMER megahit/intermediate_contigs/k\${LARGESTKMER}.contigs.fa > ${settings["projName"]}_contigs.fastg """ } @@ -309,8 +270,9 @@ process megahit { //assembly using unicycler process unicycler { + label "assembly" publishDir ( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', saveAs: { filename -> @@ -321,7 +283,7 @@ process unicycler { "scaffold.fa" } else if(filename.equals("assembly.gfa")) { - "${params.projName}_contigs.fastg" + "${settings["projName"]}_contigs.fastg" } else if(filename ==~ /assembly\.fasta/) { null //don't publish, but emit for use in downstream process "renameFilterFasta" @@ -333,6 +295,7 @@ process unicycler { ) input: + val settings path paired path unpaired path longreads //If present, expects filtered long reads. @@ -348,12 +311,12 @@ process unicycler { def paired = paired.name != "NO_FILE" ? "-1 ${paired[0]} -2 ${paired[1]} " : "" def unpaired = unpaired.name != "NO_FILE2" ? "-r $unpaired " : "" def filt_lr = longreads.name != "NO_FILE3" ? "-l $longreads " : "" - def bridge = params.unicycler.bridgingMode != "normal" ? "--mode $params.unicycler.bridgingMode" : "--mode normal" + def bridge = settings["unicycler"]["bridgingMode"] != "normal" ? 
"--mode ${settings["unicycler"]["bridgingMode"]} " : "--mode normal" """ export _JAVA_OPTIONS='-Xmx20G'; export TERM='xterm'; - unicycler -t $params.threads -o .\ + unicycler -t ${settings["cpus"]} -o .\ $paired\ $filt_lr\ $bridge 2>&1 1>/dev/null @@ -363,7 +326,10 @@ process unicycler { //filter long reads for unicycler process unicyclerPrep { + label "assembly" + input: + val settings path longreads @@ -373,8 +339,8 @@ process unicyclerPrep { script: """ - seqtk seq -A -L\ - $params.unicycler.minLongReads\ + seqtk seq -A -L \ + ${settings["unicycler"]["minLongReads"]} \ $longreads > long_reads.fasta """ } @@ -382,9 +348,10 @@ process unicyclerPrep { //assembly using lrasm process lrasm { + label "assembly" publishDir ( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', saveAs: { filename -> @@ -395,13 +362,13 @@ process lrasm { "scaffold.fa" } else if(filename.equals("Assembly/unitig.gfa")) { - "${params.projName}_contigs.fastg" + "${settings["projName"]}_contigs.fastg" } else if(filename.equals("Assembly/assembly_graph.gfa")) { - "${params.projName}_contigs.fastg" + "${settings["projName"]}_contigs.fastg" } else if(filename.equals("Assembly/assembly_graph.gv")) { - "${params.projName}_contigs.gv" + "${settings["projName"]}_contigs.gv" } else if(filename.equals("Assembly/assembly_info.txt")) { "assembly_info.txt" @@ -419,6 +386,7 @@ process lrasm { ) input: + val settings path unpaired output: @@ -432,21 +400,21 @@ process lrasm { path "{contig-*,*contigs.fa,K*/final_contigs.fasta}", emit: intContigs script: - def consensus = params.lrasm.numConsensus != null ? "-n $params.lrasm.numConsensus ": "" - def preset = params.lrasm.preset != null ? "-x $params.lrasm.preset " : "" - def errorCorrection = params.lrasm.ec != null ? "-e " : "" - def algorithm = params.lrasm.algorithm != null ? "-a $params.lrasm.algorithm " : "" + def consensus = settings["lrasm"]["numConsensus"] != null ? "-n ${settings["lrasm"]["numConsensus"]} ": "" + def preset = settings["lrasm"]["preset"] != null ? "-x ${settings["lrasm"]["preset"]} " : "" + def errorCorrection = settings["lrasm"]["ec"] != null ? "-e " : "" + def algorithm = settings["lrasm"]["algorithm"] != null ? "-a ${settings["lrasm"]["algorithm"]} " : "" def minLenOpt = "" - if (params.lrasm.algorithm == "miniasm") { - minLenOpt = "--ao \'-s $params.lrasm.minLength\' " + if (settings["lrasm"]["algorithm"] == "miniasm") { + minLenOpt = "--ao \'-s ${settings["lrasm"]["minLength"]}\' " } - else if (params.lrasm.algorithm == "wtdbg2") { - minLenOpt = "--wo \'-L $params.lrasm.minLength\' " + else if (settings["lrasm"]["algorithm"] == "wtdbg2") { + minLenOpt = "--wo \'-L ${settings["lrasm"]["minLength"]}\' " } - def flyeOpt = params.lrasm.algorithm == "metaflye" ? "--fo '--meta' ": "" + def flyeOpt = settings["lrasm"]["algorithm"] == "metaflye" ? "--fo '--meta' ": "" """ - lrasm -o . -t $params.threads\ + lrasm -o . -t ${settings["cpus"]} \ $preset\ $consensus\ $errorCorrection\ @@ -455,39 +423,43 @@ process lrasm { $flyeOpt\ $unpaired\ """ - //2>/dev/null } process renameFilterFasta { + label "assembly" publishDir( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy' ) input: + val settings path contigs output: - path "*" + path "*_contigs.fa", emit: contigs + path "*_contigs_*up.fa", emit: annotationContigs script: - def annotation = params.annotation ? "-ann 1" : "" + def annotation = settings["annotation"] ? 
"-ann 1" : "" """ CONTIG_NUMBER=\$(grep -c '>' ${contigs}) renameFilterFasta.pl \ -u $contigs\ -d .\ - -filt $params.minContigSize\ + -filt ${settings["minContigSize"]} \ -maxseq \$CONTIG_NUMBER\ - -ann_size $params.contigSizeForAnnotation\ - -n $params.projName\ + -ann_size ${settings["contigSizeForAnnotation"]} \ + -n ${settings["projName"]} \ $annotation """ } process bestIncompleteAssembly { + label "assembly" input: + val x path intContigs @@ -511,66 +483,92 @@ process bestIncompleteAssembly { } -workflow { - "mkdir nf_assets".execute().text - "touch nf_assets/NO_FILE".execute().text - "touch nf_assets/NO_FILE2".execute().text - "touch nf_assets/NO_FILE3".execute().text - "touch nf_assets/NO_FILE4".execute().text - - paired_ch = channel.fromPath(params.pairedFiles, relative:true, checkIfExists:true).collect() - unpaired_ch = channel.fromPath(params.unpairedFile, relative:true, checkIfExists:true) - spades_pb = file(params.spades.pacbio, checkIfExists:true) - spades_np = file(params.spades.nanopore, checkIfExists:true) - unicycler_lr = file(params.unicycler.longreads, checkIfExists:true) - - if (params.assembler.equalsIgnoreCase("IDBA_UD")) { - avg_len_ch = idbaAvgLen(idbaReadFastq(paired_ch, unpaired_ch)) - (c1,c2) = idbaPrepReads(paired_ch, unpaired_ch) +workflow ASSEMBLY { + take: + settings + paired + unpaired + avgLen + + main: + + //supplementary long read files setup + spades_pb = file(settings["spades"]["pacbio"], checkIfExists:true) + spades_np = file(settings["spades"]["nanopore"], checkIfExists:true) + unicycler_lr = file(settings["unicycler"]["longreads"], checkIfExists:true) + + + //output channel setup + outContigs = channel.empty() + annotationContigs = channel.empty() + + if (settings["assembler"].equalsIgnoreCase("IDBA_UD")) { + + (c1,c2) = idbaPrepReads(paired, unpaired) (sp,su,l) = idbaExtractLong(c1,c2.ifEmpty({file("nf_assets/NO_FILE")})) - idbaUD(sp.filter{ it.size()>0 }.ifEmpty({file("nf_assets/NO_FILE")}), + + idbaUD(settings, sp.filter{ it.size()>0 }.ifEmpty({file("nf_assets/NO_FILE")}), su.filter{ it.size()>0 }.ifEmpty({file("nf_assets/NO_FILE2")}), l.filter{ it.size()>0 }.ifEmpty({file("nf_assets/NO_FILE3")}), - avg_len_ch) + avgLen) bestIncompleteAssembly(idbaUD.out.contigs.ifEmpty('EMPTY'), idbaUD.out.intContigs) - renameFilterFasta(idbaUD.out.contigs.concat(bestIncompleteAssembly.out).first()) + renameFilterFasta(settings, idbaUD.out.contigs.concat(bestIncompleteAssembly.out).first()) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } - else if (params.assembler.equalsIgnoreCase("SPAdes")) { - spades(paired_ch, unpaired_ch, spades_pb, spades_np) + else if (settings["assembler"].equalsIgnoreCase("SPAdes")) { + spades(settings, paired, unpaired, spades_pb, spades_np) bestIncompleteAssembly(spades.out.contigs.ifEmpty('EMPTY'), spades.out.intContigs) - renameFilterFasta(spades.out.contigs.concat(bestIncompleteAssembly.out).first()) + renameFilterFasta(settings, spades.out.contigs.concat(bestIncompleteAssembly.out).first()) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } - else if (params.assembler.equalsIgnoreCase("MEGAHIT")) { - megahit(paired_ch, unpaired_ch) + else if (settings["assembler"].equalsIgnoreCase("MEGAHIT")) { + megahit(settings, paired, unpaired) bestIncompleteAssembly(megahit.out.contigs.ifEmpty('EMPTY'), megahit.out.intContigs) - renameFilterFasta(megahit.out.contigs.concat(bestIncompleteAssembly.out).first()) + 
renameFilterFasta(settings, megahit.out.contigs.concat(bestIncompleteAssembly.out).first()) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } - else if (params.assembler.equalsIgnoreCase("UniCycler")) { - if (params.unicycler.longreads != "nf_assets/NO_FILE3") { - println("Filter long reads with $params.unicycler.minLongReads (bp) cutoff") - unicycler(paired_ch, - unpaired_ch, - unicyclerPrep(unicycler_lr).filter{it.size()>0}.ifEmpty({file("nf_assets/NO_FILE3")})) + else if (settings["assembler"].equalsIgnoreCase("UniCycler")) { + if (settings["unicycler"]["longreads"] != "nf_assets/NO_FILE3") { + println("Filter long reads with ${settings["unicycler"]["minLongReads"]} (bp) cutoff") + unicycler( + settings, + paired, + unpaired, + unicyclerPrep(settings,unicycler_lr).filter{it.size()>0}.ifEmpty({file("nf_assets/NO_FILE3")}) + ) //unicycler produces no intermediate contigs, we let it error out above rather than try to rescue a failed assembly - renameFilterFasta(unicycler.out.contigs) + renameFilterFasta(settings, unicycler.out.contigs) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } else { - unicycler(paired_ch, unpaired_ch, unicycler_lr) - renameFilterFasta(unicycler.out.contigs) + unicycler(settings, paired, unpaired, unicycler_lr) + renameFilterFasta(settings, unicycler.out.contigs) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } } - else if (params.assembler.equalsIgnoreCase("LRASM")) { - lrasm(unpaired_ch) + else if (settings["assembler"].equalsIgnoreCase("LRASM")) { + lrasm(settings, unpaired) bestIncompleteAssembly(lrasm.out.contigs.ifEmpty('EMPTY'), lrasm.out.intContigs) - renameFilterFasta(lrasm.out.contigs.concat(bestIncompleteAssembly.out).first()) + renameFilterFasta(settings, lrasm.out.contigs.concat(bestIncompleteAssembly.out).first()) + outContigs = renameFilterFasta.out.contigs + annotationContigs = renameFilterFasta.out.annotationContigs } else { - error "Invalid assembler: $params.assembler" + error "Invalid assembler: ${settings["assembler"]}" } + emit: + outContigs + annotationContigs + } \ No newline at end of file diff --git a/runAssembly/test_files/parameters/idba_assembly.json b/modules/runAssembly/test_files/parameters/idba_assembly.json similarity index 100% rename from runAssembly/test_files/parameters/idba_assembly.json rename to modules/runAssembly/test_files/parameters/idba_assembly.json diff --git a/runAssembly/test_files/parameters/lrasm_assembly.json b/modules/runAssembly/test_files/parameters/lrasm_assembly.json similarity index 100% rename from runAssembly/test_files/parameters/lrasm_assembly.json rename to modules/runAssembly/test_files/parameters/lrasm_assembly.json diff --git a/runAssembly/test_files/parameters/megahit_assembly.json b/modules/runAssembly/test_files/parameters/megahit_assembly.json similarity index 100% rename from runAssembly/test_files/parameters/megahit_assembly.json rename to modules/runAssembly/test_files/parameters/megahit_assembly.json diff --git a/runAssembly/test_files/parameters/spades_assembly.json b/modules/runAssembly/test_files/parameters/spades_assembly.json similarity index 100% rename from runAssembly/test_files/parameters/spades_assembly.json rename to modules/runAssembly/test_files/parameters/spades_assembly.json diff --git a/runAssembly/test_files/parameters/unicycler_assembly.json 
b/modules/runAssembly/test_files/parameters/unicycler_assembly.json similarity index 100% rename from runAssembly/test_files/parameters/unicycler_assembly.json rename to modules/runAssembly/test_files/parameters/unicycler_assembly.json diff --git a/modules/runFaQCs/test_files/parameters/runqc_adapter.json b/modules/runFaQCs/test_files/parameters/runqc_adapter.json deleted file mode 100644 index 0d3e917..0000000 --- a/modules/runFaQCs/test_files/parameters/runqc_adapter.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "outDir": "test_out", - "pairFile": ["../test_data/Ecoli_10x.1.fastq","../test_data/Ecoli_10x.2.fastq"], - "adapter": "../test_data/adapters.fasta" - } diff --git a/modules/runFaQCs/test_files/parameters/runqc_explicit_defaults.json b/modules/runFaQCs/test_files/parameters/runqc_explicit_defaults.json deleted file mode 100644 index 4f516d8..0000000 --- a/modules/runFaQCs/test_files/parameters/runqc_explicit_defaults.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "numCPU": 4, - "outDir": "test_out", - "pairFile": ["../test_data/Ecoli_10x.1.fastq","../test_data/Ecoli_10x.2.fastq"], - "qualityCutoff": 5, - "minLength": 50, - "avgQuality": 0, - "numN": 10, - "lowComplexity": 0.85, - "cut5end": 0, - "cut3end": 0, - "splitSize": 1000000, - "polyA": false, - "phredOffset": 33, - "ontFlag": false, - "pacbioFlag": false, - "porechop": false - } diff --git a/modules/runFaQCs/test_files/parameters/runqc_inputs1.json b/modules/runFaQCs/test_files/parameters/runqc_inputs1.json deleted file mode 100644 index a2d1713..0000000 --- a/modules/runFaQCs/test_files/parameters/runqc_inputs1.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "outDir": "test_out", - "unpairFile": ["../test_data/Ecoli_10x.1.fastq"] - } diff --git a/modules/runFaQCs/test_files/parameters/runqc_inputs2.json b/modules/runFaQCs/test_files/parameters/runqc_inputs2.json deleted file mode 100644 index f6196b4..0000000 --- a/modules/runFaQCs/test_files/parameters/runqc_inputs2.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "outDir": "test_out", - "pairFile": ["../test_data/Ecoli_10x.1.fastq","../test_data/Ecoli_10x.2.fastq"] - } diff --git a/modules/runFaQCs/test_files/parameters/runqc_nanopore.json b/modules/runFaQCs/test_files/parameters/runqc_nanopore.json deleted file mode 100644 index 8e03cae..0000000 --- a/modules/runFaQCs/test_files/parameters/runqc_nanopore.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "outDir": "test_out", - "unpairFile": ["../test_data/Ecoli_10x.1.fastq"], - "ontFlag": true, - "porechop": true, - "minLength": -1 - } diff --git a/runReadsToContig/Dockerfile b/modules/runReadsToContig/Dockerfile similarity index 89% rename from runReadsToContig/Dockerfile rename to modules/runReadsToContig/Dockerfile index 8ffea85..bd023d4 100644 --- a/runReadsToContig/Dockerfile +++ b/modules/runReadsToContig/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1 -FROM continuumio/miniconda3:23.5.2-0 AS build +FROM continuumio/miniconda3:main AS build ENV container=docker @@ -14,6 +14,7 @@ RUN conda init bash \ RUN conda install -n readsToContig -c conda-forge r-base RUN conda install -n readsToContig -c conda-forge python=3.11 +RUN conda install -n readsToContig -c bioconda minimap2=2.24 RUN conda install -n readsToContig -c bioconda perl-json RUN conda install -n readsToContig -c bioconda samclip=0.4.0 RUN conda install -n readsToContig -c bioconda bwa @@ -39,6 +40,7 @@ FROM debian:buster AS runtime #RUN apk add --no-cache bash COPY --from=build /venv /venv +RUN apt-get update && apt-get install procps -y && apt-get clean ENV 
PERL5LIB=/venv/lib/perl5/core_perl ENV PATH=/venv/bin:$PATH diff --git a/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl b/modules/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl similarity index 100% rename from runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl rename to modules/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl diff --git a/runReadsToContig/bin/bam_to_fastq.pl b/modules/runReadsToContig/bin/bam_to_fastq.pl similarity index 100% rename from runReadsToContig/bin/bam_to_fastq.pl rename to modules/runReadsToContig/bin/bam_to_fastq.pl diff --git a/runReadsToContig/bin/contig_stats.pl b/modules/runReadsToContig/bin/contig_stats.pl similarity index 100% rename from runReadsToContig/bin/contig_stats.pl rename to modules/runReadsToContig/bin/contig_stats.pl diff --git a/runReadsToContig/bin/fastq_utility.pm b/modules/runReadsToContig/bin/fastq_utility.pm similarity index 100% rename from runReadsToContig/bin/fastq_utility.pm rename to modules/runReadsToContig/bin/fastq_utility.pm diff --git a/runReadsToContig/bin/runReadsToContig.pl b/modules/runReadsToContig/bin/runReadsToContig.pl similarity index 100% rename from runReadsToContig/bin/runReadsToContig.pl rename to modules/runReadsToContig/bin/runReadsToContig.pl diff --git a/runReadsToContig/bin/tab2Json_for_dataTable.pl b/modules/runReadsToContig/bin/tab2Json_for_dataTable.pl similarity index 100% rename from runReadsToContig/bin/tab2Json_for_dataTable.pl rename to modules/runReadsToContig/bin/tab2Json_for_dataTable.pl diff --git a/runReadsToContig/runReadsToContig.nf b/modules/runReadsToContig/runReadsToContig.nf similarity index 57% rename from runReadsToContig/runReadsToContig.nf rename to modules/runReadsToContig/runReadsToContig.nf index 9bb5504..f353764 100644 --- a/runReadsToContig/runReadsToContig.nf +++ b/modules/runReadsToContig/runReadsToContig.nf @@ -1,12 +1,13 @@ #!/usr/bin/env nextflow -process r2c { - debug true +process validationAlignment { + label 'r2c' publishDir( - path: "$params.outDir/AssemblyBasedAnalysis/readsMappingToContig", + path: "${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig", mode: 'copy' ) input: + val settings path paired path unpaired path contigs @@ -20,27 +21,27 @@ process r2c { path "mapping.log", emit: logFile script: - def outPrefix = params.prefix!=null ? "$params.prefix" : "readsToContigs" + def outPrefix = "readsToContigs" def paired = paired.name != "NO_FILE" ? "-p \'${paired[0]} ${paired[1]}\' " : "" def unpaired = unpaired.name != "NO_FILE2" ? "-u $unpaired " : "" - def cutoff = params.assembledContigs ? "-c 0 " : "-c 0.1 " - def cpu = params.cpus != null ? "-cpu $params.cpus " : "" - def max_clip = params.r2g_max_clip != null ? "-max_clip $params.r2g_max_clip " : "" + def cutoff = settings["useAssembledContigs"] ? "-c 0 " : "-c 0.1 " + def cpu = settings["cpus"] != null ? "-cpu ${settings["cpus"]} " : "" + def max_clip = settings["r2g_max_clip"] != null ? "-max_clip ${settings["r2g_max_clip"]} " : "" - def ont_flag = (params.fastq_source != null && params.fastq_source.equalsIgnoreCase("nanopore")) ? "-x ont2d " : "" - def pb_flag = (params.fastq_source != null && params.fastq_source.equalsIgnoreCase("pacbio")) ? "-x pacbio " : "" + def ont_flag = (settings["fastq_source"] != null && settings["fastq_source"].equalsIgnoreCase("nanopore")) ? "-x ont2d " : "" + def pb_flag = (settings["fastq_source"] != null && settings["fastq_source"].equalsIgnoreCase("pacbio")) ? 
"-x pacbio " : "" def aligner_options = "" - if(params.r2c_aligner =~ "bowtie") { - def bowtie_options = params.r2c_aligner_options.replaceAll("-p\\s*\\d+","") + if(settings["r2c_aligner"] =~ "bowtie") { + def bowtie_options = settings["r2c_aligner_options"].replaceAll("-p\\s*\\d+","") if(!(bowtie_options =~ /-k/)) { bowtie_options += " -k 10 " } aligner_options = "-aligner bowtie -bowtie_options \'$bowtie_options\'" } - else if(params.r2c_aligner =~ "bwa") { - def bwa_options = params.r2c_aligner_options.replaceAll("-t\\s*\\d+","") + else if(settings["r2c_aligner"] =~ "bwa") { + def bwa_options = settings["r2c_aligner_options"].replaceAll("-t\\s*\\d+","") if (ont_flag != "") { unpaired = unpaired.replaceAll("-u ","-long ") bwa_options += ont_flag @@ -51,8 +52,8 @@ process r2c { } aligner_options = "-aligner bwa -bwa_options \'$bwa_options\'" } - else if (params.r2c_aligner =~ "minimap") { - def minimap_options = params.r2c_aligner_options.replaceAll("-t\\s*\\d+","") + else if (settings["r2c_aligner"] =~ "minimap") { + def minimap_options = settings["r2c_aligner_options"].replaceAll("-t\\s*\\d+","") if(ont_flag != "" || pb_flag != "") { unpaired = unpaired.replaceAll("-u ","-long ") } @@ -80,18 +81,20 @@ process r2c { } -process r2c_jsonTable { +process makeCoverageTable { + label 'r2c' publishDir( - path: "$params.outDir/AssemblyBasedAnalysis/readsMappingToContig", + path: "${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig", mode: 'copy', pattern: "*_coverage.table.json" ) publishDir( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', pattern: "*stats.{pdf,txt}" ) input: + val settings path cov_table path contigFile @@ -101,24 +104,25 @@ process r2c_jsonTable { path "*_coverage.table.json" script: - def rowLimit = params.rowLimit != null ? "$params.rowLimit" : "3000" - def outPrefix = params.prefix!=null ? "$params.prefix" : "readsToContigs" + def rowLimit = settings["rowLimit"] != null ? "${settings["rowLimit"]} " : "3000" """ tab2Json_for_dataTable.pl -project_dir . 
-mode contig -limit $rowLimit \ - ${outPrefix}_coverage.table > ${outPrefix}_coverage.table.json + readsToContigs_coverage.table > readsToContigs_coverage.table.json contig_stats.pl -p $contigFile > contigs_stats.txt """ } process extractUnmapped { + label 'r2c' publishDir( - path:"$params.outDir/AssemblyBasedAnalysis/readsMappingToContig/", + path:"${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig/", mode: 'copy', overwrite: true ) input: + val settings path bamFile path logFile @@ -135,18 +139,22 @@ process extractUnmapped { } -workflow { +workflow READSTOCONTIGS { + take: + settings + paired + unpaired + contigs + + main: "mkdir nf_assets".execute().text "touch nf_assets/NO_FILE".execute().text "touch nf_assets/NO_FILE2".execute().text - paired_ch = channel.fromPath(params.pairFile, checkIfExists:true).collect() - unpaired_ch = channel.fromPath(params.unpairFile, checkIfExists:true) - contig_ch = channel.fromPath(params.contigFile, checkIfExists:true) - - r2c(paired_ch, unpaired_ch, contig_ch) - r2c_jsonTable(r2c.out.cov_table, r2c.out.contig_file) - if(params.extractUnmapped) { - extractUnmapped(r2c.out.sortedBam, r2c.out.logFile) + + validationAlignment(settings, paired, unpaired, contigs) + makeCoverageTable(settings, validationAlignment.out.cov_table, validationAlignment.out.contig_file) + if(settings["extractUnmapped"]) { + extractUnmapped(settings, validationAlignment.out.sortedBam, validationAlignment.out.logFile) } } diff --git a/runReadsToContig/test_files/parameters/r2c_forContigTax.json b/modules/runReadsToContig/test_files/parameters/r2c_forContigTax.json similarity index 100% rename from runReadsToContig/test_files/parameters/r2c_forContigTax.json rename to modules/runReadsToContig/test_files/parameters/r2c_forContigTax.json diff --git a/runReadsToContig/test_files/parameters/r2c_simple.json b/modules/runReadsToContig/test_files/parameters/r2c_simple.json similarity index 100% rename from runReadsToContig/test_files/parameters/r2c_simple.json rename to modules/runReadsToContig/test_files/parameters/r2c_simple.json diff --git a/nextflow.config b/nextflow.config index bec3704..d7e944c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,12 +7,14 @@ params { faqcs = false hostRemoval = false annotation = false + runAssembly = false } //module parameters -- passed directly into subworkflows according to best practices //these are the default values shared { inputFastq = null + inputContigs = "${projectDir}/nf_assets/NO_FILE3" pairedFile = false outDir = "EDGE_output" projName = "Project" @@ -56,12 +58,56 @@ params { keggView = false } + assembly { + assembler = "IDBA_UD" + annotation = false + contigSizeForAnnotation = 700 + minContigSize = 200 + memLimit = null + idba{ + maxK = null + minK = 31 + step = 20 + } + spades { + pacbio = "${projectDir}/nf_assets/NO_FILE3" + nanopore = "${projectDir}/nf_assets/NO_FILE4" + algorithm = null + } + megahit { + preset = null + } + unicycler { + longreads = "${projectDir}/nf_assets/NO_FILE3" + minLongReads = 2000 + bridgingMode = "normal" + } + lrasm { + minLength = 400 + preset = null + algorithm = null + ec = null + numConsensus = null + } + + } + + r2c { + useAssembledContigs = false + r2c_aligner = "bwa" + r2c_aligner_options = "" + r2g_max_clip = null + extractUnmapped = false + rowLimit = null + } + } //container settings apptainer { enabled = true + pullTimeout = "1 hour" runOptions = "--compat" } @@ -80,6 +126,12 @@ process { withLabel: 'hostRemoval' { container= 'apwat/host_removal:1.7.4' } + withLabel: 
'assembly' { + container = 'apwat/run_assembly:1.5' + } + withLabel: 'r2c' { + container = 'apwat/run_r2c:1.6' + } } //submission rate limit: needed for sra2fastq to operate correctly diff --git a/nf_assets/NO_FILE4 b/nf_assets/NO_FILE4 new file mode 100644 index 0000000..e69de29 diff --git a/runAssembly/bin/getAvgLen.pl b/runAssembly/bin/getAvgLen.pl deleted file mode 100755 index 1333951..0000000 --- a/runAssembly/bin/getAvgLen.pl +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; -use File::Basename; -use Getopt::Long; - -my $outputDir=''; - -my @unpairedList= (); -my @pairedList= (); - -GetOptions( - 'u:s{1,}' => \@unpairedList, - 'p:s{1,}' => \@pairedList, - 'd=s' => \$outputDir, -); - - -my $all_R1_fastq = "$outputDir/all.1.fastq"; -my $all_R2_fastq = "$outputDir/all.2.fastq"; -my $all_SE_fastq = "$outputDir/all.se.fastq"; -my $count_file_list= "$outputDir/fastqCount.txt"; -my $avg_read_len= 0 ; -my $PE_count = 0 ; -my $PE_total_len = 0; -my $SE_count= 0 ; -my $SE_total_len = 0 ; - - - -open (my $fh, ">$count_file_list") or die "Cannot write $count_file_list\n"; -while(my ($R1,$R2) = splice (@pairedList,0,2)) -{ - ($PE_count, $PE_total_len) = &countPE_exe($R1,$R2,$all_R1_fastq,$all_R2_fastq,$all_SE_fastq,$fh); -} -foreach my $file (@unpairedList) -{ - ($SE_count,$SE_total_len)=&countFastq_exe($file,$all_SE_fastq); - printf $fh ("%s\t%d\t%d\t%.2f\n",basename($file),$SE_count,$SE_total_len,$SE_total_len/$SE_count); - printf ("%s\t%d\t%d\t%.2f\n",basename($file),$SE_count,$SE_total_len,$SE_total_len/$SE_count); -} -close $fh; -$avg_read_len = ($PE_count + $SE_count) > 0 ? ($PE_total_len + $SE_total_len) / ($PE_count + $SE_count) : 0 ; - -sub countPE_exe{ - my $r1=shift; - my $r2=shift; - my $out_r1=shift; - my $out_r2=shift; - my $out_se=shift; - my $count_fh=shift; - my %seq_hash; - my $pair_char; - my $unpaired_count=0; - my $read1_count=0; - my $read2_count=0; - my $se2_count=0; - my $se1_count=0; - my $paired_count=0; - my $read1_total_len=0; - my $read2_total_len=0; - my $existed_id1=0; - my $existed_id2=0; - my ($fh1,$pid) = open_file($r1); - open (my $ofh1, ">>$out_r1") or die "Cannot write $out_r1\n"; - open (my $ofh2, ">>$out_r2") or die "Cannot write $out_r2\n"; - open (my $ofhse, ">>$out_se") or die "Cannot write $out_se\n"; - while(<$fh1>){ - chomp; - next unless $_ =~ /\S/; - next if ($_ =~ /length=0/); - my $id_line=$_; - my ($id) = $id_line =~ /^\@(\S+).?\/?1?\s*/; - my $seq = <$fh1>; - chomp $seq; - if ($seq_hash{$id}){ - $existed_id1++; - } - my $len = length $seq; - $read1_total_len += $len; - my $qual_id = <$fh1>; - my $qual = <$fh1>; - $seq = $seq."\n".$qual_id.$qual; - $seq_hash{$id}++; - $read1_count++; - } - close $fh1; - my %seq_hash2; - my ($fh2,$pid2) = open_file($r2); - while(<$fh2>){ - chomp; - next unless $_ =~ /\S/; - next if ($_ =~ /length=0/); - my $id_line=$_; - my ($id2) = $id_line =~ /^\@(\S+)\.?\/?2?\s*/; - $read2_count++; - my $seq2 = <$fh2>; - chomp $seq2; - if ($seq_hash2{$id2}){ - $existed_id2++; - } - my $len = length $seq2; - $read2_total_len += $len; - my $qual_id = <$fh2>; - my $qual = <$fh2>; - $seq2 = $seq2."\n".$qual_id.$qual; - $seq_hash2{$id2}++; - if ($seq_hash{$id2}){ - $seq_hash{$id2}++; - $paired_count++; - print $ofh2 $id_line,"\n",$seq2; - }else{ - print $ofhse $id_line,"\n",$seq2; - $se2_count++; - } - } - close $fh2; - ($fh1,$pid) = open_file($r1); - while(<$fh1>){ - chomp; - next unless $_ =~ /\S/; - next if ($_ =~ /length=0/); - my $id_line=$_; - my ($id) = $id_line =~ /^\@(\S+)\.?\/?1?\s*/; - my 
$seq = <$fh1>; - chomp $seq; - my $qual_id = <$fh1>; - my $qual = <$fh1>; - $seq = $seq."\n".$qual_id.$qual; - if ($seq_hash{$id} == 2){ - print $ofh1 $id_line,"\n",$seq; - } - if ($seq_hash{$id} == 1){ - print $ofhse $id_line,"\n",$seq; - $se1_count++; - } - } - close $fh1; - close $ofh1; - close $ofh2; - close $ofhse; - printf ("%s\t%d\t%d\t%.2f\n",basename($r1),$read1_count,$read1_total_len,$read1_total_len/$read1_count); - printf ("%s\t%d\t%d\t%.2f\n",basename($r2),$read2_count,$read2_total_len,$read2_total_len/$read2_count); - printf $count_fh ("%s\t%d\t%d\t%.2f\n",basename($r1),$read1_count,$read1_total_len,$read1_total_len/$read1_count); - printf $count_fh ("%s\t%d\t%d\t%.2f\n",basename($r2),$read2_count,$read2_total_len,$read2_total_len/$read2_count); - printf ("%d duplicate id from %s\n", $existed_id1, basename($r1)) if ($existed_id1 > 0); - printf ("%d duplicate id from %s\n", $existed_id2, basename($r2)) if ($existed_id2 > 0); - printf ("There are %d reads from %s don't have corresponding paired read.\n", $se1_count, basename($r1)) if ($se1_count >0); - printf ("There are %d reads from %s don't have corresponding paired read.\n", $se2_count, basename($r2)) if ($se2_count >0); - - unlink $out_se if (-z $out_se); - return ($read1_count + $read2_count, $read1_total_len + $read2_total_len); -} - -sub countFastq_exe -{ - my $file=shift; - my $output=shift; - my $seq_count=0; - my $total_length; - my ($fh,$pid)= open_file($file); - open (my $ofh, ">>$output") or die "Cannot write $output\n"; - while (<$fh>) - { - next unless $_ =~ /\S/; - next if ($_ =~ /length=0/); - my $id=$_; - $id = '@'."seq_$seq_count\n" if ($id =~ /No name/); - my $seq=<$fh>; - chomp $seq; - my $q_id=<$fh>; - my $q_seq=<$fh>; - my $len = length $seq; - $seq_count++; - $total_length +=$len; - print $ofh "$id$seq\n$q_id$q_seq"; - } - close $fh; - return ($seq_count,$total_length); -} - -sub touchFile{ - my $file=shift; - open (my $fh,">",$file) or die "$!"; - close $fh; -} - -sub open_file -{ - my ($file) = @_; - print "$file\n"; - my $fh; - my $pid; - if ( $file=~/\.gz\.?\d?$/i ) { $pid=open($fh, "gunzip -c $file |") or die ("gunzip -c $file: $!"); } - else { $pid=open($fh,'<',$file) or die("$file: $!"); } - return ($fh,$pid); -} \ No newline at end of file diff --git a/runAssembly/nextflow.config b/runAssembly/nextflow.config deleted file mode 100644 index 04c17d9..0000000 --- a/runAssembly/nextflow.config +++ /dev/null @@ -1,48 +0,0 @@ -process.container = 'apwat/run_assembly:1.4.5' -singularity { - enabled = true - runOptions = "--compat" -} -params { - assembler = "IDBA_UD" - outDir = '.' 
- threads = 8 - projName = "project" - annotation = false - contigSizeForAnnotation = 700 - pairedFiles = "nf_assets/NO_FILE" - unpairedFile = "nf_assets/NO_FILE2" - minContigSize = 200 - memLimit = null - idba{ - maxK = null - minK = 31 - step = 20 - } - spades { - pacbio = "nf_assets/NO_FILE3" - nanopore = "nf_assets/NO_FILE4" - algorithm = null - } - megahit { - preset = null - } - unicycler { - longreads = "nf_assets/NO_FILE3" - minLongReads = 2000 - bridgingMode = "normal" - } - lrasm { - minLength = 400 - preset = null - algorithm = null - ec = null - numConsensus = null - } -} -workflow.onComplete = { - "rm -rf nf_assets".execute().text -} -workflow.onError = { - "rm -rf nf_assets".execute().text -} \ No newline at end of file diff --git a/runReadsToContig/nextflow.config b/runReadsToContig/nextflow.config deleted file mode 100644 index b4a673d..0000000 --- a/runReadsToContig/nextflow.config +++ /dev/null @@ -1,23 +0,0 @@ -process.container = 'apwat/run_r2c:1.3' -singularity.enabled=true - -params { - pairFile = "nf_assets/NO_FILE" - unpairFile = "nf_assets/NO_FILE2" - contigFile = null - assembledContigs = null - r2c_aligner = "bwa" - r2c_aligner_options = "" - r2g_max_clip = null - fastq_source = null - prefix = null - cpus = 8 - outDir = "." - extractUnmapped = false - rowLimit = null -} - -workflow.onComplete = { - "rm -rf nf_assets".execute().text -} - diff --git a/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz b/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz new file mode 100644 index 0000000..fcdf047 Binary files /dev/null and b/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz differ diff --git a/test_data/Homo_sapiens.GRCh38.dna.chromosome.2.fa.gz b/test_data/Homo_sapiens.GRCh38.dna.chromosome.2.fa.gz new file mode 100644 index 0000000..4e85f76 Binary files /dev/null and b/test_data/Homo_sapiens.GRCh38.dna.chromosome.2.fa.gz differ diff --git a/test_data/reads.fastq.gz b/test_data/reads.fastq.gz new file mode 100644 index 0000000..0b15405 Binary files /dev/null and b/test_data/reads.fastq.gz differ diff --git a/tests/modules/hostRemoval/hostRemoval.nf.test b/tests/modules/hostRemoval/hostRemoval.nf.test new file mode 100644 index 0000000..752b77f --- /dev/null +++ b/tests/modules/hostRemoval/hostRemoval.nf.test @@ -0,0 +1,141 @@ +nextflow_workflow { + + name "Test Workflow HOSTREMOVAL" + script "modules/hostRemoval/hostRemoval.nf" + workflow "HOSTREMOVAL" + + test("PE Reads, Single Host") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testHR" + } + hostRemoval { + host = ["${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz"] + cpus = 4 + } + + } + workflow { + """ + input[0] = params.hostRemoval.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + """ + } + } + + then { + + assertAll( + {assert workflow.success}, + {assert file(workflow.out.paired[0].find { file(it).name == "hostclean.1.fastq" }).exists()}, + {assert file(workflow.out.paired[0].find { file(it).name == "hostclean.2.fastq" }).exists()}, + {assert file(workflow.out.unpaired.find { file(it).name == "hostclean.unpaired.fastq" }).exists()} + ) + } + + } + + test("SE Reads, Single Host") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testHR" + } + hostRemoval { + host = "${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz" + cpus = 4 + } + + } + workflow { + """ + 
input[0] = params.hostRemoval.plus(params.shared) + input[1] = [file("${projectDir}/nf_assets/NO_FILE")] + input[2] = file("${projectDir}/test_data/Ecoli_10x.1.fastq") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.unpaired.find { file(it).name == "hostclean.unpaired.fastq" }).exists()}, + ) + } + + } + + test("SE Reads, Multi Host") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testHR" + } + hostRemoval { + host = ["${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz", "${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.2.fa.gz"] + cpus = 4 + } + } + workflow { + """ + input[0] = params.hostRemoval.plus(params.shared) + input[1] = [file("${projectDir}/nf_assets/NO_FILE")] + input[2] = file("${projectDir}/test_data/Ecoli_10x.1.fastq") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.unpaired.find { file(it).name == "hostclean.unpaired.fastq" }).exists()}, + ) + + } + + } + + test("PE Reads, Multi Host") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testHR" + } + hostRemoval { + host = ["${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.1.fa.gz","${projectDir}/test_data/Homo_sapiens.GRCh38.dna.chromosome.2.fa.gz"] + cpus = 4 + } + } + workflow { + """ + input[0] = params.hostRemoval.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.paired[0].find { file(it).name == "hostclean.1.fastq" }).exists()}, + {assert file(workflow.out.paired[0].find { file(it).name == "hostclean.2.fastq" }).exists()}, + {assert file(workflow.out.unpaired.find { file(it).name == "hostclean.unpaired.fastq" }).exists()}, + ) + + } + + } + + +} diff --git a/tests/modules/hostRemoval/hostRemoval.nf.test.snap b/tests/modules/hostRemoval/hostRemoval.nf.test.snap new file mode 100644 index 0000000..8cda4bd --- /dev/null +++ b/tests/modules/hostRemoval/hostRemoval.nf.test.snap @@ -0,0 +1,106 @@ +{ + "SE Reads, Single Host": { + "content": [ + { + "0": [ + + ], + "1": [ + "hostclean.unpaired.fastq:md5,7b1d185d8e9fef1fc68efba3257ca3fb" + ], + "paired": [ + + ], + "unpaired": [ + "hostclean.unpaired.fastq:md5,7b1d185d8e9fef1fc68efba3257ca3fb" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T18:45:45.584906" + }, + "PE Reads, Multi Host": { + "content": [ + { + "0": [ + [ + "hostclean.1.fastq:md5,e2f29a1147a9f450557cf88ad72400cf", + "hostclean.2.fastq:md5,70927fcae9ee7bdbd98fce66b5d353f3" + ] + ], + "1": [ + "hostclean.unpaired.fastq:md5,a1ace46fc8b5a225afcab35e757e0542" + ], + "paired": [ + [ + "hostclean.1.fastq:md5,e2f29a1147a9f450557cf88ad72400cf", + "hostclean.2.fastq:md5,70927fcae9ee7bdbd98fce66b5d353f3" + ] + ], + "unpaired": [ + "hostclean.unpaired.fastq:md5,a1ace46fc8b5a225afcab35e757e0542" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T18:35:06.56628" + }, + "PE Reads, Single Host": { + "content": [ + { + "0": [ + [ + "hostclean.1.fastq:md5,de55918c54218d510baf01946e97e28b", + "hostclean.2.fastq:md5,6395b18db7f09a6374495d577533dfc0" + ] + ], + "1": [ + "hostclean.unpaired.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "paired": [ + [ + 
"hostclean.1.fastq:md5,de55918c54218d510baf01946e97e28b", + "hostclean.2.fastq:md5,6395b18db7f09a6374495d577533dfc0" + ] + ], + "unpaired": [ + "hostclean.unpaired.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T18:41:15.089008" + }, + "SE Reads, Multi Host": { + "content": [ + { + "0": [ + + ], + "1": [ + "hostclean.unpaired.fastq:md5,8fc35373e3bfb3951e9d7632f3362861" + ], + "paired": [ + + ], + "unpaired": [ + "hostclean.unpaired.fastq:md5,8fc35373e3bfb3951e9d7632f3362861" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-20T18:27:35.91726" + } +} \ No newline at end of file diff --git a/tests/modules/runAssembly/runAssembly.nf.test b/tests/modules/runAssembly/runAssembly.nf.test new file mode 100644 index 0000000..e33679a --- /dev/null +++ b/tests/modules/runAssembly/runAssembly.nf.test @@ -0,0 +1,173 @@ +nextflow_workflow { + + name "Test Workflow ASSEMBLY" + script "modules/runAssembly/runAssembly.nf" + workflow "ASSEMBLY" + + test("IDBA") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testAssembly" + } + assembly{ + assembler = "IDBA_UD" + } + } + workflow { + """ + input[0] = params.assembly.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = channel.of("151") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.outContigs.find { file(it).name == "testAssembly_contigs.fa" }).exists()}, + {assert file(workflow.out.annotationContigs.find { file(it).name == "testAssembly_contigs_700up.fa" }).exists()} + ) + } + + } + + test("SPAdes") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testAssembly" + } + assembly{ + assembler = "spades" + } + } + workflow { + """ + input[0] = params.assembly.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = channel.of("151") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.outContigs.find { file(it).name == "testAssembly_contigs.fa" }).exists()}, + {assert file(workflow.out.annotationContigs.find { file(it).name == "testAssembly_contigs_700up.fa" }).exists()} + ) + } + + } + + + test("LRASM") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testAssembly" + } + assembly{ + assembler = "lrasm" + lrasm { + ec = true + } + } + } + workflow { + """ + input[0] = params.assembly.plus(params.shared) + input[1] = [file("${projectDir}/nf_assets/NO_FILE")] + input[2] = file("${projectDir}/test_data/reads.fastq.gz") + input[3] = channel.of("151") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.outContigs.find { file(it).name == "testAssembly_contigs.fa" }).exists()}, + {assert file(workflow.out.annotationContigs.find { file(it).name == "testAssembly_contigs_700up.fa" }).exists()} + ) + } + + } + + + test("Unicycler") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testAssembly" + } + assembly{ + assembler = "Unicycler" + } + } + workflow { + """ + input[0] = params.assembly.plus(params.shared) + input[1] = 
[file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = channel.of("151") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.outContigs.find { file(it).name == "testAssembly_contigs.fa" }).exists()}, + {assert file(workflow.out.annotationContigs.find { file(it).name == "testAssembly_contigs_700up.fa" }).exists()} + ) + } + + } + + + test("MEGAHIT") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testAssembly" + } + assembly{ + assembler = "megahit" + } + } + workflow { + """ + input[0] = params.assembly.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = channel.of("151") + """ + } + } + + then { + assertAll( + {assert workflow.success}, + {assert file(workflow.out.outContigs.find { file(it).name == "testAssembly_contigs.fa" }).exists()}, + {assert file(workflow.out.annotationContigs.find { file(it).name == "testAssembly_contigs_700up.fa" }).exists()} + ) + } + + } + +} diff --git a/tests/modules/runAssembly/runAssembly.nf.test.snap b/tests/modules/runAssembly/runAssembly.nf.test.snap new file mode 100644 index 0000000..3b96a76 --- /dev/null +++ b/tests/modules/runAssembly/runAssembly.nf.test.snap @@ -0,0 +1,48 @@ +{ + "SPAdes": { + "content": [ + { + "0": [ + "testAssembly_contigs.fa:md5,b205c190c365337380dee8a3bcc535e4" + ], + "1": [ + "testAssembly_contigs_700up.fa:md5,149b7cd8d5110b39c33c284429c5f086" + ], + "annotationContigs": [ + "testAssembly_contigs_700up.fa:md5,149b7cd8d5110b39c33c284429c5f086" + ], + "outContigs": [ + "testAssembly_contigs.fa:md5,b205c190c365337380dee8a3bcc535e4" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T13:45:36.26872" + }, + "IDBA": { + "content": [ + { + "0": [ + "testAssembly_contigs.fa:md5,03db7c3078458dd0ca091fa0662b140e" + ], + "1": [ + "testAssembly_contigs_700up.fa:md5,aa162c4f9a3684c7fcfd55dd044de22c" + ], + "annotationContigs": [ + "testAssembly_contigs_700up.fa:md5,aa162c4f9a3684c7fcfd55dd044de22c" + ], + "outContigs": [ + "testAssembly_contigs.fa:md5,03db7c3078458dd0ca091fa0662b140e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.1" + }, + "timestamp": "2024-11-25T13:33:59.338422" + } +} \ No newline at end of file diff --git a/tests/modules/runReadsToContig/runReadsToContig.nf.test b/tests/modules/runReadsToContig/runReadsToContig.nf.test new file mode 100644 index 0000000..3933515 --- /dev/null +++ b/tests/modules/runReadsToContig/runReadsToContig.nf.test @@ -0,0 +1,101 @@ +nextflow_workflow { + + name "Test Workflow READSTOCONTIGS" + script "modules/runReadsToContig/runReadsToContig.nf" + workflow "READSTOCONTIGS" + + test("BWA mem") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testR2C" + } + r2c { + r2c_aligner = "bwa" + extractUnmapped = true + } + } + workflow { + """ + input[0] = params.r2c.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = file("${projectDir}/test_data/contigs.fa") + """ + } + } + + then { + assertAll( + {assert workflow.success} + ) + } + + } + + test("Bowtie2") { + + when { + params { + shared { + outDir = "./testing_output" + 
projName = "testR2C" + } + r2c { + r2c_aligner = "bowtie2" + extractUnmapped = true + } + } + workflow { + """ + input[0] = params.r2c.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = file("${projectDir}/test_data/contigs.fa") + """ + } + } + + then { + assertAll( + {assert workflow.success} + ) + } + + } + + test("minimap2") { + + when { + params { + shared { + outDir = "./testing_output" + projName = "testR2C" + } + r2c { + r2c_aligner = "minimap2" + extractUnmapped = true + } + } + workflow { + """ + input[0] = params.r2c.plus(params.shared) + input[1] = [file("${projectDir}/test_data/Ecoli_10x.1.fastq"),file("${projectDir}/test_data/Ecoli_10x.2.fastq")] + input[2] = file("${projectDir}/nf_assets/NO_FILE2") + input[3] = file("${projectDir}/test_data/contigs.fa") + """ + } + } + + then { + assertAll( + {assert workflow.success} + ) + } + + } + + +} diff --git a/tests/modules/runReadsToContig/runReadsToContig.nf.test.snap b/tests/modules/runReadsToContig/runReadsToContig.nf.test.snap new file mode 100644 index 0000000..cc4cc37 --- /dev/null +++ b/tests/modules/runReadsToContig/runReadsToContig.nf.test.snap @@ -0,0 +1,14 @@ +{ + "BWA mem": { + "content": [ + "Final_contigs.fasta:md5,cfb6bc7595556231e115e5c9ad57f20d", + "readsToContigs_coverage.table.json:md5,ea221b314df455a84177b669edb3e5b7", + "readsToContigs.alnstats.txt:md5,259f501e745b79cb92b69e335d01cac7" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-17T13:52:25.552513444" + } +} \ No newline at end of file