From 0fcb34a6b227ff072d42d5992962d240acbcd025 Mon Sep 17 00:00:00 2001 From: Andre Watson Date: Mon, 9 Dec 2024 16:28:45 -0700 Subject: [PATCH] reads to contig restructured as subworkflow. adding nf-test CI --- .github/workflows/ci-tests.yml | 30 ++++++++ main.nf | 13 +++- .../runReadsToContig}/Dockerfile | 0 ...ContigCoverageFold_plots_from_samPileup.pl | 0 .../runReadsToContig}/bin/bam_to_fastq.pl | 0 .../runReadsToContig}/bin/contig_stats.pl | 0 .../runReadsToContig}/bin/fastq_utility.pm | 0 .../runReadsToContig}/bin/runReadsToContig.pl | 0 .../bin/tab2Json_for_dataTable.pl | 0 .../runReadsToContig}/runReadsToContig.nf | 70 +++++++++++-------- .../parameters/r2c_forContigTax.json | 0 .../test_files/parameters/r2c_simple.json | 0 nextflow.config | 19 ++++- runReadsToContig/nextflow.config | 23 ------ 14 files changed, 97 insertions(+), 58 deletions(-) create mode 100644 .github/workflows/ci-tests.yml rename {runReadsToContig => modules/runReadsToContig}/Dockerfile (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/ContigCoverageFold_plots_from_samPileup.pl (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/bam_to_fastq.pl (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/contig_stats.pl (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/fastq_utility.pm (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/runReadsToContig.pl (100%) rename {runReadsToContig => modules/runReadsToContig}/bin/tab2Json_for_dataTable.pl (100%) rename {runReadsToContig => modules/runReadsToContig}/runReadsToContig.nf (57%) rename {runReadsToContig => modules/runReadsToContig}/test_files/parameters/r2c_forContigTax.json (100%) rename {runReadsToContig => modules/runReadsToContig}/test_files/parameters/r2c_simple.json (100%) delete mode 100644 runReadsToContig/nextflow.config diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml new file mode 100644 index 0000000..539c6d9 --- /dev/null +++ 
b/.github/workflows/ci-tests.yml @@ -0,0 +1,30 @@ +name: CI Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 17 (minimum for Nextflow latest-edge) + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Setup Nextflow latest-edge + uses: nf-core/setup-nextflow@v1 + with: + version: "latest-edge" + + - name: Install nf-test + run: | + wget -qO- https://get.nf-test.com | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run Tests + run: nf-test test --ci \ No newline at end of file diff --git a/main.nf b/main.nf index 1c9458b..62de5d7 100644 --- a/main.nf +++ b/main.nf @@ -5,6 +5,7 @@ include {COUNTFASTQ} from './modules/countFastq/countFastq.nf' include {FAQCS} from './modules/runFaQCs/runFaQCs.nf' include {HOSTREMOVAL} from './modules/hostRemoval/hostRemoval.nf' include {ASSEMBLY} from './modules/runAssembly/runAssembly.nf' +include {READSTOCONTIGS} from './modules/runReadsToContig/runReadsToContig.nf' workflow { @@ -12,6 +13,10 @@ workflow { pairedFiles = channel.fromPath(params.pairedFiles, checkIfExists:true) unpairedFiles = channel.fromPath(params.unpairedFiles, checkIfExists:true) + contigs = channel.empty() + if(params.r2c.useAssembledContigs) { + contigs = channel.fromPath(params.inputContigs, checkIfExists:true) + } if(params.modules.sra2fastq) { SRA2FASTQ(params.sra2fastq.plus(params.shared)) @@ -38,8 +43,12 @@ workflow { unpaired = HOSTREMOVAL.out.unpaired.ifEmpty(params.unpairedFiles) } - if(params.modules.runAssembly) { + if(params.modules.runAssembly && !params.r2c.useAssembledContigs) { ASSEMBLY(params.assembly.plus(params.shared), paired, unpaired, avgLen) - } + contigs = ASSEMBLY.out.outContigs + } + + READSTOCONTIGS(params.r2c.plus(params.shared), paired, unpaired, contigs) + } \ No newline at end of file diff --git a/runReadsToContig/Dockerfile b/modules/runReadsToContig/Dockerfile similarity index 100% rename from 
runReadsToContig/Dockerfile rename to modules/runReadsToContig/Dockerfile diff --git a/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl b/modules/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl similarity index 100% rename from runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl rename to modules/runReadsToContig/bin/ContigCoverageFold_plots_from_samPileup.pl diff --git a/runReadsToContig/bin/bam_to_fastq.pl b/modules/runReadsToContig/bin/bam_to_fastq.pl similarity index 100% rename from runReadsToContig/bin/bam_to_fastq.pl rename to modules/runReadsToContig/bin/bam_to_fastq.pl diff --git a/runReadsToContig/bin/contig_stats.pl b/modules/runReadsToContig/bin/contig_stats.pl similarity index 100% rename from runReadsToContig/bin/contig_stats.pl rename to modules/runReadsToContig/bin/contig_stats.pl diff --git a/runReadsToContig/bin/fastq_utility.pm b/modules/runReadsToContig/bin/fastq_utility.pm similarity index 100% rename from runReadsToContig/bin/fastq_utility.pm rename to modules/runReadsToContig/bin/fastq_utility.pm diff --git a/runReadsToContig/bin/runReadsToContig.pl b/modules/runReadsToContig/bin/runReadsToContig.pl similarity index 100% rename from runReadsToContig/bin/runReadsToContig.pl rename to modules/runReadsToContig/bin/runReadsToContig.pl diff --git a/runReadsToContig/bin/tab2Json_for_dataTable.pl b/modules/runReadsToContig/bin/tab2Json_for_dataTable.pl similarity index 100% rename from runReadsToContig/bin/tab2Json_for_dataTable.pl rename to modules/runReadsToContig/bin/tab2Json_for_dataTable.pl diff --git a/runReadsToContig/runReadsToContig.nf b/modules/runReadsToContig/runReadsToContig.nf similarity index 57% rename from runReadsToContig/runReadsToContig.nf rename to modules/runReadsToContig/runReadsToContig.nf index 9bb5504..f353764 100644 --- a/runReadsToContig/runReadsToContig.nf +++ b/modules/runReadsToContig/runReadsToContig.nf @@ -1,12 +1,13 @@ #!/usr/bin/env nextflow -process r2c { - debug true 
+process validationAlignment { + label 'r2c' publishDir( - path: "$params.outDir/AssemblyBasedAnalysis/readsMappingToContig", + path: "${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig", mode: 'copy' ) input: + val settings path paired path unpaired path contigs @@ -20,27 +21,27 @@ process r2c { path "mapping.log", emit: logFile script: - def outPrefix = params.prefix!=null ? "$params.prefix" : "readsToContigs" + def outPrefix = "readsToContigs" def paired = paired.name != "NO_FILE" ? "-p \'${paired[0]} ${paired[1]}\' " : "" def unpaired = unpaired.name != "NO_FILE2" ? "-u $unpaired " : "" - def cutoff = params.assembledContigs ? "-c 0 " : "-c 0.1 " - def cpu = params.cpus != null ? "-cpu $params.cpus " : "" - def max_clip = params.r2g_max_clip != null ? "-max_clip $params.r2g_max_clip " : "" + def cutoff = settings["useAssembledContigs"] ? "-c 0 " : "-c 0.1 " + def cpu = settings["cpus"] != null ? "-cpu ${settings["cpus"]} " : "" + def max_clip = settings["r2g_max_clip"] != null ? "-max_clip ${settings["r2g_max_clip"]} " : "" - def ont_flag = (params.fastq_source != null && params.fastq_source.equalsIgnoreCase("nanopore")) ? "-x ont2d " : "" - def pb_flag = (params.fastq_source != null && params.fastq_source.equalsIgnoreCase("pacbio")) ? "-x pacbio " : "" + def ont_flag = (settings["fastq_source"] != null && settings["fastq_source"].equalsIgnoreCase("nanopore")) ? "-x ont2d " : "" + def pb_flag = (settings["fastq_source"] != null && settings["fastq_source"].equalsIgnoreCase("pacbio")) ? 
"-x pacbio " : "" def aligner_options = "" - if(params.r2c_aligner =~ "bowtie") { - def bowtie_options = params.r2c_aligner_options.replaceAll("-p\\s*\\d+","") + if(settings["r2c_aligner"] =~ "bowtie") { + def bowtie_options = settings["r2c_aligner_options"].replaceAll("-p\\s*\\d+","") if(!(bowtie_options =~ /-k/)) { bowtie_options += " -k 10 " } aligner_options = "-aligner bowtie -bowtie_options \'$bowtie_options\'" } - else if(params.r2c_aligner =~ "bwa") { - def bwa_options = params.r2c_aligner_options.replaceAll("-t\\s*\\d+","") + else if(settings["r2c_aligner"] =~ "bwa") { + def bwa_options = settings["r2c_aligner_options"].replaceAll("-t\\s*\\d+","") if (ont_flag != "") { unpaired = unpaired.replaceAll("-u ","-long ") bwa_options += ont_flag @@ -51,8 +52,8 @@ process r2c { } aligner_options = "-aligner bwa -bwa_options \'$bwa_options\'" } - else if (params.r2c_aligner =~ "minimap") { - def minimap_options = params.r2c_aligner_options.replaceAll("-t\\s*\\d+","") + else if (settings["r2c_aligner"] =~ "minimap") { + def minimap_options = settings["r2c_aligner_options"].replaceAll("-t\\s*\\d+","") if(ont_flag != "" || pb_flag != "") { unpaired = unpaired.replaceAll("-u ","-long ") } @@ -80,18 +81,20 @@ process r2c { } -process r2c_jsonTable { +process makeCoverageTable { + label 'r2c' publishDir( - path: "$params.outDir/AssemblyBasedAnalysis/readsMappingToContig", + path: "${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig", mode: 'copy', pattern: "*_coverage.table.json" ) publishDir( - path: "$params.outDir/AssemblyBasedAnalysis", + path: "${settings["outDir"]}/AssemblyBasedAnalysis", mode: 'copy', pattern: "*stats.{pdf,txt}" ) input: + val settings path cov_table path contigFile @@ -101,24 +104,25 @@ process r2c_jsonTable { path "*_coverage.table.json" script: - def rowLimit = params.rowLimit != null ? "$params.rowLimit" : "3000" - def outPrefix = params.prefix!=null ? 
"$params.prefix" : "readsToContigs" + def rowLimit = settings["rowLimit"] != null ? "${settings["rowLimit"]} " : "3000" """ tab2Json_for_dataTable.pl -project_dir . -mode contig -limit $rowLimit \ - ${outPrefix}_coverage.table > ${outPrefix}_coverage.table.json + readsToContigs_coverage.table > readsToContigs_coverage.table.json contig_stats.pl -p $contigFile > contigs_stats.txt """ } process extractUnmapped { + label 'r2c' publishDir( - path:"$params.outDir/AssemblyBasedAnalysis/readsMappingToContig/", + path:"${settings["outDir"]}/AssemblyBasedAnalysis/readsMappingToContig/", mode: 'copy', overwrite: true ) input: + val settings path bamFile path logFile @@ -135,18 +139,22 @@ process extractUnmapped { } -workflow { +workflow READSTOCONTIGS { + take: + settings + paired + unpaired + contigs + + main: "mkdir nf_assets".execute().text "touch nf_assets/NO_FILE".execute().text "touch nf_assets/NO_FILE2".execute().text - paired_ch = channel.fromPath(params.pairFile, checkIfExists:true).collect() - unpaired_ch = channel.fromPath(params.unpairFile, checkIfExists:true) - contig_ch = channel.fromPath(params.contigFile, checkIfExists:true) - - r2c(paired_ch, unpaired_ch, contig_ch) - r2c_jsonTable(r2c.out.cov_table, r2c.out.contig_file) - if(params.extractUnmapped) { - extractUnmapped(r2c.out.sortedBam, r2c.out.logFile) + + validationAlignment(settings, paired, unpaired, contigs) + makeCoverageTable(settings, validationAlignment.out.cov_table, validationAlignment.out.contig_file) + if(settings["extractUnmapped"]) { + extractUnmapped(settings, validationAlignment.out.sortedBam, validationAlignment.out.logFile) } } diff --git a/runReadsToContig/test_files/parameters/r2c_forContigTax.json b/modules/runReadsToContig/test_files/parameters/r2c_forContigTax.json similarity index 100% rename from runReadsToContig/test_files/parameters/r2c_forContigTax.json rename to modules/runReadsToContig/test_files/parameters/r2c_forContigTax.json diff --git 
a/runReadsToContig/test_files/parameters/r2c_simple.json b/modules/runReadsToContig/test_files/parameters/r2c_simple.json similarity index 100% rename from runReadsToContig/test_files/parameters/r2c_simple.json rename to modules/runReadsToContig/test_files/parameters/r2c_simple.json diff --git a/nextflow.config b/nextflow.config index 39de29c..37be0e2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -2,8 +2,10 @@ params { //input - pairedFiles = ["${projectDir}/nf_assets/NO_FILE"] + //pairedFiles = ["${projectDir}/nf_assets/NO_FILE"] + pairedFiles = ["${projectDir}/test_data/Ecoli_10x.1.fastq","${projectDir}/test_data/Ecoli_10x.2.fastq"] unpairedFiles = ["${projectDir}/nf_assets/NO_FILE2"] + inputContigs = "${projectDir}/nf_assets/NO_FILE3" //which modules are run modules { @@ -11,7 +13,7 @@ params { faqcs = false hostRemoval = false annotation = false - runAssembly = false + runAssembly = true } //module parameters -- passed directly into subworkflows according to best practices @@ -101,12 +103,22 @@ params { } + r2c { + useAssembledContigs = false + r2c_aligner = "bwa" + r2c_aligner_options = "" + r2g_max_clip = null + extractUnmapped = false + rowLimit = null + } + } //container settings singularity { enabled = true + pullTimeout = "1 hour" runOptions = "--compat" } @@ -128,6 +140,9 @@ process { withLabel: 'assembly' { container = 'apwat/run_assembly:1.5' } + withLabel: 'r2c' { + container = 'apwat/run_r2c:1.3' + } } //submission rate limit: needed for sra2fastq to operate correctly diff --git a/runReadsToContig/nextflow.config b/runReadsToContig/nextflow.config deleted file mode 100644 index b4a673d..0000000 --- a/runReadsToContig/nextflow.config +++ /dev/null @@ -1,23 +0,0 @@ -process.container = 'apwat/run_r2c:1.3' -singularity.enabled=true - -params { - pairFile = "nf_assets/NO_FILE" - unpairFile = "nf_assets/NO_FILE2" - contigFile = null - assembledContigs = null - r2c_aligner = "bwa" - r2c_aligner_options = "" - r2g_max_clip = null - fastq_source = 
null - prefix = null - cpus = 8 - outDir = "." - extractUnmapped = false - rowLimit = null -} - -workflow.onComplete = { - "rm -rf nf_assets".execute().text -} -