Skip to content

Commit

Permalink
Improve GUNC execution
Browse files Browse the repository at this point in the history
  • Loading branch information
dialvarezs committed Nov 1, 2024
1 parent da52285 commit a4f42ef
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 20 deletions.
6 changes: 3 additions & 3 deletions modules/local/combine_tsv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ process COMBINE_TSV {
'biocontainers/bioawk:1.0--hed695b0_5' }"

input:
path(bin_summaries, stageAs: "bin_summaries/*.tsv")
path(tsv_files, stageAs: "tsv_files/*.tsv")

output:
path("*.tsv") , emit: combined
path "versions.yml", emit: versions

script:
def prefix = task.ext.prefix ?: "bin_depths_summary_combined"
def prefix = task.ext.prefix ?: "tsv_summary_combined"
"""
bioawk '(NR == 1) || (FNR > 1)' ${bin_summaries} > ${prefix}.tsv
bioawk '(NR == 1) || (FNR > 1)' ${tsv_files} > ${prefix}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
46 changes: 29 additions & 17 deletions subworkflows/local/bin_qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,19 @@ include { CHECKM_QA } from '../../modules/nf-core/checkm
include { CHECKM_LINEAGEWF } from '../../modules/nf-core/checkm/lineagewf/main'
include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict/main'
include { COMBINE_TSV as COMBINE_BINQC_TSV } from '../../modules/local/combine_tsv'
include { COMBINE_TSV as COMBINE_GUNC_TSV } from '../../modules/local/combine_tsv'
include { GUNC_DOWNLOADDB } from '../../modules/nf-core/gunc/downloaddb/main'
include { GUNC_RUN } from '../../modules/nf-core/gunc/run/main'
include { GUNC_MERGECHECKM } from '../../modules/nf-core/gunc/mergecheckm/main'


workflow BIN_QC {
take:
ch_bins // [ [ meta] , fasta ], input bins (mandatory)
ch_checkm_db // [ db ], presupplied CheckM database (optional)
ch_checkm2_db // [ [meta] , db ], presupplied CheckM2 database (optional)
ch_busco_db // [ db ], presupplied BUSCO database (optional)
ch_gunc_db // [ db ], presupplied GUNC database (optional)
ch_bins // [ [ meta] , fasta ], input bins (mandatory)
ch_checkm_db // [ db ], presupplied CheckM database (optional)
ch_checkm2_db // [ [meta] , db ], presupplied CheckM2 database (optional)
ch_busco_db // [ db ], presupplied BUSCO database (optional)
ch_gunc_db // [ db ], presupplied GUNC database (optional)

main:
qc_summary = []
Expand Down Expand Up @@ -81,7 +82,7 @@ workflow BIN_QC {
)

ch_multiqc_files = ch_multiqc_files.mix(
BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map{ it[1] }
BUSCO.out.summary_domain.mix(BUSCO.out.summary_specific).map { it[1] }
)
qc_summary = BUSCO_SUMMARY.out.summary
ch_versions = ch_versions.mix(BUSCO.out.versions.first())
Expand All @@ -93,11 +94,12 @@ workflow BIN_QC {
ch_bins_for_checkmlineagewf = ch_input_bins_for_qc
.groupTuple()
.filter { meta, _bins ->
meta.domain != "eukarya"
}
meta.domain != "eukarya"
}
.multiMap { meta, fa ->
reads: [meta, fa]
ext: fa.extension.unique().join("") // the pipeline ensures that all bins will have the same extension
// the pipeline ensures that all bins will have the same extension
ext: fa.extension.unique().join("")
}

CHECKM_LINEAGEWF(ch_bins_for_checkmlineagewf.reads, ch_bins_for_checkmlineagewf.ext, ch_checkm_db)
Expand Down Expand Up @@ -137,10 +139,11 @@ workflow BIN_QC {
meta.domain != "eukarya"
}
.flatMap { meta, bins ->
bins.collect { bin -> [meta, bin] }
// Set ID per bin, but save original ID for merging with CheckM output
bins.collect { bin -> [[id: bin.baseName, _id: meta.id] + meta, bin] }
}

if ( params.gunc_db ) {
if (params.gunc_db) {
ch_db_for_gunc = ch_gunc_db
}
else {
Expand All @@ -153,23 +156,32 @@ workflow BIN_QC {

// Make sure to keep directory in sync with modules.conf
GUNC_RUN.out.maxcss_level_tsv
.map{it[1]}
.map { it[1] }
.collectFile(name: "gunc_summary.tsv", keepHeader: true, storeDir: "${params.outdir}/GenomeBinning/QC/")

if ( params.binqc_tool == 'checkm' ) {
ch_input_to_mergecheckm = GUNC_RUN.out.maxcss_level_tsv.combine(CHECKM_QA.out.output, by: 0)
if (params.binqc_tool == 'checkm') {
ch_input_to_gunc_combine = GUNC_RUN.out.maxcss_level_tsv
.map { meta, output ->
// restore original ID to combine with CheckM output
[[id: meta._id] + meta - meta.subMap(['_id']), output]
}
.groupTuple()

COMBINE_GUNC_TSV(ch_input_to_gunc_combine)

GUNC_MERGECHECKM(ch_input_to_mergecheckm)
// Join the GUNC tables combined by COMBINE_GUNC_TSV (invoked just above) with the CheckM output.
// The BINQC alias of COMBINE_TSV was referenced here before, leaving COMBINE_GUNC_TSV's output unused.
// NOTE(review): COMBINE_TSV as shown emits a bare path without meta — confirm the `by: 0` join key exists
ch_input_to_mergegunccheckm = COMBINE_GUNC_TSV.out.combined
    .combine(CHECKM_QA.out.output, by: 0)

GUNC_MERGECHECKM(ch_input_to_mergegunccheckm)
// Channel operators return a new channel rather than mutating the receiver;
// reassign so the GUNC_MERGECHECKM versions are actually collected (same pattern as the BUSCO branch).
ch_versions = ch_versions.mix(GUNC_MERGECHECKM.out.versions)

// Make sure to keep directory in sync with modules.conf
GUNC_MERGECHECKM.out.tsv
.map{it[1]}
.map { it[1] }
.collectFile(name: "gunc_checkm_summary.tsv", keepHeader: true, storeDir: "${params.outdir}/GenomeBinning/QC/")
}
}


emit:
qc_summary = qc_summary
multiqc_files = ch_multiqc_files
Expand Down

0 comments on commit a4f42ef

Please sign in to comment.