Fix meta map stability #480

Merged · 4 commits · Jul 24, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#447](https://github.com/nf-core/mag/pull/447) - Remove `default: None` from parameter schema (by @drpatelh)
- [#449](https://github.com/nf-core/mag/pull/449) - Fix results file overwriting in Ancient DNA workflow (reported by @alexhbnr, fix by @jfy133)
- [#470](https://github.com/nf-core/mag/pull/470) - Fix binning preparation from running even when binning was requested to be skipped (reported by @prototaxites, fix by @jfy133)
+- [#480](https://github.com/nf-core/mag/pull/480) - Improved `-resume` reliability through better meta map preservation (reported by @prototaxites, fix by @jfy133)

### `Dependencies`

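For context on the fix itself: the pattern this PR removes is "clone, then mutate", and the pattern it introduces is Groovy's non-mutating map arithmetic. A minimal plain-Groovy sketch of the difference (toy values, not taken from the pipeline):

    def meta = [id: 'sample1', assembler: 'MEGAHIT']

    // Old pattern: shallow-copy the map, then mutate the copy in place.
    def meta_mut = meta.clone()
    meta_mut['binner'] = 'MetaBAT2'

    // New pattern: + and - always return a fresh map and never touch the
    // original, so channel elements cannot end up sharing mutable state.
    def meta_add = meta + [binner: 'MetaBAT2']            // add/overwrite a key
    def meta_del = meta_add - meta_add.subMap('binner')   // drop a key

    assert meta == [id: 'sample1', assembler: 'MEGAHIT']  // original untouched
    assert meta_add.binner == 'MetaBAT2'
    assert !meta_del.containsKey('binner')

Both patterns copy, but the arithmetic form removes any temptation to mutate a map that may still be referenced elsewhere in the workflow, which is what keeps meta maps (and hence `-resume` task hashing) stable across runs.
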
25 changes: 10 additions & 15 deletions subworkflows/local/binning.nf
@@ -25,17 +25,16 @@ workflow BINNING {
    // generate coverage depths for each contig
    ch_summarizedepth_input = assemblies
        .map { meta, assembly, bams, bais ->
-           def meta_new = meta.clone()
+           def meta_keys = meta.keySet()
+           def meta_new = meta + meta.subMap(meta_keys)
            [ meta_new, bams, bais ]
        }

    METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS ( ch_summarizedepth_input )

    ch_metabat_depths = METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth
        .map { meta, depths ->
-           def meta_new = meta.clone()
-           meta_new['binner'] = 'MetaBAT2'
-
+           def meta_new = meta + [binner: 'MetaBAT2']
            [ meta_new, depths ]
        }

@@ -44,9 +43,7 @@ workflow BINNING {
    // combine depths back with assemblies
    ch_metabat2_input = assemblies
        .map { meta, assembly, bams, bais ->
-           def meta_new = meta.clone()
-           meta_new['binner'] = 'MetaBAT2'
-
+           def meta_new = meta + [binner: 'MetaBAT2']
            [ meta_new, assembly, bams, bais ]
        }
        .join( ch_metabat_depths, by: 0 )
@@ -59,18 +56,18 @@ workflow BINNING {
        CONVERT_DEPTHS ( ch_metabat2_input )
        ch_maxbin2_input = CONVERT_DEPTHS.out.output
            .map { meta, assembly, reads, depth ->
-               def meta_new = meta.clone()
-               meta_new['binner'] = 'MaxBin2'
-
+               def meta_new = meta + [binner: 'MaxBin2']
                [ meta_new, assembly, reads, depth ]
            }
        ch_versions = ch_versions.mix(CONVERT_DEPTHS.out.versions.first())
    }

    // main bins for decompressing for MAG_DEPTHS
    ch_final_bins_for_gunzip = Channel.empty()

    // final gzipped bins
    ch_binning_results_gzipped_final = Channel.empty()

    // run binning
    if ( !params.skip_metabat2 ) {
        METABAT2_METABAT2 ( ch_metabat2_input )
@@ -90,9 +87,7 @@

    ch_concoct_input = assemblies
        .map { meta, bins, bams, bais ->
-           def meta_new = meta.clone()
-           meta_new['binner'] = 'CONCOCT'
-
+           def meta_new = meta + [binner: 'CONCOCT']
            [ meta_new, bins, bams, bais ]
        }
        .multiMap {
@@ -135,9 +130,9 @@
    ch_versions = ch_versions.mix(GUNZIP_UNBINS.out.versions.first())

    emit:
-   bins = ch_binning_results_gunzipped
+   bins = ch_binning_results_gunzipped.dump(tag: "ch_binning_results_gunzipped")
    bins_gz = ch_binning_results_gzipped_final
-   unbinned = ch_splitfasta_results_gunzipped
+   unbinned = ch_splitfasta_results_gunzipped.dump(tag: "ch_splitfasta_results_gunzipped")
    unbinned_gz = SPLIT_FASTA.out.unbinned
    metabat2depths = METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS.out.depth
    versions = ch_versions
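The two `dump(tag: ...)` calls added to the `emit:` block above are inert during normal runs; Nextflow's `dump` operator only prints when the pipeline is launched with the `-dump-channels` option. A small self-contained sketch (hypothetical channel contents, not pipeline code) of how it behaves:

    workflow {
        Channel
            .of( [ [id: 'sample1', binner: 'MetaBAT2'], 'bin.1.fa' ] )
            .dump(tag: 'ch_binning_results_gunzipped')   // printed only with -dump-channels
            .view()
    }

Running this with `nextflow run main.nf -dump-channels` prints each element prefixed with its tag; without the flag, `dump` simply passes elements through unchanged, so the calls are safe to leave in production code.
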
15 changes: 6 additions & 9 deletions subworkflows/local/binning_refinement.nf
@@ -55,8 +55,7 @@ workflow BINNING_REFINEMENT {
        .mix(DASTOOL_FASTATOCONTIG2BIN_CONCOCT.out.fastatocontig2bin)
        .map {
            meta, fastatocontig2bin ->
-               def meta_new = meta.clone()
-               meta_new.remove('binner')
+               def meta_new = meta - meta.subMap('binner')
                [ meta_new, fastatocontig2bin ]
        }
        .groupTuple(by: 0)
@@ -81,25 +80,23 @@
        .map {
            meta, bin ->
                if (bin.name != "unbinned.fa") {
-                   def meta_new = meta.clone()
-                   meta_new['binner'] = 'DASTool'
+                   def meta_new = meta + [binner: 'DASTool']
                    [ meta_new, bin ]
                }
        }
        .groupTuple()
        .map {
            meta, bins ->
-               def meta_new = meta.clone()
-               meta_new['domain'] = params.bin_domain_classification ? 'prokarya' : 'unclassified'
+               def domain_class = params.bin_domain_classification ? 'prokarya' : 'unclassified'
+               def meta_new = meta + [domain: domain_class]
                [ meta_new, bins ]
        }

    ch_input_for_renamedastool = DASTOOL_DASTOOL.out.bins
        .map {
            meta, bins ->
-               def meta_new = meta.clone()
-               meta_new['binner'] = 'DASTool'
-               meta_new['domain'] = params.bin_domain_classification ? 'prokarya' : 'unclassified'
+               def domain_class = params.bin_domain_classification ? 'prokarya' : 'unclassified'
+               def meta_new = meta + [binner: 'DASTool', domain: domain_class]
                [ meta_new, bins ]
        }

12 changes: 5 additions & 7 deletions subworkflows/local/depths.nf
@@ -2,7 +2,7 @@ params.mag_depths_options = [:]
params.mag_depths_plot_options = [:]
params.mag_depths_summary_options = [:]

-include { MAG_DEPTHS } from '../../modules/local/mag_depths'
+include { MAG_DEPTHS         } from '../../modules/local/mag_depths'
include { MAG_DEPTHS_PLOT    } from '../../modules/local/mag_depths_plot'
include { MAG_DEPTHS_SUMMARY } from '../../modules/local/mag_depths_summary'

@@ -24,24 +24,22 @@ workflow DEPTHS {
    ch_versions = Channel.empty()

    // Compute bin depths for different samples (according to `binning_map_mode`)
-   // Create a new meta joining key first, but clone meta so that
+   // Create a new meta joining key first, but copy meta so that
    // we retain the information about binners and domain classification
    ch_depth_input = bins_unbins
        .map { meta, bins ->
-           def meta_join = meta.clone()
-           meta_join.remove('binner')
-           meta_join.remove('domain')
+           def meta_join = meta - meta.subMap('binner','domain')
            [ meta_join, meta, bins ]
        }
        .combine( depths, by: 0 )
        .map { meta_join, meta, bins, contig_depths_file ->
-           def meta_new = meta.clone()
-           meta_new.remove('domain')
+           def meta_new = meta - meta.subMap('domain')
            [ meta_new, bins, contig_depths_file ]
        }
        .transpose()
        .groupTuple(by: [0,2])

    MAG_DEPTHS ( ch_depth_input )
    ch_versions = ch_versions.mix(MAG_DEPTHS.out.versions)
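The comment in the hunk above describes a join-key pattern worth spelling out: the keys that differ between the two channels (`binner`, `domain`) are stripped to build a reduced joining key, while the full meta map is carried alongside it so no information is lost after the `combine`. An illustrative sketch with hypothetical channel contents (the channel names and file names here are invented for the example):

    workflow {
        ch_bins = Channel.of(
            [ [id: 's1', assembler: 'MEGAHIT', binner: 'MetaBAT2', domain: 'prokarya'], 'bins.fa' ]
        )
        ch_contig_depths = Channel.of(
            [ [id: 's1', assembler: 'MEGAHIT'], 'depths.txt' ]
        )

        ch_bins
            .map { meta, bins ->
                def meta_join = meta - meta.subMap('binner', 'domain')
                [ meta_join, meta, bins ]          // carry full meta next to the reduced key
            }
            .combine( ch_contig_depths, by: 0 )    // match on the reduced key only
            .map { meta_join, meta, bins, depths ->
                [ meta - meta.subMap('domain'), bins, depths ]
            }
            .view()
    }
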
33 changes: 11 additions & 22 deletions subworkflows/local/tiara.nf
@@ -14,15 +14,14 @@ workflow TIARA {

    bins = bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.bin = 'bins'
+           def meta_new = meta + [bin: 'bins']
            [meta_new, bins]
        }

    unbins = unbins
        .map { meta, unbins ->
-           def meta_new = meta.clone()
-           meta_new.bin = 'unbins'
+           def meta_new = meta + [bin: 'unbins']
            [meta_new, unbins]
        }

@@ -40,9 +39,7 @@
    ch_contigs_to_bin_tiara = DASTOOL_FASTATOCONTIG2BIN_TIARA.out.fastatocontig2bin
        .combine(ch_tiara_input, by: 0)
        .map { meta, contig2bin, bins ->
-           def meta_join = meta.clone()
-           meta_join.remove('binner')
-           meta_join.remove('bin')
+           def meta_join = meta - meta.subMap('binner', 'bin')
            [ meta_join, meta, contig2bin, bins ]
        }

@@ -57,43 +54,37 @@

    ch_eukarya_bins = TIARA_CLASSIFY.out.eukarya_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'eukarya'
+           def meta_new = meta + [domain: 'eukarya']
            [meta_new, bins]
        }

    ch_prokarya_bins = TIARA_CLASSIFY.out.prokarya_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'prokarya'
+           def meta_new = meta + [domain: 'prokarya']
            [meta_new, bins]
        }

    ch_bacteria_bins = TIARA_CLASSIFY.out.bacteria_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'bacteria'
+           def meta_new = meta + [domain: 'bacteria']
            [meta_new, bins]
        }

    ch_archaea_bins = TIARA_CLASSIFY.out.archaea_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'archaea'
+           def meta_new = meta + [domain: 'archaea']
            [meta_new, bins]
        }

    ch_organelle_bins = TIARA_CLASSIFY.out.organelle_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'organelle'
+           def meta_new = meta + [domain: 'organelle']
            [meta_new, bins]
        }

    ch_unknown_bins = TIARA_CLASSIFY.out.unknown_bins
        .map { meta, bins ->
-           def meta_new = meta.clone()
-           meta_new.domain = 'unknown'
+           def meta_new = meta + [domain: 'unknown']
            [meta_new, bins]
        }

@@ -109,8 +100,7 @@
meta.bin == "bins"
}
.map { meta, bins ->
def meta_new = meta.clone()
meta_new.remove('bin')
def meta_new = meta - meta.subMap('bin')
[meta_new, bins]
}

@@ -119,8 +109,7 @@
meta.bin == "unbins"
}
.map { meta, bins ->
def meta_new = meta.clone()
meta_new.remove('bin')
def meta_new = meta - meta.subMap('bin')
[meta_new, bins]
}

34 changes: 12 additions & 22 deletions workflows/mag.nf
@@ -325,8 +325,7 @@ workflow MAG {
    ch_short_reads_forcat = ch_short_reads_phixremoved
        .map {
            meta, reads ->
-               def meta_new = meta.clone()
-               meta_new.remove('run')
+               def meta_new = meta - meta.subMap('run')
                [ meta_new, reads ]
        }
        .groupTuple()
@@ -369,8 +368,7 @@
        ch_short_reads = ch_raw_short_reads
            .map {
                meta, reads ->
-                   def meta_new = meta.clone()
-                   meta_new.remove('run')
+                   def meta_new = meta - meta.subMap('run')
                    [ meta_new, reads ]
            }
    }
@@ -388,8 +386,7 @@
        ch_long_reads = ch_raw_long_reads
            .map {
                meta, reads ->
-                   def meta_new = meta.clone()
-                   meta_new.remove('run')
+                   def meta_new = meta - meta.subMap('run')
                    [ meta_new, reads ]
            }

@@ -457,8 +454,7 @@
        KRONA_DB ()
        ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona)
            .map { classifier, meta, report ->
-               def meta_new = meta.clone()
-               meta_new.classifier = classifier
+               def meta_new = meta + [classifier: classifier]
                [ meta_new, report ]
            }
        KRONA (
@@ -519,8 +515,7 @@
            MEGAHIT ( ch_short_reads_grouped )
            ch_megahit_assemblies = MEGAHIT.out.assembly
                .map { meta, assembly ->
-                   def meta_new = meta.clone()
-                   meta_new.assembler = "MEGAHIT"
+                   def meta_new = meta + [assembler: 'MEGAHIT']
                    [ meta_new, assembly ]
                }
            ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies)
@@ -563,8 +558,7 @@
            SPADES ( ch_short_reads_spades )
            ch_spades_assemblies = SPADES.out.assembly
                .map { meta, assembly ->
-                   def meta_new = meta.clone()
-                   meta_new.assembler = "SPAdes"
+                   def meta_new = meta + [assembler: 'SPAdes']
                    [ meta_new, assembly ]
                }
            ch_assemblies = ch_assemblies.mix(ch_spades_assemblies)
@@ -583,8 +577,7 @@
            SPADESHYBRID ( ch_reads_spadeshybrid )
            ch_spadeshybrid_assemblies = SPADESHYBRID.out.assembly
                .map { meta, assembly ->
-                   def meta_new = meta.clone()
-                   meta_new.assembler = "SPAdesHybrid"
+                   def meta_new = meta + [assembler: "SPAdesHybrid"]
                    [ meta_new, assembly ]
                }
            ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies)
@@ -692,16 +685,14 @@


    } else {
-       ch_binning_results_bins = BINNING.out.bins
+       ch_binning_results_bins = BINNING.out.bins.dump(tag: 'BINNING.out.bins')
            .map { meta, bins ->
-               meta_new = meta.clone()
-               meta_new.domain = 'unclassified'
+               def meta_new = meta + [domain: 'unclassified']
                [meta_new, bins]
            }
-       ch_binning_results_unbins = BINNING.out.unbinned
+       ch_binning_results_unbins = BINNING.out.unbinned.dump(tag: 'BINNING.out.unbins')
            .map { meta, bins ->
-               meta_new = meta.clone()
-               meta_new.domain = 'unclassified'
+               def meta_new = meta + [domain: 'unclassified']
                [meta_new, bins]
            }
    }
@@ -886,8 +877,7 @@
    if (!params.skip_prokka){
        ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins.transpose()
            .map { meta, bin ->
-               def meta_new = meta.clone()
-               meta_new.id = bin.getBaseName()
+               def meta_new = meta + [id: bin.getBaseName()]
                [ meta_new, bin ]
            }
            .filter { meta, bin ->
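One detail worth noting in the mag.nf hunks above: two of the removed lines (`meta_new = meta.clone()` in the `else` branch) were missing `def`. In a Groovy script, an undeclared assignment inside a closure resolves to the script-level binding, so every invocation of the `.map` closure shares a single `meta_new` variable and concurrently processed channel elements can race on it. A minimal sketch of the safe form the PR adopts (toy data, not pipeline code):

    workflow {
        Channel
            .of( [id: 'a'], [id: 'b'], [id: 'c'] )
            .map { meta ->
                // `def` keeps meta_new local to this invocation of the closure;
                // map arithmetic keeps the incoming meta untouched.
                def meta_new = meta + [domain: 'unclassified']
                meta_new
            }
            .view()
    }

Together, the `def` declarations and the `+`/`-`/`subMap` idiom mean no meta map is ever mutated after it enters a channel, which is what the changelog entry means by "better meta map preservation".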