-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #422 from broadinstitute/dp-denovo-update
incorporate assemble_refbased as refine/polish for denovo
- Loading branch information
Showing 15 changed files with 103 additions and 150 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ workflow align_and_count_report { | |
description: "Align reads to reference with minimap2 and count the number of hits. Results are returned in the format of 'samtools idxstats'." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
call reports.align_and_count | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ workflow align_and_count_multiple_report { | |
description: "Count the number of times reads map to provided reference sequences. Useful for counting spike-ins, etc." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ workflow align_and_plot { | |
description: "Align reads to reference and produce coverage plots and statistics." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
call assembly.align_reads as align | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,10 +3,17 @@ version 1.0 | |
import "../tasks/tasks_taxon_filter.wdl" as taxon_filter | ||
import "../tasks/tasks_read_utils.wdl" as read_utils | ||
import "../tasks/tasks_assembly.wdl" as assembly | ||
import "../tasks/tasks_intrahost.wdl" as intrahost | ||
import "assemble_refbased.wdl" as assemble_refbased | ||
|
||
workflow assemble_denovo { | ||
|
||
|
||
meta { | ||
description: "Assisted de novo viral genome assembly from raw reads." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
File reads_unmapped_bam | ||
|
||
|
@@ -19,18 +26,7 @@ workflow assemble_denovo { | |
File? filter_to_taxon_db | ||
File trim_clip_db | ||
|
||
File? novocraft_license | ||
|
||
Boolean call_isnvs=false | ||
|
||
String assembler="spades" | ||
Float? scaffold_min_length_fraction | ||
Float? scaffold_min_unambig | ||
Int? scaffold_replace_length=55 | ||
Int? nucmer_max_gap | ||
Int? nucmer_min_match | ||
Int? nucmer_min_cluster | ||
Float? scaffold_min_pct_contig_aligned | ||
String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned") | ||
} | ||
|
||
parameter_meta { | ||
|
@@ -57,8 +53,6 @@ workflow assemble_denovo { | |
} | ||
} | ||
|
||
String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned") | ||
|
||
if(length(deplete_bmtaggerDbs) + length(deplete_blastDbs) + length(deplete_bwaDbs) > 0) { | ||
call taxon_filter.deplete_taxa { | ||
input: | ||
|
@@ -87,48 +81,31 @@ workflow assemble_denovo { | |
reads_unmapped_bam = rmdup_ubam.dedup_bam, | ||
trim_clip_db = trim_clip_db, | ||
always_succeed = true, | ||
assembler = assembler, | ||
sample_name = sample_name | ||
} | ||
|
||
call assembly.scaffold { | ||
input: | ||
contigs_fasta = assemble.contigs_fasta, | ||
reads_bam = select_first([filter_to_taxon.taxfilt_bam, deplete_taxa.cleaned_bam, reads_unmapped_bam]), | ||
reference_genome_fasta = reference_genome_fasta, | ||
min_length_fraction = scaffold_min_length_fraction, | ||
min_unambig = scaffold_min_unambig, | ||
replace_length = scaffold_replace_length, | ||
nucmer_max_gap = nucmer_max_gap, | ||
nucmer_min_match = nucmer_min_match, | ||
nucmer_min_cluster = nucmer_min_cluster, | ||
scaffold_min_pct_contig_aligned = scaffold_min_pct_contig_aligned | ||
} | ||
|
||
call assembly.refine_2x_and_plot { | ||
input: | ||
assembly_fasta = scaffold.scaffold_fasta, | ||
reads_unmapped_bam = select_first([deplete_taxa.cleaned_bam, reads_unmapped_bam]), | ||
novocraft_license = novocraft_license, | ||
sample_name = sample_name | ||
contigs_fasta = assemble.contigs_fasta, | ||
reads_bam = select_first([filter_to_taxon.taxfilt_bam, deplete_taxa.cleaned_bam, reads_unmapped_bam]), | ||
reference_genome_fasta = reference_genome_fasta | ||
} | ||
|
||
if(call_isnvs) { | ||
call intrahost.isnvs_per_sample { | ||
input: | ||
assembly_fasta = refine_2x_and_plot.final_assembly_fasta, | ||
mapped_bam = refine_2x_and_plot.aligned_bam | ||
} | ||
call assemble_refbased.assemble_refbased as refine { | ||
input: | ||
reads_unmapped_bams = [rmdup_ubam.dedup_bam], | ||
reference_fasta = scaffold.scaffold_fasta, | ||
sample_name = sample_name | ||
} | ||
|
||
output { | ||
File final_assembly_fasta = refine_2x_and_plot.final_assembly_fasta | ||
File aligned_only_reads_bam = refine_2x_and_plot.aligned_only_reads_bam | ||
File coverage_plot = refine_2x_and_plot.coverage_plot | ||
Int assembly_length = refine_2x_and_plot.assembly_length | ||
Int assembly_length_unambiguous = refine_2x_and_plot.assembly_length_unambiguous | ||
Int reads_aligned = refine_2x_and_plot.reads_aligned | ||
Float mean_coverage = refine_2x_and_plot.mean_coverage | ||
File final_assembly_fasta = refine.assembly_fasta | ||
File aligned_only_reads_bam = refine.align_to_self_merged_aligned_only_bam | ||
File coverage_plot = refine.align_to_self_merged_coverage_plot | ||
Int assembly_length = refine.assembly_length | ||
Int assembly_length_unambiguous = refine.assembly_length_unambiguous | ||
Int reads_aligned = refine.align_to_self_merged_reads_aligned | ||
Float mean_coverage = refine.align_to_self_merged_mean_coverage | ||
|
||
File cleaned_bam = select_first([deplete_taxa.cleaned_bam, reads_unmapped_bam]) | ||
File? cleaned_fastqc = deplete_taxa.cleaned_fastqc | ||
|
@@ -155,22 +132,25 @@ workflow assemble_denovo { | |
String scaffolding_chosen_ref_name = scaffold.scaffolding_chosen_ref_name | ||
File scaffolding_stats = scaffold.scaffolding_stats | ||
File scaffolding_alt_contigs = scaffold.scaffolding_alt_contigs | ||
|
||
Int replicate_concordant_sites = refine.replicate_concordant_sites | ||
Int replicate_discordant_snps = refine.replicate_discordant_snps | ||
Int replicate_discordant_indels = refine.replicate_discordant_indels | ||
Int num_read_groups = refine.num_read_groups | ||
Int num_libraries = refine.num_libraries | ||
File replicate_discordant_vcf = refine.replicate_discordant_vcf | ||
|
||
File isnvs_vcf = refine.align_to_self_isnvs_vcf | ||
|
||
File? isnvsFile = isnvs_per_sample.isnvsFile | ||
|
||
File aligned_bam = refine_2x_and_plot.aligned_bam | ||
File aligned_only_reads_bam_idx = refine_2x_and_plot.aligned_only_reads_bam_idx | ||
File aligned_only_reads_fastqc = refine_2x_and_plot.aligned_only_reads_fastqc | ||
File coverage_tsv = refine_2x_and_plot.coverage_tsv | ||
Int read_pairs_aligned = refine_2x_and_plot.read_pairs_aligned | ||
Float bases_aligned = refine_2x_and_plot.bases_aligned | ||
File aligned_bam = refine.align_to_self_merged_aligned_and_unaligned_bam[0] | ||
File aligned_only_reads_fastqc = refine.align_to_ref_per_input_fastqc[0] | ||
File coverage_tsv = refine.align_to_self_merged_coverage_tsv | ||
Int read_pairs_aligned = refine.align_to_self_merged_read_pairs_aligned | ||
Float bases_aligned = refine.align_to_self_merged_bases_aligned | ||
|
||
String? deplete_viral_classify_version = deplete_taxa.viralngs_version | ||
String? taxfilt_viral_classify_version = filter_to_taxon.viralngs_version | ||
String assemble_viral_assemble_version = assemble.viralngs_version | ||
String scaffold_viral_assemble_version = scaffold.viralngs_version | ||
String refine_viral_assemble_version = refine_2x_and_plot.viralngs_version | ||
String? isnvs_viral_phylo_version = isnvs_per_sample.viralngs_version | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ workflow classify_kraken2 { | |
description: "Taxonomic classification of sequences via kraken2 (or kraken2x, depending on the database provided)." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
call metagenomics.kraken2 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ workflow classify_single { | |
description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), and FASTQC/multiQC of reads." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ workflow demux_only { | |
description: "Picard-based demultiplexing and basecalling from a tarball of a raw BCL directory." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ workflow demux_plus { | |
description: "Picard-based demultiplexing and basecalling from a tarball of a raw BCL directory, followed by basic metagenomics and QC metrics. Intended for automatic triggering post upload on DNAnexus." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ workflow fastq_to_ubam { | |
description: "Convert reads from fastq format (single or paired) to unaligned BAM format." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
call tasks_read_utils.FastqToUBAM | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ workflow fetch_sra_to_bam { | |
description: "Retrieve reads from the NCBI Short Read Archive in unaligned BAM format with relevant metadata encoded." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
call ncbi_tools.Fetch_SRA_to_BAM | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ workflow genbank { | |
description: "Prepare assemblies for Genbank submission. This includes annotation by simple coordinate transfer from Genbank annotations and a multiple alignment. See https://viral-pipelines.readthedocs.io/en/latest/ncbi_submission.html for details." | ||
author: "Broad Viral Genomics" | ||
email: "[email protected]" | ||
allowNestedInputs: true | ||
} | ||
|
||
input { | ||
|
Oops, something went wrong.