From 136f26e37c12bc7bb7543fc4ec001dbc1bcc80c3 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 7 May 2024 10:53:11 -0400 Subject: [PATCH] report reference genome length in align_reads output; use this value for assemble_refbased.reference_genome_length Currently, assemble_refbased reports reference_genome_length based on inspection of the mapped bam file used in plot_coverage; however, in the event that zero reads map, the reference length is reported as zero. This commit reports the actual length of the sequence in the reference fasta provided to assemble_refbased, as the first Int output of the scattered calls to assembly.align_reads (and exposes the value, which was already obtained but not output, in align_reads). --- pipes/WDL/tasks/tasks_assembly.wdl | 1 + pipes/WDL/workflows/assemble_refbased.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 522011e01..ad57684be 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -652,6 +652,7 @@ task align_reads { File aligned_only_reads_bam_idx = "~{sample_name}.mapped.bai" File aligned_only_reads_fastqc = "~{sample_name}.mapped_fastqc.html" File aligned_only_reads_fastqc_zip = "~{sample_name}.mapped_fastqc.zip" + Int reference_length = read_int("assembly_length") Int reads_provided = read_int("reads_provided") Int reads_aligned = read_int("reads_aligned") Int read_pairs_aligned = read_int("read_pairs_aligned") diff --git a/pipes/WDL/workflows/assemble_refbased.wdl b/pipes/WDL/workflows/assemble_refbased.wdl index 4c72fe4f5..a81ddbba1 100644 --- a/pipes/WDL/workflows/assemble_refbased.wdl +++ b/pipes/WDL/workflows/assemble_refbased.wdl @@ -194,7 +194,7 @@ workflow assemble_refbased { File align_to_ref_variants_vcf_gz = call_consensus.sites_vcf_gz Int assembly_length = call_consensus.assembly_length Int assembly_length_unambiguous = call_consensus.assembly_length_unambiguous 
- Int reference_genome_length = plot_ref_coverage.assembly_length + Int reference_genome_length = align_to_ref.reference_length[0] Float assembly_mean_coverage = plot_ref_coverage.mean_coverage Int dist_to_ref_snps = call_consensus.dist_to_ref_snps