From 764a0b51cec6c64d35dd566af28f0072444204ad Mon Sep 17 00:00:00 2001 From: Danny Park Date: Thu, 28 Mar 2024 09:41:50 -0400 Subject: [PATCH 1/7] parameterize skani opts --- pipes/WDL/tasks/tasks_assembly.wdl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index b8fa2bee1..c0b69517f 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -111,6 +111,10 @@ task select_references { Array[File] reference_genomes_fastas File contigs_fasta + Int? skani_m + Int? skani_s + Int? skani_c + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" Int machine_mem_gb = 4 Int cpu = 2 @@ -128,6 +132,9 @@ task select_references { "~{contigs_basename}.refs_skani_dist.full.tsv" \ "~{contigs_basename}.refs_skani_dist.top.tsv" \ "~{contigs_basename}.ref_clusters.tsv" \ + ~{'-m ' + skani_m} \ + ~{'-s ' + skani_s} \ + ~{'-c ' + skani_c} \ --loglevel=DEBUG # create basename-only version of ref_clusters output file @@ -188,6 +195,10 @@ task scaffold { Int replace_length=55 Boolean allow_incomplete_output = false + Int? skani_m + Int? skani_s + Int? skani_c + Int? nucmer_max_gap Int? nucmer_min_match Int? nucmer_min_cluster @@ -283,6 +294,9 @@ task scaffold { "~{sample_name}.refs_skani_dist.full.tsv" \ "~{sample_name}.refs_skani_dist.top.tsv" \ "~{sample_name}.ref_clusters.tsv" \ + ~{'-m ' + skani_m} \ + ~{'-s ' + skani_s} \ + ~{'-c ' + skani_c} \ --loglevel=DEBUG CHOSEN_REF_FASTA=$(cut -f 1 "~{sample_name}.refs_skani_dist.full.tsv" | tail +2 | head -1) cut -f 3 "~{sample_name}.refs_skani_dist.full.tsv" | tail +2 | head -1 > SKANI_ANI From 2b71376e91698b0d2544a8deb614ea9c80d4434c Mon Sep 17 00:00:00 2001 From: Danny Park Date: Thu, 28 Mar 2024 17:46:41 -0400 Subject: [PATCH 2/7] bump viral-assemble --- pipes/WDL/tasks/tasks_assembly.wdl | 10 +++++----- pipes/WDL/tasks/tasks_reports.wdl | 2 +- requirements-modules.txt | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index c0b69517f..522011e01 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -15,7 +15,7 @@ task assemble { String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt") Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } parameter_meta{ reads_unmapped_bam: { @@ -115,7 +115,7 @@ task select_references { Int? skani_s Int? skani_c - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" Int machine_mem_gb = 4 Int cpu = 2 Int disk_size = 100 @@ -206,7 +206,7 @@ task scaffold { Float? scaffold_min_pct_contig_aligned Int? machine_mem_gb - String docker="quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker="quay.io/broadinstitute/viral-assemble:2.3.1.4" # do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades") @@ -691,7 +691,7 @@ task refine_assembly_with_aligned_reads { Int min_coverage = 3 Int machine_mem_gb = 15 - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } Int disk_size = 375 @@ -816,7 +816,7 @@ task refine_2x_and_plot { String? plot_coverage_novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100 -k" Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" # do this in two steps in case the input doesn't actually have "cleaned" in the name String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned") diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 873ef0b65..447580253 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -674,7 +674,7 @@ task compare_two_genomes { File genome_two String out_basename - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } Int disk_size = 50 diff --git a/requirements-modules.txt b/requirements-modules.txt index 7e51d051a..4e93df79a 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,5 +1,5 @@ broadinstitute/viral-core=2.3.1 -broadinstitute/viral-assemble=2.3.1.3 +broadinstitute/viral-assemble=2.3.1.4 broadinstitute/viral-classify=2.2.4.0 broadinstitute/viral-phylo=2.1.20.2 broadinstitute/py3-bio=0.1.2 From d1ffbc94c8bc4c1158c4d3015436b767217fdc73 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Wed, 3 Apr 2024 16:56:20 -0400 Subject: [PATCH 3/7] update nextclade_one_sample to ignore output rows with empty values --- pipes/WDL/tasks/tasks_nextstrain.wdl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 914c4ac4a..a7824cedf 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -54,16 +54,22 @@ task nextclade_one_sample { --output-tree "~{basename}".nextclade.auspice.json \ "~{genome_fasta}" python3 < NEXTCLADE_CLADE - grep ^aaSubstitutions\\W transposed.tsv | cut -f 2 | grep -v aaSubstitutions > NEXTCLADE_AASUBS - grep ^aaDeletions\\W transposed.tsv | cut -f 2 | grep -v aaDeletions > NEXTCLADE_AADELS } runtime { docker: docker @@ -80,6 +86,8 @@ task nextclade_one_sample { File auspice_json = "~{basename}.nextclade.auspice.json" File nextclade_tsv = "~{basename}.nextclade.tsv" String nextclade_clade = read_string("NEXTCLADE_CLADE") + String nextclade_shortclade = read_string("NEXTCLADE_SHORTCLADE") + String nextclade_subclade = read_string("NEXTCLADE_SUBCLADE") String aa_subs_csv = read_string("NEXTCLADE_AASUBS") String aa_dels_csv = read_string("NEXTCLADE_AADELS") } From e063848384b990d5b05752ac2e2658fa66450dfe Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Wed, 3 Apr 2024 17:01:30 -0400 Subject: [PATCH 4/7] what? --- pipes/WDL/tasks/tasks_nextstrain.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index a7824cedf..a2260e077 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -17,7 +17,7 @@ task nextclade_one_sample { String docker = "nextstrain/nextclade:2.14.0" } String basename = basename(genome_fasta, ".fasta") - command { + command <<< set -e apt-get update apt-get -y install python3 @@ -70,7 +70,7 @@ task nextclade_one_sample { with codecs.open(fname, 'w', encoding='utf-8') as outf: outf.write(out.get(k, '')+'\n') CODE - } + >>> runtime { docker: docker memory: "3 GB" From 2ac90b18474a18d6ea7b73b0154ed485af51c24c Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Wed, 3 Apr 2024 17:06:34 -0400 Subject: [PATCH 5/7] percolate outputs --- pipes/WDL/workflows/nextclade_single.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipes/WDL/workflows/nextclade_single.wdl b/pipes/WDL/workflows/nextclade_single.wdl index 59b57539b..ed30bd812 100644 --- a/pipes/WDL/workflows/nextclade_single.wdl +++ b/pipes/WDL/workflows/nextclade_single.wdl @@ -15,6 +15,8 @@ workflow nextclade_single { File nextclade_json = nextclade_one_sample.nextclade_json String nextclade_aa_subs = nextclade_one_sample.aa_subs_csv String nextclade_aa_dels = nextclade_one_sample.aa_dels_csv + String nextclade_shortclade = nextclade_one_sample.nextclade_shortclade + String nextclade_subclade = nextclade_one_sample.nextclade_subclade String nextclade_version = nextclade_one_sample.nextclade_version } } From 2d135a826796fd83be5b463cb09f806a5027a3cd Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Wed, 3 Apr 2024 17:37:20 -0400 Subject: [PATCH 6/7] add new taxid_to_nextclade workflow --- .dockstore.yml | 5 ++++ pipes/WDL/tasks/tasks_nextstrain.wdl | 35 ++++++++++++++++++++++ pipes/WDL/workflows/taxid_to_nextclade.wdl | 15 ++++++++++ 3 files changed, 55 insertions(+) create mode 100644 pipes/WDL/workflows/taxid_to_nextclade.wdl diff --git a/.dockstore.yml b/.dockstore.yml index b693e6a43..2e5185717 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -369,6 +369,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/subsample_by_metadata_with_focal.wdl testParameterFiles: - /empty.json + - name: taxid_to_nextclade + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/taxid_to_nextclade.wdl + testParameterFiles: + - /empty.json - name: terra_table_to_tsv subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/terra_table_to_tsv.wdl diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index a2260e077..6945d1c7d 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -1,5 +1,40 @@ version 1.0 +task taxid_to_nextclade_dataset_name { + input { + String taxid + } + command <<< + python3 <>> + runtime { + docker: "python:slim" + memory: "1 GB" + cpu: 1 + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES + dx_instance_type: "mem2_ssd1_v2_x2" + maxRetries: 2 + } + output { + String nextclade_dataset_name = read_string("DATASET_NAME") + } +} + task nextclade_one_sample { meta { description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults." diff --git a/pipes/WDL/workflows/taxid_to_nextclade.wdl b/pipes/WDL/workflows/taxid_to_nextclade.wdl new file mode 100644 index 000000000..f3cae320c --- /dev/null +++ b/pipes/WDL/workflows/taxid_to_nextclade.wdl @@ -0,0 +1,15 @@ +version 1.0 + +import "../tasks/tasks_nextstrain.wdl" as nextstrain + +workflow taxid_to_nextclade { + meta { + description: "Convert taxids to a nextclade dataset name" + } + + call nextstrain.taxid_to_nextclade_dataset_name + + output { + String nextclade_dataset = taxid_to_nextclade_dataset_name.nextclade_dataset_name + } +} From 0ffc09b2901bd5b93f1d8e1dfb981413c31c9f19 Mon Sep 17 00:00:00 2001 From: Daniel Park Date: Wed, 3 Apr 2024 18:21:49 -0400 Subject: [PATCH 7/7] disk runtime --- pipes/WDL/tasks/tasks_nextstrain.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 6945d1c7d..091884b36 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -25,9 +25,9 @@ task taxid_to_nextclade_dataset_name { docker: "python:slim" memory: "1 GB" cpu: 1 - disks: "local-disk " + disk_size + " LOCAL" - disk: disk_size + " GB" # TES - dx_instance_type: "mem2_ssd1_v2_x2" + disks: "local-disk 50 HDD" + disk: "50 GB" # TES + dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } output {