From 53579e15b6357d114eb7da11582211fbaca26df7 Mon Sep 17 00:00:00 2001
From: Christopher Tomkins-Tinch
Date: Wed, 18 Sep 2024 23:54:41 -0400
Subject: [PATCH] initial WIP commit of a task and workflow to run delphy

initial WIP commit of a task and workflow to run delphy (https://github.com/broadinstitute/delphy). Further work pending https://github.com/broadinstitute/delphy/pull/2 and availability of a delphy docker image
---
 .dockstore.yml                       |   5 ++
 pipes/WDL/tasks/tasks_delphy.wdl     | 103 ++++++++++++++++++++++++++
 pipes/WDL/workflows/delphy_phylo.wdl |  23 ++++++
 requirements-modules.txt             |   1 +
 4 files changed, 132 insertions(+)
 create mode 100644 pipes/WDL/tasks/tasks_delphy.wdl
 create mode 100644 pipes/WDL/workflows/delphy_phylo.wdl

diff --git a/.dockstore.yml b/.dockstore.yml
index 0c9012485..3bb666675 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -100,6 +100,11 @@ workflows:
     primaryDescriptorPath: /pipes/WDL/workflows/coverage_table.wdl
     testParameterFiles:
       - /empty.json
+  - name: delphy_phylo
+    subclass: WDL
+    primaryDescriptorPath: /pipes/WDL/workflows/delphy_phylo.wdl
+    testParameterFiles:
+      - /empty.json
   - name: demux_metag
     subclass: WDL
     primaryDescriptorPath: /pipes/WDL/workflows/demux_metag.wdl
diff --git a/pipes/WDL/tasks/tasks_delphy.wdl b/pipes/WDL/tasks/tasks_delphy.wdl
new file mode 100644
index 000000000..c17efff71
--- /dev/null
+++ b/pipes/WDL/tasks/tasks_delphy.wdl
@@ -0,0 +1,103 @@
+version 1.0
+
+# Runs Delphy on a multiple sequence alignment and collects the trees, log,
+# .dphy, and BEAST XML files it writes, plus the reported Delphy version.
+# Step counts and intervals that are not supplied are computed in the command
+# from the number of ">" lines in the input fasta.
+task delphy {
+  input {
+    File sequences_msa_fasta
+
+    Int? num_steps
+    Int? tree_every_n_steps
+    Int? snapshot_every_n_steps
+    Int? log_every_n_steps
+
+    Int? num_threads # min($(nproc),(num_sequences / 100)) suggested
+
+    Int? cpus
+    Int? machine_mem_gb
+    Int disk_size = 300 # GB; referenced by the runtime block below
+
+    String docker = "quay.io/broadinstitute/delphy:0.999"
+  }
+
+  # input file name with a trailing ".fa" or ".fasta" removed; prefix for all outputs
+  String out_basename = basename(basename(sequences_msa_fasta,".fa"),".fasta")
+
+  meta {
+    description: "Execute Delphy; see: https://github.com/broadinstitute/delphy"
+  }
+  parameter_meta {
+    sequences_msa_fasta: {
+      description: "multiple alignment of input sequences in fasta format"
+    }
+    num_steps: {
+      description: "(5000000 * num_sequences) suggested"
+    }
+    tree_every_n_steps: {
+      description: "(num_steps / 200) suggested"
+    }
+    snapshot_every_n_steps: {
+      description: "(num_steps / 200) suggested"
+    }
+    log_every_n_steps: {
+      description: "(num_steps / 10000) suggested"
+    }
+    num_threads: {
+      description: "min($(nproc),(num_sequences / 100)) suggested"
+    }
+  }
+
+  command <<<
+    set -e
+    delphy --version > DELPHY_VERSION
+
+    # count lines containing ">" (one fasta header line per sequence)
+    num_sequences=$(grep -c ">" ~{sequences_msa_fasta})
+
+    # caller-supplied values are used as-is; otherwise fall back to the suggested formulas
+    steps=~{if defined(num_steps) then "~{num_steps}" else "$((($num_sequences * 5000000)))"}
+    tree_every=~{if defined(tree_every_n_steps) then "~{tree_every_n_steps}" else "$((($steps / 200)))"}
+    snapshot_every=~{if defined(snapshot_every_n_steps) then "~{snapshot_every_n_steps}" else "$((($steps / 200)))"}
+    log_every=~{if defined(log_every_n_steps) then "~{log_every_n_steps}" else "$((($steps / 10000)))"}
+
+    threads=~{if defined(num_threads) then "~{num_threads}" else "$(nproc)"}
+
+    # A comment line cannot sit between backslash-continued lines: bash would end
+    # the command at the comment. Optional flag, currently not passed:
+    #   --v0-site-rate-heterogeneity
+    delphy \
+      --v0-in-fasta ~{sequences_msa_fasta} \
+      --v0-threads $threads \
+      --v0-steps $steps \
+      --v0-tree-every $tree_every \
+      --v0-delphy-snapshot-every $snapshot_every \
+      --v0-log-every $log_every \
+      --v0-out-trees-file "~{out_basename}.dphy.trees" \
+      --v0-out-log-file "~{out_basename}.dphy.log" \
+      --v0-out-delphy-file "~{out_basename}.dphy" \
+      --v0-out-beast-xml "~{out_basename}.dphy.for_beast.xml"
+  >>>
+
+  output {
+    File delphy_trees = "~{out_basename}.dphy.trees"
+    File delphy_log = "~{out_basename}.dphy.log"
+    File delphy_for_web_ui = "~{out_basename}.dphy"
+    File delphy_output_for_beast = "~{out_basename}.dphy.for_beast.xml"
+    File delphy_stdout = stdout()
+
+    String delphy_version = read_string('DELPHY_VERSION')
+  }
+
+  runtime {
+    docker: docker
+    memory: select_first([machine_mem_gb, 15]) + " GB"
+    cpu: select_first([cpus, 8])
+    disks: "local-disk " + disk_size + " HDD"
+    disk: disk_size + " GB" # TES
+    dx_instance_type: "mem1_ssd1_gpu2_x8" # dxWDL
+
+    maxRetries: 1
+  }
+}
diff --git a/pipes/WDL/workflows/delphy_phylo.wdl b/pipes/WDL/workflows/delphy_phylo.wdl
new file mode 100644
index 000000000..9022239ac
--- /dev/null
+++ b/pipes/WDL/workflows/delphy_phylo.wdl
@@ -0,0 +1,23 @@
+version 1.0
+
+import "../tasks/tasks_delphy.wdl" as delphy
+
+workflow delphy_phylo {
+    meta {
+        description: "Runs Delphy"
+        author: "Broad Viral Genomics"
+        email: "viral-ngs@broadinstitute.org"
+    }
+
+    call delphy.delphy
+
+    output {
+        File delphy_trees = delphy.delphy_trees
+        File delphy_log = delphy.delphy_log
+        File delphy_for_web_ui = delphy.delphy_for_web_ui
+        File delphy_output_for_beast = delphy.delphy_output_for_beast
+        File delphy_stdout = delphy.delphy_stdout
+
+        String delphy_version = delphy.delphy_version
+    }
+}
diff --git a/requirements-modules.txt b/requirements-modules.txt
index e0e4ab675..9221cd99c 100644
--- a/requirements-modules.txt
+++ b/requirements-modules.txt
@@ -5,6 +5,7 @@ broadinstitute/viral-phylo=2.1.20.2
 broadinstitute/py3-bio=0.1.2
 broadinstitute/beast-beagle-cuda=1.10.5pre
 broadinstitute/ncbi-tools=2.10.7.10
+broadinstitute/delphy=0.999
 nextstrain/base=build-20240318T173028Z
 andersenlabapps/ivar=1.3.1
 quay.io/staphb/pangolin=4.3.1-pdata-1.23.1