Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parameterize and update defaults for VM shapes in nextstrain tasks #455

Merged
19 commits merged on Mar 7, 2023
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 44 additions & 45 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ task nextclade_one_sample {
File? pcr_primers_csv
File? virus_properties
String? dataset_name
Int disk_size = 50
String docker = "nextstrain/nextclade:2.12.0"
}
String basename = basename(genome_fasta, ".fasta")
Int disk_size = 50
command {
set -e
apt-get update
Expand Down Expand Up @@ -100,9 +100,9 @@ task nextclade_many_samples {
String? dataset_name
String basename
File? genome_ids_setdefault_blank
Int disk_size = 150
String docker = "nextstrain/nextclade:2.12.0"
}
Int disk_size = 100
command <<<
set -e
apt-get update
Expand Down Expand Up @@ -281,8 +281,8 @@ task derived_cols {
Array[File] table_map = []

String docker = "quay.io/broadinstitute/viral-core:2.1.33"
Int disk_size = 50
}
Int disk_size = 50
parameter_meta {
lab_highlight_loc: {
description: "This option copies the 'originating_lab' and 'submitting_lab' columns to new ones including a prefix, but only if they match certain criteria. The value of this string must be of the form prefix;col_header=value:col_header=value. For example, 'MA;country=USA:division=Massachusetts' will copy the originating_lab and submitting_lab columns to MA_originating_lab and MA_submitting_lab, but only for those rows where country=USA and division=Massachusetts."
Expand Down Expand Up @@ -391,12 +391,12 @@ task derived_cols {
task filter_segments {
input {
File all_samples_fasta
Int? segment = 1
Int segment = 1
File? pre_assembled_samples_fasta

Int? machine_mem_gb
Int machine_mem_gb = 3
Int disk_size = 375
}
Int disk_size = 375
command <<<
python3 <<CODE

Expand Down Expand Up @@ -424,7 +424,7 @@ task filter_segments {
>>>
runtime {
docker: "python:slim"
memory: select_first([machine_mem_gb, 3]) + " GB"
memory: machine_mem_gb + " GB"
cpu: 1
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
Expand All @@ -449,9 +449,10 @@ task nextstrain_build_subsample {
File? keep_list
File? drop_list

Int? machine_mem_gb
Int machine_mem_gb = 50
String docker = "nextstrain/base:build-20211012T204409Z"
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
Int disk_size = 750
}
parameter_meta {
alignment_msa_fasta: {
Expand All @@ -474,7 +475,6 @@ task nextstrain_build_subsample {
patterns: ["*.yaml"]
}
}
Int disk_size = 375
command <<<
set -e -o pipefail
augur version > VERSION
Expand Down Expand Up @@ -570,7 +570,7 @@ task nextstrain_build_subsample {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 50]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand All @@ -595,8 +595,8 @@ task nextstrain_ncov_defaults {
input {
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 50
}
Int disk_size = 50
command {
set -e
wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz"
Expand Down Expand Up @@ -633,10 +633,9 @@ task nextstrain_deduplicate_sequences {

String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}

Int disk_size = 375

parameter_meta {
sequences_fasta: {
description: "FASTA file with multiple sequences",
Expand Down Expand Up @@ -688,10 +687,9 @@ task nextstrain_ncov_sanitize_gisaid_data {

String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}

Int disk_size = 375

parameter_meta {
sequences_gisaid_fasta: {
description: "Multiple sequences downloaded from GISAID",
Expand Down Expand Up @@ -765,8 +763,8 @@ task filter_subsample_sequences {
Array[String]? include_where

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 100
parameter_meta {
sequences_fasta: {
description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.",
Expand Down Expand Up @@ -850,8 +848,8 @@ task filter_sequences_to_list {

String out_fname = sub(sub(basename(sequences), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta")
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 200
parameter_meta {
sequences: {
description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.",
Expand Down Expand Up @@ -952,8 +950,8 @@ task mafft_one_chr {
String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
Int mem_size = 500
Int cpus = 64
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e

Expand Down Expand Up @@ -1039,9 +1037,9 @@ task mafft_one_chr_chunked {

String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
Int mem_size = 32
Int cpus = 96
Int cpus = 64
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e

Expand Down Expand Up @@ -1146,8 +1144,8 @@ task augur_mafft_align {
Boolean remove_reference = true

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1187,9 +1185,9 @@ task snp_sites {
File msa_fasta
Boolean allow_wildcard_bases = true
String docker = "quay.io/biocontainers/snp-sites:2.5.1--hed695b0_0"
Int disk_size = 750
}
String out_basename = basename(msa_fasta, ".fasta")
Int disk_size = 100
command {
snp-sites -V > VERSION
snp-sites -v ~{true="" false="-c" allow_wildcard_bases} -o "~{out_basename}.vcf" "~{msa_fasta}"
Expand Down Expand Up @@ -1219,8 +1217,8 @@ task augur_mask_sites {
File? mask_bed

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 100
parameter_meta {
sequences: {
description: "Set of alignments (fasta format) or variants (vcf format) to mask.",
Expand Down Expand Up @@ -1275,10 +1273,11 @@ task draft_augur_tree {
File? vcf_reference
String? tree_builder_args

Int? cpus
Int cpus = 64
Int machine_mem_gb = 32
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 1250
}
Int disk_size = 750
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to construct a tree from using augur tree (iqTree).",
Expand All @@ -1303,8 +1302,8 @@ task draft_augur_tree {
>>>
runtime {
docker: docker
memory: "32 GB"
cpu: select_first([cpus, 64])
memory: machine_mem_gb + " GB"
cpu: cpus
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x36"
Expand Down Expand Up @@ -1346,8 +1345,9 @@ task refine_augur_tree {
File? vcf_reference

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
Int machine_mem_gb = 75
}
Int disk_size = 100
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.",
Expand Down Expand Up @@ -1386,7 +1386,7 @@ task refine_augur_tree {
>>>
runtime {
docker: docker
memory: "50 GB"
memory: machine_mem_gb + " GB"
cpu : 2
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1417,11 +1417,11 @@ task ancestral_traits {
File? weights
Float? sampling_bias_correction

Int? machine_mem_gb
Int machine_mem_gb = 32
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
String out_basename = basename(tree, '.nwk')
Int disk_size = 100
command <<<
set -e
augur version > VERSION
Expand All @@ -1439,7 +1439,7 @@ task ancestral_traits {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 32]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1472,8 +1472,8 @@ task ancestral_tree {
File? output_vcf

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
Int disk_size = 50
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.",
Expand Down Expand Up @@ -1533,9 +1533,9 @@ task translate_augur_tree {
File? vcf_reference

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
String out_basename = basename(tree, '.nwk')
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1588,11 +1588,11 @@ task tip_frequencies {
Boolean censored = false
Boolean include_internal_nodes = false

Int? machine_mem_gb
Int machine_mem_gb = 64
String docker = "nextstrain/base:build-20211012T204409Z"
String out_basename = basename(tree, '.nwk')
Int disk_size = 200
}
Int disk_size = 100
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1621,7 +1621,7 @@ task tip_frequencies {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 30]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1650,9 +1650,9 @@ task assign_clades_to_nodes {
File clades_tsv

String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
String out_basename = basename(basename(tree_nwk, ".nwk"), "_timetree")
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1693,11 +1693,11 @@ task augur_import_beast {
String? tip_date_format
String? tip_date_delimiter

Int? machine_mem_gb
Int machine_mem_gb = 3
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 150
}
String tree_basename = basename(beast_mcc_tree, ".tree")
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand All @@ -1715,7 +1715,7 @@ task augur_import_beast {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 2
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1754,11 +1754,10 @@ task export_auspice_json {

String out_basename = basename(basename(tree, ".nwk"), "_timetree")

Int? machine_mem_gb
Int machine_mem_gb = 64
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}

Int disk_size = 100

command <<<
set -e -o pipefail
Expand Down Expand Up @@ -1819,7 +1818,7 @@ task export_auspice_json {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 64]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_utils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,8 @@ task filter_sequences_by_length {
Int min_non_N = 1

String docker = "quay.io/broadinstitute/viral-core:2.1.33"
Int disk_size = 750
}
Int disk_size = 300
dpark01 marked this conversation as resolved.
Show resolved Hide resolved
parameter_meta {
sequences_fasta: {
description: "Set of sequences in fasta format",
Expand Down