diff --git a/.dockstore.yml b/.dockstore.yml index b693e6a43..2e5185717 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -369,6 +369,11 @@ workflows: primaryDescriptorPath: /pipes/WDL/workflows/subsample_by_metadata_with_focal.wdl testParameterFiles: - /empty.json + - name: taxid_to_nextclade + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/taxid_to_nextclade.wdl + testParameterFiles: + - /empty.json - name: terra_table_to_tsv subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/terra_table_to_tsv.wdl diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index b8fa2bee1..522011e01 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -15,7 +15,7 @@ task assemble { String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt") Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } parameter_meta{ reads_unmapped_bam: { @@ -111,7 +111,11 @@ task select_references { Array[File] reference_genomes_fastas File contigs_fasta - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + Int? skani_m + Int? skani_s + Int? skani_c + + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" Int machine_mem_gb = 4 Int cpu = 2 Int disk_size = 100 @@ -128,6 +132,9 @@ task select_references { "~{contigs_basename}.refs_skani_dist.full.tsv" \ "~{contigs_basename}.refs_skani_dist.top.tsv" \ "~{contigs_basename}.ref_clusters.tsv" \ + ~{'-m ' + skani_m} \ + ~{'-s ' + skani_s} \ + ~{'-c ' + skani_c} \ --loglevel=DEBUG # create basename-only version of ref_clusters output file @@ -188,6 +195,10 @@ task scaffold { Int replace_length=55 Boolean allow_incomplete_output = false + Int? skani_m + Int? skani_s + Int? skani_c + Int? nucmer_max_gap Int? nucmer_min_match Int? nucmer_min_cluster @@ -195,7 +206,7 @@ task scaffold { Float? scaffold_min_pct_contig_aligned Int? machine_mem_gb - String docker="quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker="quay.io/broadinstitute/viral-assemble:2.3.1.4" # do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades") @@ -283,6 +294,9 @@ task scaffold { "~{sample_name}.refs_skani_dist.full.tsv" \ "~{sample_name}.refs_skani_dist.top.tsv" \ "~{sample_name}.ref_clusters.tsv" \ + ~{'-m ' + skani_m} \ + ~{'-s ' + skani_s} \ + ~{'-c ' + skani_c} \ --loglevel=DEBUG CHOSEN_REF_FASTA=$(cut -f 1 "~{sample_name}.refs_skani_dist.full.tsv" | tail +2 | head -1) cut -f 3 "~{sample_name}.refs_skani_dist.full.tsv" | tail +2 | head -1 > SKANI_ANI @@ -677,7 +691,7 @@ task refine_assembly_with_aligned_reads { Int min_coverage = 3 Int machine_mem_gb = 15 - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } Int disk_size = 375 @@ -802,7 +816,7 @@ task refine_2x_and_plot { String? plot_coverage_novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100 -k" Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" # do this in two steps in case the input doesn't actually have "cleaned" in the name String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned") diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 914c4ac4a..091884b36 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -1,5 +1,40 @@ version 1.0 +task taxid_to_nextclade_dataset_name { + input { + String taxid + } + command <<< + python3 <>> + runtime { + docker: "python:slim" + memory: "1 GB" + cpu: 1 + disks: "local-disk 50 HDD" + disk: "50 GB" # TES + dx_instance_type: "mem1_ssd1_v2_x2" + maxRetries: 2 + } + output { + String nextclade_dataset_name = read_string("DATASET_NAME") + } +} + task nextclade_one_sample { meta { description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults." @@ -17,7 +52,7 @@ task nextclade_one_sample { String docker = "nextstrain/nextclade:2.14.0" } String basename = basename(genome_fasta, ".fasta") - command { + command <<< set -e apt-get update apt-get -y install python3 @@ -54,17 +89,23 @@ task nextclade_one_sample { --output-tree "~{basename}".nextclade.auspice.json \ "~{genome_fasta}" python3 < NEXTCLADE_CLADE - grep ^aaSubstitutions\\W transposed.tsv | cut -f 2 | grep -v aaSubstitutions > NEXTCLADE_AASUBS - grep ^aaDeletions\\W transposed.tsv | cut -f 2 | grep -v aaDeletions > NEXTCLADE_AADELS - } + >>> runtime { docker: docker memory: "3 GB" @@ -80,6 +121,8 @@ task nextclade_one_sample { File auspice_json = "~{basename}.nextclade.auspice.json" File nextclade_tsv = "~{basename}.nextclade.tsv" String nextclade_clade = read_string("NEXTCLADE_CLADE") + String nextclade_shortclade = read_string("NEXTCLADE_SHORTCLADE") + String nextclade_subclade = read_string("NEXTCLADE_SUBCLADE") String aa_subs_csv = read_string("NEXTCLADE_AASUBS") String aa_dels_csv = read_string("NEXTCLADE_AADELS") } diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 873ef0b65..447580253 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -674,7 +674,7 @@ task compare_two_genomes { File genome_two String out_basename - String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.3" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.1.4" } Int disk_size = 50 diff --git a/pipes/WDL/workflows/nextclade_single.wdl b/pipes/WDL/workflows/nextclade_single.wdl index 59b57539b..ed30bd812 100644 --- a/pipes/WDL/workflows/nextclade_single.wdl +++ b/pipes/WDL/workflows/nextclade_single.wdl @@ -15,6 +15,8 @@ workflow nextclade_single { File nextclade_json = nextclade_one_sample.nextclade_json String nextclade_aa_subs = nextclade_one_sample.aa_subs_csv String nextclade_aa_dels = nextclade_one_sample.aa_dels_csv + String nextclade_shortclade = nextclade_one_sample.nextclade_shortclade + String nextclade_subclade = nextclade_one_sample.nextclade_subclade String nextclade_version = nextclade_one_sample.nextclade_version } } diff --git a/pipes/WDL/workflows/taxid_to_nextclade.wdl b/pipes/WDL/workflows/taxid_to_nextclade.wdl new file mode 100644 index 000000000..f3cae320c --- /dev/null +++ b/pipes/WDL/workflows/taxid_to_nextclade.wdl @@ -0,0 +1,15 @@ +version 1.0 + +import "../tasks/tasks_nextstrain.wdl" as nextstrain + +workflow taxid_to_nextclade { + meta { + description: "Convert taxids to a nextclade dataset name" + } + + call nextstrain.taxid_to_nextclade_dataset_name + + output { + String nextclade_dataset = taxid_to_nextclade_dataset_name.nextclade_dataset_name + } +} diff --git a/requirements-modules.txt b/requirements-modules.txt index 7e51d051a..4e93df79a 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,5 +1,5 @@ broadinstitute/viral-core=2.3.1 -broadinstitute/viral-assemble=2.3.1.3 +broadinstitute/viral-assemble=2.3.1.4 broadinstitute/viral-classify=2.2.4.0 broadinstitute/viral-phylo=2.1.20.2 broadinstitute/py3-bio=0.1.2