# Patch summary (preamble text placed ahead of the first "diff --git" header).
#
# pipes/WDL/tasks/tasks_nextstrain.wdl
#   * Adds task `nextstrain_ncov_defaults`. Its command downloads the
#     nextstrain/ncov GitHub archive for the commit given by
#     `nextstrain_ncov_repo_commit` (a default hash is pinned), unpacks it with
#     --strip-components=1, and concatenates defaults/clades.tsv and
#     defaults/subclades.tsv into clades-with-subclades.tsv.
#   * Task outputs: clades_tsv, lat_longs_tsv, reference_fasta, reference_gb,
#     ids_include, ids_exclude, auspice_config (all paths under defaults/,
#     except the concatenated clades file).
#
# pipes/WDL/tasks/tasks_sarscov2.wdl and requirements-modules.txt
#   * Bump staphb/pangolin from 2.1.10-pangolearn-2021-02-01 to
#     2.1.11-pangolearn-2021-02-01 in both places.
#
# pipes/WDL/workflows/augur_export_only.wdl
#   * Exposes export_auspice_json.root_sequence_json as a workflow output.
#
# pipes/WDL/workflows/augur_from_msa.wdl
#   * Adds a `nextstrain.tip_frequencies` call fed by
#     refine_augur_tree.tree_refined and the selected metadata file.
#   * Adds workflow outputs tip_frequencies_json and root_sequence_json.
#
# pipes/WDL/workflows/sarscov2_nextstrain.wdl
#   * Inputs: `ref_fasta` changes from `File` to `File?`; `builds_yaml` (File)
#     and `auspice_config` (File?) are added; `clades_tsv` stays `File?`.
#   * Calls `nextstrain.nextstrain_ncov_defaults` and uses its outputs as
#     fallbacks through select_first() for ref_fasta, clades_tsv and
#     auspice_config; lat_longs_tsv and genbank_gb are taken from it directly.
#   * Adds an `augur_mask_sites` call on subsample.subsampled_msa;
#     draft_augur_tree now reads augur_mask_sites.masked_sequences.
#   * `assign_clades_to_nodes` is no longer wrapped in `if(defined(clades_tsv))`.
#   * `builds_yaml` is passed to the call that also receives `build_name`
#     (the call name lies outside the hunk context shown here).
#
# NOTE(review): translate_augur_tree always receives the default reference_gb,
#   even when the caller overrides ref_fasta -- confirm this pairing is intended.
# NOTE(review): ids_include / ids_exclude are not consumed by any hunk in this
#   patch -- presumably reserved for a later change; verify.
diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 513461f85..a78666e37 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -345,6 +345,35 @@ task nextstrain_build_subsample { } } +task nextstrain_ncov_defaults { + input { + String nextstrain_ncov_repo_commit = "5dbca8a45a64e39057c22163f154db981f7ed5c1" + String docker = "nextstrain/base:build-20210127T135203Z" + } + command { + set -e + wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz" + tar -xf "~{nextstrain_ncov_repo_commit}.tar.gz" --strip-components=1 + cat defaults/clades.tsv defaults/subclades.tsv > clades-with-subclades.tsv + } + runtime { + docker: docker + memory: "1 GB" + cpu : 1 + disks: "local-disk 50 HDD" + dx_instance_type: "mem1_ssd1_v2_x2" + } + output { + File clades_tsv = "clades-with-subclades.tsv" + File lat_longs_tsv = "defaults/lat_longs.tsv" + File reference_fasta = "defaults/reference_seq.fasta" + File reference_gb = "defaults/reference_seq.gb" + File ids_include = "defaults/include.txt" + File ids_exclude = "defaults/exclude.txt" + File auspice_config = "defaults/auspice_config.json" + } +} + task filter_subsample_sequences { meta { description: "Filter and subsample a sequence set. 
See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/filter.html" diff --git a/pipes/WDL/tasks/tasks_sarscov2.wdl b/pipes/WDL/tasks/tasks_sarscov2.wdl index 317507ffd..56617f3bf 100644 --- a/pipes/WDL/tasks/tasks_sarscov2.wdl +++ b/pipes/WDL/tasks/tasks_sarscov2.wdl @@ -137,7 +137,7 @@ task pangolin_one_sample { grep ^lineage transposed.tsv | cut -f 2 | grep -v lineage > PANGOLIN_CLADE } runtime { - docker: "staphb/pangolin:2.1.10-pangolearn-2021-02-01" + docker: "staphb/pangolin:2.1.11-pangolearn-2021-02-01" memory: "3 GB" cpu: 2 disks: "local-disk 50 HDD" diff --git a/pipes/WDL/workflows/augur_export_only.wdl b/pipes/WDL/workflows/augur_export_only.wdl index 7afb52b18..fdbe7650b 100644 --- a/pipes/WDL/workflows/augur_export_only.wdl +++ b/pipes/WDL/workflows/augur_export_only.wdl @@ -12,6 +12,7 @@ workflow augur_export_only { call nextstrain.export_auspice_json output { File auspice_json = export_auspice_json.virus_json + File root_sequence_json = export_auspice_json.root_sequence_json } } diff --git a/pipes/WDL/workflows/augur_from_msa.wdl b/pipes/WDL/workflows/augur_from_msa.wdl index d64d2cbe0..a064d1290 100644 --- a/pipes/WDL/workflows/augur_from_msa.wdl +++ b/pipes/WDL/workflows/augur_from_msa.wdl @@ -96,6 +96,11 @@ workflow augur_from_msa { columns = select_first([ancestral_traits_to_infer,[]]) } } + call nextstrain.tip_frequencies { + input: + tree = refine_augur_tree.tree_refined, + metadata = select_first(flatten([[tsv_join.out_tsv], sample_metadata])) + } call nextstrain.ancestral_tree { input: tree = refine_augur_tree.tree_refined, @@ -141,5 +146,7 @@ workflow augur_from_msa { translate_augur_tree.aa_muts_json, assign_clades_to_nodes.node_clade_data_json]) File auspice_input_json = export_auspice_json.virus_json + File tip_frequencies_json = tip_frequencies.node_data_json + File root_sequence_json = export_auspice_json.root_sequence_json } } \ No newline at end of file diff --git a/pipes/WDL/workflows/sarscov2_nextstrain.wdl 
b/pipes/WDL/workflows/sarscov2_nextstrain.wdl index 0a4d4fbf2..858be4767 100644 --- a/pipes/WDL/workflows/sarscov2_nextstrain.wdl +++ b/pipes/WDL/workflows/sarscov2_nextstrain.wdl @@ -14,13 +14,16 @@ workflow sarscov2_nextstrain { input { Array[File]+ assembly_fastas Array[File]+ sample_metadata_tsvs - File ref_fasta String build_name + File builds_yaml - File? clades_tsv Array[String]? ancestral_traits_to_infer + File? auspice_config + File? ref_fasta + File? clades_tsv + Int min_unambig_genome = 27000 } @@ -49,6 +52,7 @@ workflow sarscov2_nextstrain { } } + call nextstrain.nextstrain_ncov_defaults #### mafft_and_snp @@ -65,7 +69,7 @@ workflow sarscov2_nextstrain { call nextstrain.mafft_one_chr as mafft { input: sequences = filter_sequences_by_length.filtered_fasta, - ref_fasta = ref_fasta, + ref_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta]), basename = "all_samples_aligned.fasta" } call nextstrain.snp_sites { @@ -94,7 +98,8 @@ workflow sarscov2_nextstrain { input: alignment_msa_fasta = mafft.aligned_sequences, sample_metadata_tsv = derived_cols.derived_metadata, - build_name = build_name + build_name = build_name, + builds_yaml = builds_yaml } call nextstrain.fasta_to_ids { input: @@ -104,9 +109,13 @@ workflow sarscov2_nextstrain { #### augur_from_msa + call nextstrain.augur_mask_sites { + input: + sequences = subsample.subsampled_msa + } call nextstrain.draft_augur_tree { input: - msa_or_vcf = subsample.subsampled_msa + msa_or_vcf = augur_mask_sites.masked_sequences } call nextstrain.refine_augur_tree { @@ -142,28 +151,29 @@ workflow sarscov2_nextstrain { call nextstrain.translate_augur_tree { input: tree = refine_augur_tree.tree_refined, - nt_muts = ancestral_tree.nt_muts_json + nt_muts = ancestral_tree.nt_muts_json, + genbank_gb = nextstrain_ncov_defaults.reference_gb } - if(defined(clades_tsv)) { - call nextstrain.assign_clades_to_nodes { - input: - tree_nwk = refine_augur_tree.tree_refined, - nt_muts_json = 
ancestral_tree.nt_muts_json, - aa_muts_json = translate_augur_tree.aa_muts_json, - ref_fasta = ref_fasta, - clades_tsv = select_first([clades_tsv]) - } + call nextstrain.assign_clades_to_nodes { + input: + tree_nwk = refine_augur_tree.tree_refined, + nt_muts_json = ancestral_tree.nt_muts_json, + aa_muts_json = translate_augur_tree.aa_muts_json, + ref_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta]), + clades_tsv = select_first([clades_tsv, nextstrain_ncov_defaults.clades_tsv]) } call nextstrain.export_auspice_json { input: tree = refine_augur_tree.tree_refined, sample_metadata = derived_cols.derived_metadata, + lat_longs_tsv = nextstrain_ncov_defaults.lat_longs_tsv, node_data_jsons = select_all([ refine_augur_tree.branch_lengths, ancestral_traits.node_data_json, ancestral_tree.nt_muts_json, translate_augur_tree.aa_muts_json, assign_clades_to_nodes.node_clade_data_json]), + auspice_config = select_first([auspice_config, nextstrain_ncov_defaults.auspice_config]), out_basename = "auspice-~{build_name}" } diff --git a/requirements-modules.txt b/requirements-modules.txt index 22d7efa3c..c4998641b 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -6,5 +6,5 @@ broadinstitute/beast-beagle-cuda=1.10.5pre broadinstitute/ncbi-tools=2.10.7.1 nextstrain/base=build-20210127T135203Z andersenlabapps/ivar=1.3 -staphb/pangolin=2.1.10-pangolearn-2021-02-01 +staphb/pangolin=2.1.11-pangolearn-2021-02-01 neherlab/nextclade=0.12.0