Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sarscov2_nextstrain and sarscov2_lineages improvements #208

Merged
merged 6 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,35 @@ task nextstrain_build_subsample {
}
}

# Downloads the nextstrain/ncov repository archive at a pinned commit, unpacks
# it into the working directory, and exposes several files from its defaults/
# directory as task outputs.
task nextstrain_ncov_defaults {
    input {
        # Commit of github.com/nextstrain/ncov whose archive is fetched.
        String nextstrain_ncov_repo_commit = "5dbca8a45a64e39057c22163f154db981f7ed5c1"
        # Container image used to run the command below.
        String docker = "nextstrain/base:build-20210127T135203Z"
    }

    command <<<
        set -e
        wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz"
        tar -xf "~{nextstrain_ncov_repo_commit}.tar.gz" --strip-components=1
        cat defaults/clades.tsv defaults/subclades.tsv > clades-with-subclades.tsv
    >>>

    runtime {
        docker: docker
        cpu: 1
        memory: "1 GB"
        disks: "local-disk 50 HDD"
        dx_instance_type: "mem1_ssd1_v2_x2"
    }

    output {
        # Concatenation of defaults/clades.tsv and defaults/subclades.tsv,
        # written by the last line of the command.
        File clades_tsv      = "clades-with-subclades.tsv"

        # Files taken as-is from the unpacked archive. --strip-components=1
        # drops the archive's top-level directory, so defaults/ sits directly
        # in the working directory.
        File lat_longs_tsv   = "defaults/lat_longs.tsv"
        File reference_fasta = "defaults/reference_seq.fasta"
        File reference_gb    = "defaults/reference_seq.gb"
        File ids_include     = "defaults/include.txt"
        File ids_exclude     = "defaults/exclude.txt"
        File auspice_config  = "defaults/auspice_config.json"
    }
}

task filter_subsample_sequences {
meta {
description: "Filter and subsample a sequence set. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/filter.html"
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_sarscov2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ task pangolin_one_sample {
grep ^lineage transposed.tsv | cut -f 2 | grep -v lineage > PANGOLIN_CLADE
}
runtime {
docker: "staphb/pangolin:2.1.10-pangolearn-2021-02-01"
docker: "staphb/pangolin:2.1.11-pangolearn-2021-02-01"
memory: "3 GB"
cpu: 2
disks: "local-disk 50 HDD"
Expand Down
1 change: 1 addition & 0 deletions pipes/WDL/workflows/augur_export_only.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ workflow augur_export_only {
call nextstrain.export_auspice_json
output {
File auspice_json = export_auspice_json.virus_json
File root_sequence_json = export_auspice_json.root_sequence_json
}
}

7 changes: 7 additions & 0 deletions pipes/WDL/workflows/augur_from_msa.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ workflow augur_from_msa {
columns = select_first([ancestral_traits_to_infer,[]])
}
}
call nextstrain.tip_frequencies {
input:
tree = refine_augur_tree.tree_refined,
metadata = select_first(flatten([[tsv_join.out_tsv], sample_metadata]))
}
call nextstrain.ancestral_tree {
input:
tree = refine_augur_tree.tree_refined,
Expand Down Expand Up @@ -141,5 +146,7 @@ workflow augur_from_msa {
translate_augur_tree.aa_muts_json,
assign_clades_to_nodes.node_clade_data_json])
File auspice_input_json = export_auspice_json.virus_json
File tip_frequencies_json = tip_frequencies.node_data_json
File root_sequence_json = export_auspice_json.root_sequence_json
}
}
40 changes: 25 additions & 15 deletions pipes/WDL/workflows/sarscov2_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@ workflow sarscov2_nextstrain {
input {
Array[File]+ assembly_fastas
Array[File]+ sample_metadata_tsvs
File ref_fasta

String build_name
File builds_yaml

File? clades_tsv
Array[String]? ancestral_traits_to_infer

File? auspice_config
File? ref_fasta
File? clades_tsv

Int min_unambig_genome = 27000
}

Expand Down Expand Up @@ -49,6 +52,7 @@ workflow sarscov2_nextstrain {
}
}

call nextstrain.nextstrain_ncov_defaults

#### mafft_and_snp

Expand All @@ -65,7 +69,7 @@ workflow sarscov2_nextstrain {
call nextstrain.mafft_one_chr as mafft {
input:
sequences = filter_sequences_by_length.filtered_fasta,
ref_fasta = ref_fasta,
ref_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta]),
basename = "all_samples_aligned.fasta"
}
call nextstrain.snp_sites {
Expand Down Expand Up @@ -94,7 +98,8 @@ workflow sarscov2_nextstrain {
input:
alignment_msa_fasta = mafft.aligned_sequences,
sample_metadata_tsv = derived_cols.derived_metadata,
build_name = build_name
build_name = build_name,
builds_yaml = builds_yaml
}
call nextstrain.fasta_to_ids {
input:
Expand All @@ -104,9 +109,13 @@ workflow sarscov2_nextstrain {

#### augur_from_msa

call nextstrain.augur_mask_sites {
input:
sequences = subsample.subsampled_msa
}
call nextstrain.draft_augur_tree {
input:
msa_or_vcf = subsample.subsampled_msa
msa_or_vcf = augur_mask_sites.masked_sequences
}

call nextstrain.refine_augur_tree {
Expand Down Expand Up @@ -142,28 +151,29 @@ workflow sarscov2_nextstrain {
call nextstrain.translate_augur_tree {
input:
tree = refine_augur_tree.tree_refined,
nt_muts = ancestral_tree.nt_muts_json
nt_muts = ancestral_tree.nt_muts_json,
genbank_gb = nextstrain_ncov_defaults.reference_gb
}
if(defined(clades_tsv)) {
call nextstrain.assign_clades_to_nodes {
input:
tree_nwk = refine_augur_tree.tree_refined,
nt_muts_json = ancestral_tree.nt_muts_json,
aa_muts_json = translate_augur_tree.aa_muts_json,
ref_fasta = ref_fasta,
clades_tsv = select_first([clades_tsv])
}
call nextstrain.assign_clades_to_nodes {
input:
tree_nwk = refine_augur_tree.tree_refined,
nt_muts_json = ancestral_tree.nt_muts_json,
aa_muts_json = translate_augur_tree.aa_muts_json,
ref_fasta = select_first([ref_fasta, nextstrain_ncov_defaults.reference_fasta]),
clades_tsv = select_first([clades_tsv, nextstrain_ncov_defaults.clades_tsv])
}
call nextstrain.export_auspice_json {
input:
tree = refine_augur_tree.tree_refined,
sample_metadata = derived_cols.derived_metadata,
lat_longs_tsv = nextstrain_ncov_defaults.lat_longs_tsv,
node_data_jsons = select_all([
refine_augur_tree.branch_lengths,
ancestral_traits.node_data_json,
ancestral_tree.nt_muts_json,
translate_augur_tree.aa_muts_json,
assign_clades_to_nodes.node_clade_data_json]),
auspice_config = select_first([auspice_config, nextstrain_ncov_defaults.auspice_config]),
out_basename = "auspice-~{build_name}"
}

Expand Down
2 changes: 1 addition & 1 deletion requirements-modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ broadinstitute/beast-beagle-cuda=1.10.5pre
broadinstitute/ncbi-tools=2.10.7.1
nextstrain/base=build-20210127T135203Z
andersenlabapps/ivar=1.3
staphb/pangolin=2.1.10-pangolearn-2021-02-01
staphb/pangolin=2.1.11-pangolearn-2021-02-01
neherlab/nextclade=0.12.0