# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# NOTE(review): this unified diff appears to have been flattened -- hunk headers and the
# leading ' ', '+', '-' markers sit inline instead of starting their own lines, and runs of
# whitespace look collapsed. Presumably it must be regenerated from the repository before any
# tool can apply it -- confirm against the original commit.
#
# Files touched and what each hunk visibly changes:
#
# 1. pipes/WDL/tasks/tasks_nextstrain.wdl (task nextclade_many_samples)
#    - Adds an optional input `File? genome_ids_setdefault_blank`.
#    - In the embedded Python that transposes the nextclade TSV:
#        * 'IDLIST' is now opened (for writing) as the outer `with`, and the
#          '~{basename}.nextclade.tsv' reader is nested inside it, so IDLIST stays open for
#          the second pass below.
#        * The per-row key loop iterates `out_maps.keys()` instead of a repeated literal tuple
#          of 'clade', 'aaSubstitutions', 'aaDeletions'.
#        * A second pass reads the file given by genome_ids_setdefault_blank (the placeholder
#          falls back to '/dev/null' when the input is not supplied). Each stripped, non-empty
#          line that is not already a key of out_maps['clade'] gets an empty-string value in
#          every out_maps entry and is appended to IDLIST.
#        * One blank line is added before the NEXTCLADE_CLADE.json dump.
#
# 2. pipes/WDL/tasks/tasks_sarscov2.wdl (tasks pangolin_one_sample, pangolin_many_samples)
#    - Default `docker` string changes from quay.io/staphb/pangolin:4.0.6-pdata-1.9
#      to quay.io/staphb/pangolin:4.1.1-pdata-1.11 in both tasks.
#
# 3. pipes/WDL/workflows/sarscov2_batch_relineage.wdl (workflow sarscov2_batch_relineage)
#    - The nextclade_many_samples call now also passes
#      `genome_ids_setdefault_blank = fasta_to_ids.ids_txt`; the other three inputs are
#      re-emitted (presumably only realigned -- original column spacing is not recoverable here).
#    - NOTE(review): `fasta_to_ids` is not defined in this patch; presumably an existing call
#      in the workflow -- verify.
#
# 4. requirements-modules.txt
#    - quay.io/staphb/pangolin pin changes from 4.0.6-pdata-1.9 to 4.1.1-pdata-1.11,
#      matching the WDL defaults in (2).
#
diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 3bb1d2307..becdae553 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -97,6 +97,7 @@ task nextclade_many_samples { File? virus_properties String? dataset_name String basename + File? genome_ids_setdefault_blank String docker = "nextstrain/nextclade:2.0.0" } command <<< @@ -141,12 +142,22 @@ task nextclade_many_samples { # transpose table import codecs, csv, json out_maps = {'clade':{}, 'aaSubstitutions':{}, 'aaDeletions':{}} - with codecs.open('~{basename}.nextclade.tsv', 'r', encoding='utf-8') as inf: - with codecs.open('IDLIST', 'w', encoding='utf-8') as outf_ids: + with codecs.open('IDLIST', 'w', encoding='utf-8') as outf_ids: + # parse entries from output tsv into jsons + with codecs.open('~{basename}.nextclade.tsv', 'r', encoding='utf-8') as inf: for row in csv.DictReader(inf, delimiter='\t'): - for k in ('clade','aaSubstitutions','aaDeletions'): + for k in out_maps.keys(): out_maps[k][row['seqName']] = row[k] outf_ids.write(row['seqName']+'\n') + # fill empty values for anything not mentioned by output tsv + with codecs.open("~{default='/dev/null' genome_ids_setdefault_blank}", 'r', encoding='utf-8') as inf: + for line in inf: + seqName = line.strip() + if seqName and (seqName not in out_maps['clade']): + for k in out_maps.keys(): + out_maps[k][seqName] = '' + outf_ids.write(seqName+'\n') + with codecs.open('NEXTCLADE_CLADE.json', 'w', encoding='utf-8') as outf: json.dump(out_maps['clade'], outf) with codecs.open('NEXTCLADE_AASUBS.json', 'w', encoding='utf-8') as outf: diff --git a/pipes/WDL/tasks/tasks_sarscov2.wdl b/pipes/WDL/tasks/tasks_sarscov2.wdl index a0c916c60..b28fb6b09 100644 --- a/pipes/WDL/tasks/tasks_sarscov2.wdl +++ b/pipes/WDL/tasks/tasks_sarscov2.wdl @@ -10,7 +10,7 @@ task pangolin_one_sample { Float? max_ambig String? 
analysis_mode Boolean update_dbs_now=false - String docker = "quay.io/staphb/pangolin:4.0.6-pdata-1.9" + String docker = "quay.io/staphb/pangolin:4.1.1-pdata-1.11" } String basename = basename(genome_fasta, ".fasta") command <<< @@ -91,7 +91,7 @@ task pangolin_many_samples { String? analysis_mode Boolean update_dbs_now=false String basename - String docker = "quay.io/staphb/pangolin:4.0.6-pdata-1.9" + String docker = "quay.io/staphb/pangolin:4.1.1-pdata-1.11" } command <<< set -ex diff --git a/pipes/WDL/workflows/sarscov2_batch_relineage.wdl b/pipes/WDL/workflows/sarscov2_batch_relineage.wdl index 3390e8e28..bf794c04e 100644 --- a/pipes/WDL/workflows/sarscov2_batch_relineage.wdl +++ b/pipes/WDL/workflows/sarscov2_batch_relineage.wdl @@ -38,9 +38,10 @@ workflow sarscov2_batch_relineage { call nextstrain.nextclade_many_samples { input: - genome_fastas = [filter_sequences_by_length.filtered_fasta], - basename = "nextclade-~{flowcell_id}", - dataset_name = "sars-cov-2" + genome_fastas = [filter_sequences_by_length.filtered_fasta], + genome_ids_setdefault_blank = fasta_to_ids.ids_txt, + basename = "nextclade-~{flowcell_id}", + dataset_name = "sars-cov-2" } call sarscov2.pangolin_many_samples { diff --git a/requirements-modules.txt b/requirements-modules.txt index 40880d7d0..26292e4bf 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -7,5 +7,5 @@ broadinstitute/beast-beagle-cuda=1.10.5pre broadinstitute/ncbi-tools=2.10.7.10 nextstrain/base=build-20211012T204409Z andersenlabapps/ivar=1.3.1 -quay.io/staphb/pangolin=4.0.6-pdata-1.9 +quay.io/staphb/pangolin=4.1.1-pdata-1.11 nextstrain/nextclade=2.0.0