diff --git a/pipes/WDL/workflows/augur_from_assemblies.wdl b/pipes/WDL/workflows/augur_from_assemblies.wdl index 8ae8f39af..e94ee5574 100644 --- a/pipes/WDL/workflows/augur_from_assemblies.wdl +++ b/pipes/WDL/workflows/augur_from_assemblies.wdl @@ -1,7 +1,6 @@ version 1.0 import "../tasks/tasks_nextstrain.wdl" as nextstrain -import "../tasks/tasks_reports.wdl" as reports import "../tasks/tasks_utils.wdl" as utils workflow augur_from_assemblies { @@ -19,17 +18,10 @@ workflow augur_from_assemblies { Int min_unambig_genome - String focal_variable = "region" - String focal_value = "North America" - - String focal_bin_variable = "division" - Int focal_bin_max = 50 - - String global_bin_variable = "country" - Int global_bin_max = 50 - File? clades_tsv Array[String]? ancestral_traits_to_infer + + Boolean make_snps_vcf = false } parameter_meta { @@ -48,28 +40,6 @@ workflow augur_from_assemblies { min_unambig_genome: { description: "Minimum number of called bases in genome to pass prefilter." } - - focal_variable: { - description: "The dataset will be bifurcated based on this column header." - } - focal_value: { - description: "The dataset will be bifurcated based whether the focal_variable column matches this value or not. Rows that match this value are considered to be part of the 'focal' set of interest, rows that do not are part of the 'global' set." - } - - focal_bin_variable: { - description: "The focal subset of samples will be evenly subsampled across the discrete values of this column header." - } - focal_bin_max: { - description: "The output will contain no more than this number of focal samples from each discrete value in the focal_bin_variable column." - } - - global_bin_variable: { - description: "The global subset of samples will be evenly subsampled across the discrete values of this column header." - } - global_bin_max: { - description: "The output will contain no more than this number of global samples from each discrete value in the global_bin_variable column." - } - ancestral_traits_to_infer: { description: "A list of metadata traits to use for ancestral node inference (see https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/traits.html). Multiple traits may be specified; must correspond exactly to column headers in metadata file. Omitting these values will skip ancestral trait inference, and ancestral nodes will not have estimated values for metadata." } @@ -98,9 +68,11 @@ workflow augur_from_assemblies { ref_fasta = ref_fasta, basename = "all_samples_aligned.fasta" } - call nextstrain.snp_sites { - input: - msa_fasta = mafft.aligned_sequences + if(make_snps_vcf) { + call nextstrain.snp_sites { + input: + msa_fasta = mafft.aligned_sequences + } } @@ -126,35 +98,9 @@ workflow augur_from_assemblies { sample_metadata_tsv = derived_cols.derived_metadata } - call nextstrain.filter_subsample_sequences as subsample_focal { - input: - sequences_fasta = prefilter.filtered_fasta, - sample_metadata_tsv = derived_cols.derived_metadata, - exclude_where = ["${focal_variable}!=${focal_value}"], - sequences_per_group = focal_bin_max, - group_by = focal_bin_variable - } - - call nextstrain.filter_subsample_sequences as subsample_global { - input: - sequences_fasta = prefilter.filtered_fasta, - sample_metadata_tsv = derived_cols.derived_metadata, - exclude_where = ["${focal_variable}=${focal_value}"], - sequences_per_group = global_bin_max, - group_by = global_bin_variable - } - - call utils.concatenate as cat_fasta { - input: - infiles = [ - subsample_focal.filtered_fasta, subsample_global.filtered_fasta - ], - output_name = "subsampled.fasta" - } - call utils.fasta_to_ids { input: - sequences_fasta = cat_fasta.combined + sequences_fasta = prefilter.filtered_fasta } @@ -162,7 +108,7 @@ workflow augur_from_assemblies { call nextstrain.augur_mask_sites { input: - sequences = cat_fasta.combined + sequences = prefilter.filtered_fasta } call nextstrain.draft_augur_tree { input: @@ -223,14 +169,12 @@ workflow augur_from_assemblies { output { File combined_assemblies = filter_sequences_by_length.filtered_fasta File multiple_alignment = mafft.aligned_sequences - File unmasked_snps = snp_sites.snps_vcf + File? unmasked_snps = snp_sites.snps_vcf File metadata_merged = derived_cols.derived_metadata File keep_list = fasta_to_ids.ids_txt - File subsampled_sequences = cat_fasta.combined - Int focal_kept = subsample_focal.sequences_out - Int global_kept = subsample_global.sequences_out - Int sequences_kept = subsample_focal.sequences_out + subsample_global.sequences_out + File subsampled_sequences = prefilter.filtered_fasta + Int sequences_kept = prefilter.sequences_out File masked_alignment = augur_mask_sites.masked_sequences diff --git a/test/input/WDL/test_outputs-sarscov2_lineages-local.json b/test/input/WDL/test_outputs-sarscov2_lineages-local.json index 804a776ee..bf4191625 100644 --- a/test/input/WDL/test_outputs-sarscov2_lineages-local.json +++ b/test/input/WDL/test_outputs-sarscov2_lineages-local.json @@ -1,5 +1,5 @@ { - "sarscov2_lineages.nextclade_clade": "20A", + "sarscov2_lineages.nextclade_clade": "20C", "sarscov2_lineages.nextclade_aa_subs": "ORF1b:P314L,ORF3a:Q57H,S:D614G", "sarscov2_lineages.nextclade_aa_dels": "", "sarscov2_lineages.pango_lineage": "B.1"