-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update/arborator gas 0.1.3 #27
Changes from all commits
9a72656
c2d7cdd
5ee7644
62292dd
257826e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
(((((S5:1.000000,S3:1.000000):0.5,S4:1.500000):0.8333333333333335,S2:2.333333):1.1666666666666665,S1:3.500000):0.10000000000000009,S6:3.600000); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
S1,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true | ||
S2,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false | ||
S3,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S3.mlst.json,1,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true | ||
S4,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S4.mlst.json,1,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true | ||
S5,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S5.mlst.json,1,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false | ||
S6,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S6.mlst.json,1,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,14 @@ nextflow_pipeline { | |
def expected_arborator_meta_included = path("$baseDir/tests/data/arborator/basic/metadata.included.tsv") | ||
assert actual_arborator_meta_included.text == expected_arborator_meta_included.text | ||
|
||
def actual_arborator_tree_1 = path("$launchDir/results/arborator/1_tree.nwk") | ||
def expected_arborator_tree_1 = path("$baseDir/tests/data/arborator/1/tree.nwk") | ||
assert actual_arborator_tree_1.text == expected_arborator_tree_1.text | ||
|
||
def actual_arborator_tree_2 = path("$launchDir/results/arborator/2_tree.nwk") | ||
def expected_arborator_tree_2 = path("$baseDir/tests/data/arborator/2/tree.nwk") | ||
assert actual_arborator_tree_2.text == expected_arborator_tree_2.text | ||
|
||
assert path("$launchDir/results/arborator/1_clusters.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_loci.summary.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_matrix.pq").exists() | ||
|
@@ -109,6 +117,94 @@ nextflow_pipeline { | |
} | ||
} | ||
|
||
test("Small-scale test of full pipeline with all samples same partition, tree comparison"){ | ||
tag "pipeline" | ||
tag "pipeline_all" | ||
when { | ||
params { | ||
input = "$baseDir/tests/data/samplesheets/samplesheet-all-single-partition.csv" | ||
outdir = "results" | ||
|
||
metadata_partition_name = "outbreak" | ||
metadata_1_header = "organism" | ||
metadata_2_header = "subtype" | ||
metadata_3_header = "country" | ||
metadata_4_header = "serovar" | ||
metadata_5_header = "age" | ||
metadata_6_header = "date" | ||
metadata_7_header = "source" | ||
metadata_8_header = "special" | ||
} | ||
} | ||
|
||
then { | ||
assert workflow.success | ||
assert path("$launchDir/results").exists() | ||
|
||
// Check merged profiles | ||
def actual_profile_tsv = path("$launchDir/results/merged/profile.tsv") | ||
def expected_profile_tsv = path("$baseDir/tests/data/profiles/merged_profiles.tsv") | ||
assert actual_profile_tsv.text == expected_profile_tsv.text | ||
|
||
// The goal of this test is to make sure that the produced tree has correct branches | ||
// So other comparisons for metadata, etc, were removed | ||
def actual_arborator_tree_1 = path("$launchDir/results/arborator/1_tree.nwk") | ||
def expected_arborator_tree_1 = path("$baseDir/tests/data/arborator/1/tree_all_single_partition.nwk") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I included this test, which focuses on verifying that the Newick file produced when all genomes are in the same partition is valid (e.g., has correct branch lengths after updating GAS to 0.1.3). I removed all other checks (e.g., related to metadata/summaries) from this test case. For additional information, see the screenshot below. The left tree is the […] For more context on this issue, see phac-nml/genomic_address_service#15. |
||
assert actual_arborator_tree_1.text == expected_arborator_tree_1.text | ||
|
||
assert path("$launchDir/results/arborator/1_clusters.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_loci.summary.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_matrix.pq").exists() | ||
assert path("$launchDir/results/arborator/1_matrix.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_metadata.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_outliers.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_profile.tsv").exists() | ||
assert path("$launchDir/results/arborator/1_tree.nwk").exists() | ||
|
||
// Since all samples are in the same partition (1), there should be no output files prefixed with 2 | ||
assert !path("$launchDir/results/arborator/2_clusters.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_loci.summary.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_matrix.pq").exists() | ||
assert !path("$launchDir/results/arborator/2_matrix.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_metadata.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_outliers.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_profile.tsv").exists() | ||
assert !path("$launchDir/results/arborator/2_tree.nwk").exists() | ||
|
||
// compare IRIDA Next JSON output | ||
def iridanext_json = path("$launchDir/results/iridanext.output.json").json | ||
def iridanext_global = iridanext_json.files.global | ||
def iridanext_samples = iridanext_json.files.samples | ||
def iridanext_metadata = iridanext_json.metadata.samples | ||
|
||
assert iridanext_global.findAll { it.path == "arborator/1_clusters.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_loci.summary.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_matrix.pq" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_matrix.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_metadata.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_outliers.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_profile.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/1_tree.nwk" }.size() == 1 | ||
|
||
// Since all samples are in the same partition (1), there should be no output files prefixed with 2 | ||
assert iridanext_global.findAll { it.path == "arborator/2_clusters.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_loci.summary.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_matrix.pq" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_matrix.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_metadata.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_outliers.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_profile.tsv" }.size() == 0 | ||
assert iridanext_global.findAll { it.path == "arborator/2_tree.nwk" }.size() == 0 | ||
|
||
assert iridanext_global.findAll { it.path == "arborator/cluster_summary.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/metadata.excluded.tsv" }.size() == 1 | ||
assert iridanext_global.findAll { it.path == "arborator/metadata.included.tsv" }.size() == 1 | ||
|
||
assert iridanext_samples.isEmpty() | ||
assert iridanext_metadata.isEmpty() | ||
} | ||
} | ||
|
||
test("Small-scale test of full pipeline, missing metadata"){ | ||
tag "pipeline" | ||
when { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the double tag here intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I wanted to have a separate tag so I could run this test alone.