Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update/arborator gas 0.1.3 #27

Merged
merged 5 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.2] - 2025-01-15

- Updated the build of the arborator container in order to update the `genomic_address_service` dependency to [version 0.1.3](https://github.com/phac-nml/genomic_address_service/releases/tag/0.1.3), which fixes an issue with branch lengths in Newick files [PR 27](https://github.com/phac-nml/arboratornf/pull/27).

## [0.3.1] - 2024-11-05

- Fixed the replacement of `docker.userEmulation` with `docker.runOptions = '-u $(id -u):$(id -g)'` that was causing a bug in Azure
Expand Down Expand Up @@ -41,3 +45,4 @@ Initial release of the arboratornf pipeline to be used for running [Arborator](h
[0.2.0]: https://github.com/phac-nml/arboratornf/releases/tag/0.2.0
[0.3.0]: https://github.com/phac-nml/arboratornf/releases/tag/0.3.0
[0.3.1]: https://github.com/phac-nml/arboratornf/releases/tag/0.3.1
[0.3.2]: https://github.com/phac-nml/arboratornf/releases/tag/0.3.2
4 changes: 2 additions & 2 deletions modules/local/arborator/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ process ARBORATOR {
label 'process_high'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/arborator%3A1.0.0--pyhdfd78af_1' :
'biocontainers/arborator:1.0.0--pyhdfd78af_1' }"
'https://depot.galaxyproject.org/singularity/arborator%3A1.0.0--pyhdfd78af_2' :
'biocontainers/arborator:1.0.0--pyhdfd78af_2' }"

input:
path merged_profiles // The allelic profiles
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ manifest {
description = """Arborator: Genomic Profile Clustering and Summary"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
version = '0.3.1'
version = '0.3.2'
doi = ''
defaultBranch = 'main'
}
Expand Down
1 change: 1 addition & 0 deletions tests/data/arborator/1/tree_all_single_partition.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(((((S5:1.000000,S3:1.000000):0.5,S4:1.500000):0.8333333333333335,S2:2.333333):1.1666666666666665,S1:3.500000):0.10000000000000009,S6:3.600000);
7 changes: 7 additions & 0 deletions tests/data/samplesheets/samplesheet-all-single-partition.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8
S1,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true
S2,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false
S3,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S3.mlst.json,1,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true
S4,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S4.mlst.json,1,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true
S5,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S5.mlst.json,1,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false
S6,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S6.mlst.json,1,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false
96 changes: 96 additions & 0 deletions tests/pipelines/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ nextflow_pipeline {
def expected_arborator_meta_included = path("$baseDir/tests/data/arborator/basic/metadata.included.tsv")
assert actual_arborator_meta_included.text == expected_arborator_meta_included.text

def actual_arborator_tree_1 = path("$launchDir/results/arborator/1_tree.nwk")
def expected_arborator_tree_1 = path("$baseDir/tests/data/arborator/1/tree.nwk")
assert actual_arborator_tree_1.text == expected_arborator_tree_1.text

def actual_arborator_tree_2 = path("$launchDir/results/arborator/2_tree.nwk")
def expected_arborator_tree_2 = path("$baseDir/tests/data/arborator/2/tree.nwk")
assert actual_arborator_tree_2.text == expected_arborator_tree_2.text

assert path("$launchDir/results/arborator/1_clusters.tsv").exists()
assert path("$launchDir/results/arborator/1_loci.summary.tsv").exists()
assert path("$launchDir/results/arborator/1_matrix.pq").exists()
Expand Down Expand Up @@ -109,6 +117,94 @@ nextflow_pipeline {
}
}

test("Small-scale test of full pipeline with all samples same partition, tree comparison"){
tag "pipeline"
tag "pipeline_all"
Comment on lines +121 to +122
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the double tag here intentional?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I wanted to have a separate tag so I could run this test alone.

when {
params {
input = "$baseDir/tests/data/samplesheets/samplesheet-all-single-partition.csv"
outdir = "results"

metadata_partition_name = "outbreak"
metadata_1_header = "organism"
metadata_2_header = "subtype"
metadata_3_header = "country"
metadata_4_header = "serovar"
metadata_5_header = "age"
metadata_6_header = "date"
metadata_7_header = "source"
metadata_8_header = "special"
}
}

then {
assert workflow.success
assert path("$launchDir/results").exists()

// Check merged profiles
def actual_profile_tsv = path("$launchDir/results/merged/profile.tsv")
def expected_profile_tsv = path("$baseDir/tests/data/profiles/merged_profiles.tsv")
assert actual_profile_tsv.text == expected_profile_tsv.text

// The goal of this test is to make sure that the produced tree has correct branches
// So other comparisons for metadata, etc, were removed
def actual_arborator_tree_1 = path("$launchDir/results/arborator/1_tree.nwk")
def expected_arborator_tree_1 = path("$baseDir/tests/data/arborator/1/tree_all_single_partition.nwk")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I included this test, which focuses on verifying that the Newick file produced when all genomes are in the same partition is valid (e.g., has correct branch lengths after updating GAS to 0.1.3). I removed all other checks (e.g., those related to metadata/summaries) from this test case.

For additional information, see the screenshot below. The left tree is `expected_arborator_tree_1`, which has correct branch lengths (all leaves line up on the right side). The right tree is the older, incorrect tree produced by the older arborator container with an older version of the GAS dependency (you can see that the leaves do not all line up on the right side).

image

For more context on this issue, see phac-nml/genomic_address_service#15

assert actual_arborator_tree_1.text == expected_arborator_tree_1.text

assert path("$launchDir/results/arborator/1_clusters.tsv").exists()
assert path("$launchDir/results/arborator/1_loci.summary.tsv").exists()
assert path("$launchDir/results/arborator/1_matrix.pq").exists()
assert path("$launchDir/results/arborator/1_matrix.tsv").exists()
assert path("$launchDir/results/arborator/1_metadata.tsv").exists()
assert path("$launchDir/results/arborator/1_outliers.tsv").exists()
assert path("$launchDir/results/arborator/1_profile.tsv").exists()
assert path("$launchDir/results/arborator/1_tree.nwk").exists()

// Since all samples are in the same partition (1), there should be no output files prefixed with 2
assert !path("$launchDir/results/arborator/2_clusters.tsv").exists()
assert !path("$launchDir/results/arborator/2_loci.summary.tsv").exists()
assert !path("$launchDir/results/arborator/2_matrix.pq").exists()
assert !path("$launchDir/results/arborator/2_matrix.tsv").exists()
assert !path("$launchDir/results/arborator/2_metadata.tsv").exists()
assert !path("$launchDir/results/arborator/2_outliers.tsv").exists()
assert !path("$launchDir/results/arborator/2_profile.tsv").exists()
assert !path("$launchDir/results/arborator/2_tree.nwk").exists()

// compare IRIDA Next JSON output
def iridanext_json = path("$launchDir/results/iridanext.output.json").json
def iridanext_global = iridanext_json.files.global
def iridanext_samples = iridanext_json.files.samples
def iridanext_metadata = iridanext_json.metadata.samples

assert iridanext_global.findAll { it.path == "arborator/1_clusters.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_loci.summary.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_matrix.pq" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_matrix.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_metadata.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_outliers.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_profile.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/1_tree.nwk" }.size() == 1

// Since all samples are in the same partition (1), there should be no IRIDA Next entries for files prefixed with 2
assert iridanext_global.findAll { it.path == "arborator/2_clusters.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_loci.summary.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_matrix.pq" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_matrix.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_metadata.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_outliers.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_profile.tsv" }.size() == 0
assert iridanext_global.findAll { it.path == "arborator/2_tree.nwk" }.size() == 0

assert iridanext_global.findAll { it.path == "arborator/cluster_summary.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/metadata.excluded.tsv" }.size() == 1
assert iridanext_global.findAll { it.path == "arborator/metadata.included.tsv" }.size() == 1

assert iridanext_samples.isEmpty()
assert iridanext_metadata.isEmpty()
}
}

test("Small-scale test of full pipeline, missing metadata"){
tag "pipeline"
when {
Expand Down
Loading