From 59fcd8b56cdea9a533e27e0312370e9ad3048aca Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 11:57:56 -0400 Subject: [PATCH 1/7] Update to locidex:0.2.3 and modify input_assure.py to accommodate the revised mlst.json report format --- assets/samplesheet.csv | 12 ++++---- bin/input_assure.py | 45 ++++++++++++----------------- conf/test.config | 2 +- modules/local/locidex/merge/main.nf | 4 +-- 4 files changed, 28 insertions(+), 35 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 7e27187..3da85df 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false -S3,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia 
coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false diff --git a/bin/input_assure.py b/bin/input_assure.py index d99bf2a..2b4f455 100755 --- a/bin/input_assure.py +++ b/bin/input_assure.py @@ -19,18 +19,20 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f with open_file(json_file, "rt") as f: json_data = json.load(f) + # Extract the profile from the json_data + profile = json_data.get("data", {}).get("profile", {}) + # Check for multiple keys in the JSON file and define error message + keys = sorted(profile.keys()) + original_key = keys[0] if keys else None + # Define a variable to store the match_status (True or False) - match_status = sample_id in json_data + match_status = sample_id in profile # Initialize the error message error_message = None - # Check for multiple keys in the JSON file and define error message - keys = list(json_data.keys()) - original_key = keys[0] if keys else None - - if len(keys) == 0: - error_message = f"{json_file} is completely empty!" 
+ if not keys: + error_message = f"{json_file} is missing the 'profile' section or is completely empty!" print(error_message) sys.exit(1) elif len(keys) > 1: @@ -38,11 +40,11 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f if not match_status: error_message = f"No key in the MLST JSON file ({json_file}) matches the specified sample ID '{sample_id}'. The first key '{original_key}' has been forcefully changed to '{sample_id}' and all other keys have been removed." # Retain only the specified sample ID - json_data = {sample_id: json_data.pop(original_key)} + json_data["data"]["profile"] = {sample_id: profile.pop(original_key)} else: error_message = f"MLST JSON file ({json_file}) contains multiple keys: {keys}. The MLST JSON file has been modified to retain only the '{sample_id}' entry" - # Remove all keys expect the one matching sample_id - json_data = {sample_id: json_data[sample_id]} + # Retain only the specified sample_id in the profile + json_data["data"]["profile"] = {sample_id: profile[sample_id]} elif not match_status: # Define error message based on meta.address (query or reference) if address == "null": @@ -50,7 +52,8 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f else: error_message = f"Reference {sample_id} ID and JSON key in {json_file} DO NOT MATCH. The '{original_key}' key in {json_file} has been forcefully changed to '{sample_id}': User should manually check input files to ensure correctness." # Update the JSON file with the new sample ID - json_data[sample_id] = json_data.pop(original_key) + json_data["data"]["profile"] = {sample_id: profile.pop(original_key)} + json_data["data"]["sample_name"] = sample_id # Write file containing relevant error messages if error_message: @@ -69,21 +72,11 @@ def check_inputs(json_file, sample_id, address, output_error_file, output_json_f description="Check sample inputs, force change if ID ≠ KEY, and generate an error report." 
) parser.add_argument("--input", help="Path to the mlst.json file.", required=True) - parser.add_argument( - "--sample_id", help="Sample ID to check in the JSON file.", required=True - ) - parser.add_argument( - "--address", help="Address to use in the error message.", required=True - ) - parser.add_argument( - "--output_error", help="Path to the error report file.", required=True - ) - parser.add_argument( - "--output_json", help="Path to the MLST JSON file (gzipped).", required=True - ) + parser.add_argument("--sample_id", help="Sample ID to check in the JSON file.", required=True) + parser.add_argument("--address", help="Address to use in the error message.", required=True) + parser.add_argument("--output_error", help="Path to the error report file.", required=True) + parser.add_argument("--output_json", help="Path to the MLST JSON file (gzipped).", required=True) args = parser.parse_args() - check_inputs( - args.input, args.sample_id, args.address, args.output_error, args.output_json - ) + check_inputs(args.input, args.sample_id, args.address, args.output_error, args.output_json) diff --git a/conf/test.config b/conf/test.config index a43dada..5108575 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,7 @@ params { max_time = '1.h' // Input data - input = 'https://raw.githubusercontent.com/phac-nml/arboratornf/dev/tests/data/samplesheets/samplesheet.csv' + input = "${projectDir}/tests/data/samplesheets/samplesheet.csv" outdir = "results" diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf index 39bd73b..cb29847 100644 --- a/modules/local/locidex/merge/main.nf +++ b/modules/local/locidex/merge/main.nf @@ -7,8 +7,8 @@ process LOCIDEX_MERGE { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/locidex:0.1.1--pyhdfd78af_0' : - 'biocontainers/locidex:0.1.1--pyhdfd78af_0' }" + 'docker.io/mwells14/locidex:0.2.3' : + 'docker.io/mwells14/locidex:0.2.3' }" input: path input_values // [file(sample1), file(sample2), file(sample3), etc...] From b42fc8041cd81afa02411d34677b2024e3a7a5b7 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 11:58:54 -0400 Subject: [PATCH 2/7] Update test data (reports and samplesheets) to accommodate locidex mlst.json report --- tests/data/profiles/S1.mlst.json | 32 +++++++++++++------ tests/data/profiles/S2.mlst.json | 32 +++++++++++++------ tests/data/profiles/S3.mlst.json | 32 +++++++++++++------ tests/data/profiles/S4.mlst.json | 32 +++++++++++++------ tests/data/profiles/S5.mlst.json | 32 +++++++++++++------ tests/data/profiles/S6.mlst.json | 32 +++++++++++++------ .../samplesheet-bad-metadata_1.csv | 12 +++---- .../samplesheet-bad-metadata_partition.csv | 12 +++---- .../samplesheets/samplesheet-id-mismatch.csv | 12 +++---- .../samplesheet-little-metadata.csv | 12 +++---- tests/data/samplesheets/samplesheet.csv | 12 +++---- 11 files changed, 168 insertions(+), 84 deletions(-) diff --git a/tests/data/profiles/S1.mlst.json b/tests/data/profiles/S1.mlst.json index 6405934..d0f7242 100644 --- a/tests/data/profiles/S1.mlst.json +++ b/tests/data/profiles/S1.mlst.json @@ -1,11 +1,25 @@ { - "S1": { - "locus_1": 1, - "locus_2": 1, - "locus_3": "1", - "locus_4": "1", - "locus_5": "1", - "locus_6": 1, - "locus_7": 1 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S1", + "profile": { + "S1": { + "locus_1": 1, + "locus_2": 1, + "locus_3": "1", + "locus_4": "1", + "locus_5": "1", + "locus_6": 1, + "locus_7": 1 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/profiles/S2.mlst.json
b/tests/data/profiles/S2.mlst.json index 54eed29..7057996 100644 --- a/tests/data/profiles/S2.mlst.json +++ b/tests/data/profiles/S2.mlst.json @@ -1,11 +1,25 @@ { - "S2": { - "locus_1": 1, - "locus_2": 1, - "locus_3": "2", - "locus_4": "2", - "locus_5": "?", - "locus_6": 4, - "locus_7": 1 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S2", + "profile": { + "S2": { + "locus_1": 1, + "locus_2": 1, + "locus_3": "2", + "locus_4": "2", + "locus_5": "?", + "locus_6": 4, + "locus_7": 1 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/profiles/S3.mlst.json b/tests/data/profiles/S3.mlst.json index 578dd5f..b20aacc 100644 --- a/tests/data/profiles/S3.mlst.json +++ b/tests/data/profiles/S3.mlst.json @@ -1,11 +1,25 @@ { - "S3": { - "locus_1": 1, - "locus_2": 2, - "locus_3": "2", - "locus_4": "2", - "locus_5": "1", - "locus_6": 5, - "locus_7": 1 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S3", + "profile": { + "S3": { + "locus_1": 1, + "locus_2": 2, + "locus_3": "2", + "locus_4": "2", + "locus_5": "1", + "locus_6": 5, + "locus_7": 1 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/profiles/S4.mlst.json b/tests/data/profiles/S4.mlst.json index acee8b3..6481fae 100644 --- a/tests/data/profiles/S4.mlst.json +++ b/tests/data/profiles/S4.mlst.json @@ -1,11 +1,25 @@ { - "S4": { - "locus_1": 1, - "locus_2": 2, - "locus_3": "3", - "locus_4": "2", - "locus_5": "1", - "locus_6": 6, - "locus_7": 1 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S4", + "profile": { + "S4": { + "locus_1": 1, + 
"locus_2": 2, + "locus_3": "3", + "locus_4": "2", + "locus_5": "1", + "locus_6": 6, + "locus_7": 1 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/profiles/S5.mlst.json b/tests/data/profiles/S5.mlst.json index 76c3232..0787044 100644 --- a/tests/data/profiles/S5.mlst.json +++ b/tests/data/profiles/S5.mlst.json @@ -1,11 +1,25 @@ { - "S5": { - "locus_1": 1, - "locus_2": 2, - "locus_3": "?", - "locus_4": "2", - "locus_5": "1", - "locus_6": 8, - "locus_7": 1 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S5", + "profile": { + "S5": { + "locus_1": 1, + "locus_2": 2, + "locus_3": "?", + "locus_4": "2", + "locus_5": "1", + "locus_6": 8, + "locus_7": 1 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/profiles/S6.mlst.json b/tests/data/profiles/S6.mlst.json index 31869bd..f9cb68a 100644 --- a/tests/data/profiles/S6.mlst.json +++ b/tests/data/profiles/S6.mlst.json @@ -1,11 +1,25 @@ { - "S6": { - "locus_1": 2, - "locus_2": 3, - "locus_3": "3", - "locus_4": "-", - "locus_5": "?", - "locus_6": 9, - "locus_7": 0 + "db_info": {}, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "S6", + "profile": { + "S6": { + "locus_1": 2, + "locus_2": 3, + "locus_3": "3", + "locus_4": "-", + "locus_5": "?", + "locus_6": 9, + "locus_7": 0 + } + }, + "seq_data": {} } -} \ No newline at end of file +} diff --git a/tests/data/samplesheets/samplesheet-bad-metadata_1.csv b/tests/data/samplesheets/samplesheet-bad-metadata_1.csv index 4a6bff0..f92ce5b 100644 --- a/tests/data/samplesheets/samplesheet-bad-metadata_1.csv +++ b/tests/data/samplesheets/samplesheet-bad-metadata_1.csv @@ -1,7 +1,7 @@ 
sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli|","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false -S3,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli|","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true 
+S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false diff --git a/tests/data/samplesheets/samplesheet-bad-metadata_partition.csv b/tests/data/samplesheets/samplesheet-bad-metadata_partition.csv index f8da29f..657161c 100644 --- a/tests/data/samplesheets/samplesheet-bad-metadata_partition.csv +++ b/tests/data/samplesheets/samplesheet-bad-metadata_partition.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S1.mlst.json,1@,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false -S3,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false 
+S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1@,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false diff --git a/tests/data/samplesheets/samplesheet-id-mismatch.csv b/tests/data/samplesheets/samplesheet-id-mismatch.csv index ee543e0..fc9ba9c 100644 --- a/tests/data/samplesheets/samplesheet-id-mismatch.csv +++ b/tests/data/samplesheets/samplesheet-id-mismatch.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -MISMATCH,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false 
-S3,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true +MISMATCH,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false diff --git a/tests/data/samplesheets/samplesheet-little-metadata.csv 
b/tests/data/samplesheets/samplesheet-little-metadata.csv index d49ce75..443d67a 100644 --- a/tests/data/samplesheets/samplesheet-little-metadata.csv +++ b/tests/data/samplesheets/samplesheet-little-metadata.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC",,,,,, -S2,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC",,,,,, -S3,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC",,,,,, -S4,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC",,,,,, -S5,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC",,,,,, -S6,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC",,,,,, +S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC",,,,,, +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC",,,,,, +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC",,,,,, +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC",,,,,, +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC",,,,,, 
+S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC",,,,,, diff --git a/tests/data/samplesheets/samplesheet.csv b/tests/data/samplesheets/samplesheet.csv index 35bf784..3da85df 100644 --- a/tests/data/samplesheets/samplesheet.csv +++ b/tests/data/samplesheets/samplesheet.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 -S1,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false -S3,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/clustersplitter/dev/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false 
+S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false From dfb3263648303ab11d85d751a0b0df63626cb85d Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 12:02:58 -0400 Subject: [PATCH 3/7] Updated CHANGELOG.md and nextflow.config manifest version --- CHANGELOG.md | 10 ++++++++++ nextflow.config | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f52b659..12da828 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.0] - 2024-08-23 + +### Changed + +- Upgraded `locidex/merge` to version `0.2.2` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format. + - [PR19](https://github.com/phac-nml/arboratornf/pull/19) +- Aligned container registry handling in configuration files and modules with `phac-nml/pipeline-standards` + - [PR19](https://github.com/phac-nml/arboratornf/pull/19) + ## [0.1.0] - 2024-08-20 Initial release of the arboratornf pipeline to be used for running [Arborator](https://github.com/phac-nml/arborator) under Nextflow. 
@@ -15,3 +24,4 @@ Initial release of the arboratornf pipeline to be used for running [Arborator](h - ArborView integration. [0.1.0]: https://github.com/phac-nml/arboratornf/releases/tag/0.1.0 +[0.2.0]: https://github.com/phac-nml/arboratornf/releases/tag/0.2.0 diff --git a/nextflow.config b/nextflow.config index 36c89b3..d1f4d12 100644 --- a/nextflow.config +++ b/nextflow.config @@ -213,7 +213,7 @@ manifest { description = """Arborator: Genomic Profile Clustering and Summary""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.1.0' + version = '0.2.0' doi = '' defaultBranch = 'main' } From 94d5be9a075394eb4f484bdb09ced27598bdfcfd Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 12:09:47 -0400 Subject: [PATCH 4/7] Update typos and formatting --- modules/local/locidex/merge/main.nf | 5 +++-- nextflow.config | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/local/locidex/merge/main.nf b/modules/local/locidex/merge/main.nf index cb29847..2075bc1 100644 --- a/modules/local/locidex/merge/main.nf +++ b/modules/local/locidex/merge/main.nf @@ -7,8 +7,9 @@ process LOCIDEX_MERGE { label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/mwells14/locidex:0.2.3' : - 'docker.io/mwells14/locidex:0.2.3' }" + "docker.io/mwells14/locidex:0.2.3" : + task.ext.override_configured_container_registry != false ? 'docker.io/mwells14/locidex:0.2.3' : + 'mwells14/locidex:0.2.3' }" input: path input_values // [file(sample1), file(sample2), file(sample3), etc...] 
diff --git a/nextflow.config b/nextflow.config index d1f4d12..7066d48 100644 --- a/nextflow.config +++ b/nextflow.config @@ -213,7 +213,7 @@ manifest { description = """Arborator: Genomic Profile Clustering and Summary""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.2.0' + version = '0.3.0' doi = '' defaultBranch = 'main' } From f08fef898a4fd0143a81bbb8a7d169804167043c Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 13:02:14 -0400 Subject: [PATCH 5/7] update samplesheets ... again --- assets/samplesheet.csv | 10 +++++----- tests/data/samplesheets/samplesheet.csv | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 3da85df..7b0fc77 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false -S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false 
-S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false diff --git a/tests/data/samplesheets/samplesheet.csv b/tests/data/samplesheets/samplesheet.csv index 3da85df..7b0fc77 100644 --- a/tests/data/samplesheets/samplesheet.csv +++ b/tests/data/samplesheets/samplesheet.csv @@ -1,7 +1,7 @@ sample,mlst_alleles,metadata_partition,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 S1,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","Canada","O157:H7",21,"2024/05/30","beef",true -S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false 
-S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true -S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true -S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false -S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S1.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false +S2,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S2.mlst.json,1,"Escherichia coli","EHEC/STEC","The United States","O157:H7",55,"2024/05/21","milk",false +S3,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S3.mlst.json,2,"Escherichia coli","EPEC","France","O125",14,"2024/04/30","cheese",true +S4,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S4.mlst.json,2,"Escherichia coli","EPEC","France","O125",35,"2024/04/22","cheese",true +S5,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S5.mlst.json,3,"Escherichia coli","EAEC","Canada","O126:H27",61,"2012/09/01","milk",false +S6,https://raw.githubusercontent.com/phac-nml/arboratornf/update/input_assure/tests/data/profiles/S6.mlst.json,unassociated,"Escherichia coli","EAEC","Canada","O111:H21",43,"2011/12/25","fruit",false From a6fb650bea9d7f26812ad1d969e02849bad60b55 Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Thu, 22 Aug 2024 13:32:19 -0400 Subject: [PATCH 6/7] Updated changelog --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
12da828..3c5d78d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgraded `locidex/merge` to version `0.2.2` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format. - [PR19](https://github.com/phac-nml/arboratornf/pull/19) -- Aligned container registry handling in configuration files and modules with `phac-nml/pipeline-standards` - - [PR19](https://github.com/phac-nml/arboratornf/pull/19) ## [0.1.0] - 2024-08-20 From 288ce4d1619b9b55aada24b152507466cc4cc12d Mon Sep 17 00:00:00 2001 From: kylacochrane Date: Fri, 23 Aug 2024 09:08:04 -0400 Subject: [PATCH 7/7] Fixed version typos --- CHANGELOG.md | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c5d78d..7c54b27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- Upgraded `locidex/merge` to version `0.2.2` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format. +- Upgraded `locidex/merge` to version `0.2.3` and updated `input_assure.py` and test data for compatibility with the new `mlst.json` allele file format. - [PR19](https://github.com/phac-nml/arboratornf/pull/19) ## [0.1.0] - 2024-08-20 diff --git a/nextflow.config b/nextflow.config index 7066d48..d1f4d12 100644 --- a/nextflow.config +++ b/nextflow.config @@ -213,7 +213,7 @@ manifest { description = """Arborator: Genomic Profile Clustering and Summary""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.3.0' + version = '0.2.0' doi = '' defaultBranch = 'main' }