From 0c716e197bcb966e527c4fa38215494d4616a2f3 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 10:27:05 -0500 Subject: [PATCH 01/10] Added test case for empty values in CSV --- .../iridanext/MetadataParserCSVTest.groovy | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy index 5b79c7c..dedd999 100644 --- a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy +++ b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy @@ -5,6 +5,7 @@ import java.nio.file.FileSystems import nextflow.iridanext.MetadataParser import nextflow.iridanext.MetadataParserCSV import spock.lang.Specification +import spock.lang.Ignore import nextflow.iridanext.TestHelper @@ -70,4 +71,37 @@ class MetadataParserCSVTest extends Specification { ] csvMapUnmatch == [:] } + + def 'Test parse CSV file with missing values' () { + when: + def csvContent = """a,b,c + |1,2, + |4,,""".stripMargin() + def csvFile = TestHelper.createInMemTempFile("temp.csv", csvContent) + def parser = new MetadataParserCSV("a", ",") + def csvMapColA = parser.parseMetadata(csvFile) + + then: + csvMapColA == [ + "1": ["b": "2", "c": ""], + "4": ["b": "", "c": ""] + ] + } + + @Ignore + def 'Test parse CSV file with missing ids' () { + when: + def csvContent = """a,b,c + |1,2, + |4,,6""".stripMargin() + def csvFile = TestHelper.createInMemTempFile("temp.csv", csvContent) + + parser = new MetadataParserCSV("b", ",") + def csvMapColB = parser.parseMetadata(csvFile) + + then: + csvMapColB == [ + "2": ["a": "1", "c": ""] + ] + } } \ No newline at end of file From 80e594f3a0173dc43c225e575697327f9a5e65a5 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 10:40:38 -0500 Subject: [PATCH 02/10] Added test case for missing CSV identifier values --- 
.../nextflow/iridanext/MetadataParserCSVTest.groovy | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy index dedd999..d538f88 100644 --- a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy +++ b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserCSVTest.groovy @@ -5,7 +5,7 @@ import java.nio.file.FileSystems import nextflow.iridanext.MetadataParser import nextflow.iridanext.MetadataParserCSV import spock.lang.Specification -import spock.lang.Ignore +import groovy.lang.MissingPropertyException import nextflow.iridanext.TestHelper @@ -88,11 +88,10 @@ class MetadataParserCSVTest extends Specification { ] } - @Ignore def 'Test parse CSV file with missing ids' () { when: def csvContent = """a,b,c - |1,2, + |1,2,3 |4,,6""".stripMargin() def csvFile = TestHelper.createInMemTempFile("temp.csv", csvContent) @@ -100,8 +99,8 @@ class MetadataParserCSVTest extends Specification { def csvMapColB = parser.parseMetadata(csvFile) then: - csvMapColB == [ - "2": ["a": "1", "c": ""] - ] + // the column of identifiers is column "b", which has a missing value + // and so should trigger an exception + thrown(MissingPropertyException) } } \ No newline at end of file From 11f8222113146ffcb52345b56c1f15be011b4f50 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 10:41:56 -0500 Subject: [PATCH 03/10] Updated changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d23f0f..0457693 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# Unreleased + +- Added documentation for running test cases. +- Added test cases to verify that missing values in CSV will be encoded as empty strings in IRIDA Next JSON file in the sample metadata section. 
+ # 0.2.0 - 2024/01/22 - Added support for writing JSON output file when using `-resume` in a pipeline. From 98fc0a429db33141b145851a9ce45e8bc2fe0f53 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 10:45:30 -0500 Subject: [PATCH 04/10] Added information on running test cases --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 7982f5e..98eca31 100644 --- a/README.md +++ b/README.md @@ -421,6 +421,20 @@ plugins { } ``` +## Run unit/integration tests + +In order to run the test cases, please glone this repository and run the following command: + +```bash +./gradlew check +``` + +To get more information for any failed tests, please run: + +```bash +./gradlew check --info +``` + # Example: nf-core/fetchngs One use case of this plugin is to structure reads and metadata downloaded from NCBI/ENA for storage in IRIDA Next by making use of the [nf-core/fetchngs][nf-core/fetchngs] pipeline. The example configuration [fetchngs.conf][] can be used for this purpose. To test, please run the following (using [ids.csv][fetchngs-ids.csv] as example data accessions): From 021804ac98d86b395fdbc8eec08a65528fefa617 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:07:45 -0500 Subject: [PATCH 05/10] Added instructions on installing the groovy language --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 98eca31..431b80e 100644 --- a/README.md +++ b/README.md @@ -389,6 +389,12 @@ iridanext { # Development +In order to build this plugin you will need a Java Development Kit (such as [OpenJDK](https://openjdk.org/)) and [Groovy](https://groovy-lang.org/index.html). 
For Ubuntu, this can be installed with: + +```bash +sudo apt install default-jdk groovy +``` + ## Build and install from source In order to build and install the plugin from source, please do the following: From 85aa2d9287c1442204a166672a5d6e2f331f1a8c Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:30:03 -0500 Subject: [PATCH 06/10] Updated readme with section on missing values in metadata --- README.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/README.md b/README.md index 431b80e..218701f 100644 --- a/README.md +++ b/README.md @@ -387,6 +387,73 @@ iridanext { } ``` +### Missing values in metadata + +There are two different scenarios where metadata key/value pairs could be missing for a sample, which result in different behaviours in IRIDA Next. + +1. **Ignore key**: If the `key` is left out of the samples metadata in the IRIDA Next JSON, then nothing is written for that `key` for the sample. Any existing metadata under that `key` will remain in IRIDA Next. + +2. **Delete key**: If a metadata value is an empty string (`"key": ""`) or null (`"key": null`), then IRIDA Next will remove that particular metadata key/value pair from the sample metadata if it exists. This is the expected scenario if pipeline results contain missing (or N/A) values (deleting older metadata keys prevents mixing up old and new pipeline analysis results in the metadata table). + +The following are the expectations for writing missing values in the final IRIDA Next JSON file (in order to delete the key/value pairs in IRIDA Next). 
+ +#### Encoding missing metadata values using JSON + +If the metadata key `key` for **SAMPLE1** is encoded as an empty string `""` or `null` in the JSON file like the below example: + +**output.json** +```json +{ + "SAMPLE1": { + "a": "value1", + "b": "" + } +} +``` + +Then the final IRIDA Next JSON file will preserve the empty string/null value in the samples metadata section: + +**iridanext.output.json.gz** +```json +"metadata": { + "samples": { + "SAMPLE1": { "a": "", "b": "" } + } +} +``` + +#### Encoding missing metadata values using CSV + +If the metadata key `b` for **SAMPLE1** is left empty in the CSV file like the below two examples: + +**output.csv** as table +| column1 | b | c | +|--|--|--| +| SAMPLE1 | | 3 | +| SAMPLE2 | 4 | 5 | +| SAMPLE3 | 6 | 7 | + +**output.csv** as CSV +``` +column1,b,c +SAMPLE1,,3 +SAMPLE2,4,5 +SAMPLE3,6,7 +``` + +Then the value for `b` for **SAMPLE1** will be written as an empty string in the IRIDA Next JSON file: + +**iridanext.output.json.gz** +```json +"metadata": { + "samples": { + "SAMPLE1": { "b": "", "c": "3" }, + "SAMPLE2": { "b": "4", "c": "5" }, + "SAMPLE3": { "b": "6", "c": "7" } + } +} +``` + # Development In order to build this plugin you will need a Java Development Kit (such as [OpenJDK](https://openjdk.org/)) and [Groovy](https://groovy-lang.org/index.html). 
For Ubuntu, this can be installed with: From 8c5e97e98efe29b5a71c26175d4d4485f1bcfc67 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:32:35 -0500 Subject: [PATCH 07/10] Added test case for JSON file with missing values --- .../iridanext/MetadataParserJSONTest.groovy | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy index 5fac93f..f8cf87a 100644 --- a/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy +++ b/plugins/nf-iridanext/src/test/nextflow/iridanext/MetadataParserJSONTest.groovy @@ -46,4 +46,22 @@ class MetadataParserJSONTest extends Specification { "2": ["coords": ["x": 0, "y": 1], "coords.x": 4] ] } + + def 'Test parse JSON file missing values' () { + when: + def jsonContent = '''{ + "1": {"b": "", "c": "3"}, + "2": {"b": "3", "c": null} + }'''.stripMargin() + + def jsonFile = TestHelper.createInMemTempFile("temp.json", jsonContent) + def parser = new MetadataParserJSON() + def outputData = parser.parseMetadata(jsonFile) + + then: + outputData == [ + "1": ["b": "", "c": "3"], + "2": ["b": "3", "c": null] + ] + } } \ No newline at end of file From 43cf0ba03ea675e18f27408fcc5082c8d3842652 Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:39:43 -0500 Subject: [PATCH 08/10] Additional information in the JSON file --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0457693..2015c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Added documentation for running test cases. - Added test cases to verify that missing values in CSV will be encoded as empty strings in IRIDA Next JSON file in the sample metadata section. +- Added test cases for passing missing values in a JSON file. 
# 0.2.0 - 2024/01/22 From a1833fcde0eafafa3ab03589e488125ca4b20a2d Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:44:26 -0500 Subject: [PATCH 09/10] Fixed typo in metadata JSON --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 218701f..8b0f1ef 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ Then the final IRIDA Next JSON file will preserve the empty string/null value in ```json "metadata": { "samples": { - "SAMPLE1": { "a": "", "b": "" } + "SAMPLE1": { "a": "value1", "b": "" } } } ``` From bf5acfd4c32b646fbbd1ca1e39d97f09a8fc6bfa Mon Sep 17 00:00:00 2001 From: Aaron Petkau Date: Tue, 9 Apr 2024 12:48:17 -0500 Subject: [PATCH 10/10] More small typo fixes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8b0f1ef..b5cf8f0 100644 --- a/README.md +++ b/README.md @@ -399,7 +399,7 @@ The following are the expectations for writing missing values in the final IRIDA #### Encoding missing metadata values using JSON -If the metadata key `key` for **SAMPLE1** is encoded as an empty string `""` or `null` in the JSON file like the below example: +If the metadata key `b` for **SAMPLE1** is encoded as an empty string `""` or `null` in the JSON file like the below example: **output.json** ```json @@ -496,7 +496,7 @@ plugins { ## Run unit/integration tests -In order to run the test cases, please glone this repository and run the following command: +In order to run the test cases, please clone this repository and run the following command: ```bash ./gradlew check