From 9e8570a52f3448712af71738c74bb18d93f99a55 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 18 Nov 2024 07:50:24 -0800 Subject: [PATCH 01/24] fix: add missing parameters to `annotate_workbooks` Add missing parameters to the `annotate_workbooks` function to ensure correct argument propagation to its subfunctions. --- src/spinneret/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 930f224..24c6d07 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -46,7 +46,13 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: def annotate_workbooks( - workbook_dir: str, eml_dir: str, annotator: str, output_dir: str, config_path: str + workbook_dir: str, + eml_dir: str, + annotator: str, + output_dir: str, + config_path: str, + local_model: str = None, + return_ungrounded: bool = False, ) -> None: """Create workbooks for each EML file in a directory @@ -58,6 +64,9 @@ def annotate_workbooks( an API key and is described in the `get_bioportal_annotation` function. :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file + :param local_model: See `get_ontogpt_annotation` documentation for details. + :param return_ungrounded: See `get_ontogpt_annotation` documentation for + details. :return: None :notes: Annotated workbooks will not be created if they already exist. """ @@ -95,6 +104,8 @@ def annotate_workbooks( eml_path=eml_dir + "/" + eml_file, annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, + local_model=local_model, + return_ungrounded=return_ungrounded, ) From d3427737114469e2be59cdf7289a724c08a8601b Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Tue, 19 Nov 2024 12:46:10 -0800 Subject: [PATCH 02/24] fix: prevent OntoGPT cache-related errors by clearing cache Implement a cache-clearing mechanism before each OntoGPT call to mitigate issues where cached results, particularly those without grounded concepts, could lead to processing errors. This ensures that each call to OntoGPT is fresh and produces reliable results. --- src/spinneret/annotator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 503ccfe..88ba0b8 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -680,6 +680,9 @@ def get_ontogpt_annotation( if local_model is not None: cmd += f" -m ollama/{local_model}" try: + # Clear the cache so that the model can derive new annotations + cache_path = os.getcwd() + "/.litellm_cache" + os.system(f"rm -rf {cache_path}") os.system(cmd) except Exception as e: # pylint: disable=broad-exception-caught print(f"Error calling OntoGPT: {e}") From 57e6df729b005224621d594a9ef169a7bc128f40 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 20 Nov 2024 08:48:29 -0800 Subject: [PATCH 03/24] perf: enhance OntoGPT grounding with sample size Implement a strategy to combine multiple OntoGPT runs for each input to improve the consistency and completeness of concept grounding. This approach addresses the variability inherent in the OntoGPT process, resulting in more reliable and accurate annotations. 
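A rough sketch of how a caller might request replicate sampling once this patch is applied. The directory paths, config file, model name, and sample size below are illustrative placeholders, not values used by the project:

    from spinneret.main import annotate_workbooks

    annotate_workbooks(
        workbook_dir="workbooks",            # placeholder directories
        eml_dir="eml",
        annotator="ontogpt",
        output_dir="workbooks_annotated",
        config_path="config.json",           # holds e.g. the BioPortal API key
        local_model="llama3.2",              # placeholder local ollama model name
        return_ungrounded=False,
        sample_size=3,                       # pool three OntoGPT runs per input
    )

Each annotator pools the non-None results of sample_size calls and falls back to None when every replicate returns nothing, so sample_size=1 preserves the previous single-run behaviour.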
--- src/spinneret/annotator.py | 226 +++++++++++++++++++++++++++---------- src/spinneret/main.py | 4 + 2 files changed, 171 insertions(+), 59 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 88ba0b8..3fed6aa 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -136,6 +136,7 @@ def annotate_workbook( annotator: str, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> None: """Annotate a workbook with automated annotation @@ -150,6 +151,8 @@ def annotate_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: None :notes: The workbook is annotated by annotators best suited for the XPaths in the EML file. The annotated workbook is written back to the same @@ -170,23 +173,45 @@ def annotate_workbook( # Run workbook annotators, results of one are used as input for the next if annotator == "bioportal": - wb = add_dataset_annotations_to_workbook(wb, eml) - wb = add_measurement_type_annotations_to_workbook(wb, eml, annotator=annotator) + wb = add_dataset_annotations_to_workbook(wb, eml, sample_size=sample_size) + wb = add_measurement_type_annotations_to_workbook( + wb, eml, annotator=annotator, sample_size=sample_size + ) elif annotator == "ontogpt": wb = add_env_broad_scale_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_env_local_scale_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_process_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_methods_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_research_topic_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_measurement_type_annotations_to_workbook( wb, @@ -194,9 +219,14 @@ def annotate_workbook( annotator="ontogpt", local_model=local_model, return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_env_medium_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator @@ -442,6 +472,7 @@ def add_dataset_annotations_to_workbook( eml: Union[str, etree._ElementTree], output_path: str = None, overwrite: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -451,6 +482,8 @@ def add_dataset_annotations_to_workbook( :param output_path: The path to write the 
annotated workbook. :param overwrite: If True, overwrite existing `dataset` annotations in the workbook, so a fresh set may be created. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with dataset annotations.""" # Load the workbook and EML for processing @@ -475,12 +508,18 @@ def add_dataset_annotations_to_workbook( # Get the dataset annotations dataset_element = eml.xpath("//dataset")[0] element_description = get_description(dataset_element) - annotations = get_bioportal_annotation( # expecting a list of annotations - text=element_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ENVO", # ENVO provides environmental terms - exclude_synonyms="true", - ) + annotations = [] + for _ in range(sample_size): + res = get_bioportal_annotation( # expecting a list of annotations + text=element_description, + api_key=os.environ["BIOPORTAL_API_KEY"], + ontologies="ENVO", # ENVO provides environmental terms + exclude_synonyms="true", + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add dataset annotations to the workbook if annotations is not None: @@ -513,6 +552,7 @@ def add_dataset_annotations_to_workbook( # pylint: disable=too-many-branches +# pylint: disable=too-many-statements def add_measurement_type_annotations_to_workbook( workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], @@ -521,6 +561,7 @@ def add_measurement_type_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -538,6 +579,8 @@ def add_measurement_type_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. 
:returns: Workbook with measurement type annotations.""" # Parameters for the function @@ -586,21 +629,32 @@ def add_measurement_type_annotations_to_workbook( if annotations is None: # Select an annotator, and get the measurement type annotations if annotator.lower() == "ontogpt": - annotations = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="contains_measurement_of_type", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None else: - annotations = ( - get_bioportal_annotation( # expecting a list of annotations + annotations = [] + for _ in range(sample_size): + res = get_bioportal_annotation( + # expecting a list of annotations text=attribute_description, api_key=os.environ["BIOPORTAL_API_KEY"], ontologies="ECSO", # ECSO provides measurment terms exclude_synonyms="true", ) - ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add the measurement type annotations to the workbook if annotations is not None: @@ -718,6 +772,7 @@ def add_process_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -730,6 +785,8 @@ def add_process_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with process annotations. :notes: This function retrieves process annotations using OntoGPT, which requires setup and configuration described in the @@ -780,12 +837,18 @@ def add_process_annotations_to_workbook( if annotations is None: # Get the process annotations - annotations = get_ontogpt_annotation( - text=element_description, - template="contains_process", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="contains_process", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add process annotations to the workbook if annotations is not None: @@ -824,6 +887,7 @@ def add_env_broad_scale_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -836,6 +900,8 @@ def add_env_broad_scale_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with broad scale environmental context annotations. 
:notes: This function retrieves broad scale environmental context annotations using OntoGPT, which requires setup and configuration @@ -885,12 +951,18 @@ def add_env_broad_scale_annotations_to_workbook( if annotations is None: # Get the broad scale environmental context annotations - annotations = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template=predicate, + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add broad scale environmental context annotations to the workbook if annotations is not None: @@ -931,6 +1003,7 @@ def add_env_local_scale_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -943,6 +1016,8 @@ def add_env_local_scale_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with local scale environmental context annotations. :notes: This function retrieves local scale environmental context annotations using OntoGPT, which requires setup and configuration @@ -994,12 +1069,18 @@ def add_env_local_scale_annotations_to_workbook( if annotations is None: # Get the local scale environmental context annotations - annotations = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template=predicate, + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add local scale environmental context annotations to the workbook if annotations is not None: @@ -1040,6 +1121,7 @@ def add_env_medium_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1053,6 +1135,8 @@ def add_env_medium_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. 
:returns: Workbook with environmental medium annotations.""" # Parameters for the function @@ -1101,12 +1185,18 @@ def add_env_medium_annotations_to_workbook( if annotations is None: # Get the environmental medium annotations from the annotator - annotations = get_ontogpt_annotation( - text=attribute_description, - template="env_medium", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="env_medium", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # And add the environmental medium annotations to the workbook if annotations is not None: @@ -1145,6 +1235,7 @@ def add_research_topic_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1157,6 +1248,8 @@ def add_research_topic_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with research topic annotations. :notes: This function retrieves research topic annotations using OntoGPT, which requires setup and configuration described in the @@ -1207,12 +1300,18 @@ def add_research_topic_annotations_to_workbook( if annotations is None: # Get the research topic annotations - annotations = get_ontogpt_annotation( - text=element_description, - template="research_topic", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="research_topic", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add research topic annotations to the workbook if annotations is not None: @@ -1251,6 +1350,7 @@ def add_methods_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1263,6 +1363,8 @@ def add_methods_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with methods annotations. 
:notes: This function retrieves methods annotations using OntoGPT, which requires setup and configuration described in the @@ -1316,12 +1418,18 @@ def add_methods_annotations_to_workbook( ) if annotations is None: - annotations = get_ontogpt_annotation( - text=element_description, - template="uses_method", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="uses_method", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add methods annotations to the workbook. Note, methods annotations are # at the dataset level. diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 24c6d07..80409dc 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -53,6 +53,7 @@ def annotate_workbooks( config_path: str, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> None: """Create workbooks for each EML file in a directory @@ -67,6 +68,8 @@ def annotate_workbooks( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :return: None :notes: Annotated workbooks will not be created if they already exist. """ @@ -106,6 +109,7 @@ def annotate_workbooks( output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, return_ungrounded=return_ungrounded, + sample_size=sample_size, ) From 66843ba5fe8d2c41ce78875a214a43522c51b321 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 25 Nov 2024 16:32:12 -0800 Subject: [PATCH 04/24] feat: initialize benchmark testing module Create a new module to facilitate benchmark testing, allowing for performance evaluation and optimization. --- docs/source/user/api.rst | 6 ++++++ src/spinneret/benchmark.py | 1 + 2 files changed, 7 insertions(+) create mode 100644 src/spinneret/benchmark.py diff --git a/docs/source/user/api.rst b/docs/source/user/api.rst index 85f787f..b360ae3 100644 --- a/docs/source/user/api.rst +++ b/docs/source/user/api.rst @@ -9,6 +9,12 @@ Annotator Module .. automodule:: spinneret.annotator :members: +Benchmark Module +---------------- + +.. automodule:: spinneret.benchmark + :members: + Datasets Module --------------- diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py new file mode 100644 index 0000000..ddcc047 --- /dev/null +++ b/src/spinneret/benchmark.py @@ -0,0 +1 @@ +"""The benchmark module""" From 864889eac7791a2cf91c03a176975d973f2e6caf Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Tue, 26 Nov 2024 07:32:19 -0800 Subject: [PATCH 05/24] feat: implement logging for debugging Add logging capabilities to enhance debugging and runtime monitoring. 
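This patch only adds the daiquiri dependency; the module-level loggers introduced in the following patches assume the application configures it along these lines (a minimal sketch with a placeholder log file name, mirroring the setup used in the test added by PATCH 06):

    import logging
    import daiquiri

    daiquiri.setup(
        level=logging.INFO,
        outputs=(
            daiquiri.output.File("spinneret.log"),  # placeholder file name
            "stdout",
        ),
    )
    logger = daiquiri.getLogger(__name__)
    logger.info("logging configured")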
--- environment-min.yml | 4 + environment.yml | 176 ++++++++++++++++++++++---------------------- poetry.lock | 31 +++++++- pyproject.toml | 1 + requirements.txt | 130 ++++++++++++++++---------------- 5 files changed, 192 insertions(+), 150 deletions(-) diff --git a/environment-min.yml b/environment-min.yml index f8892ff..3983356 100644 --- a/environment-min.yml +++ b/environment-min.yml @@ -16,4 +16,8 @@ dependencies: - rdflib - sphinx - sphinx-autoapi + - daiquiri + - pip + - pip: + - git+https://github.com/clnsmth/soso.git@main prefix: /opt/miniconda3/envs/spinneret diff --git a/environment.yml b/environment.yml index c919f4d..e417283 100644 --- a/environment.yml +++ b/environment.yml @@ -5,127 +5,153 @@ channels: dependencies: - alabaster=1.0.0 - annotated-types=0.7.0 - - anyascii=0.3.2 - - astroid=3.2.4 - - black=24.8.0 + - anyio=4.6.2.post1 + - astroid=3.3.5 + - babel=2.16.0 + - backoff=2.2.1 + - black=24.10.0 - brotli-python=1.1.0 - bzip2=1.0.8 - ca-certificates=2024.8.30 + - certifi=2024.8.30 - cffi=1.17.1 - - charset-normalizer=3.3.2 + - charset-normalizer=3.4.0 - click=8.1.7 - click-option-group=0.5.6 - colorama=0.4.6 - - coverage=7.6.1 - - dill=0.3.8 + - coverage=7.6.8 + - daiquiri=3.0.0 + - dill=0.3.9 - docutils=0.21.2 - dotty-dict=1.3.1 - exceptiongroup=1.2.2 - gitdb=4.0.11 - gitpython=3.1.43 + - gql=3.5.0 + - graphql-core=3.2.5 - h2=4.1.0 - hpack=4.0.0 - hyperframe=6.0.1 - - icu=75.1 + - idna=3.10 - imagesize=1.4.1 - - importlib-metadata=8.5.0 + - importlib-resources=6.4.5 + - importlib_resources=6.4.5 - iniconfig=2.0.0 - - isodate=0.6.1 + - isodate=0.7.2 - isort=5.13.2 - jinja2=3.1.4 - libblas=3.9.0 - libcblas=3.9.0 - - libcxx=18.1.8 - - libexpat=2.6.3 + - libcxx=19.1.4 + - libexpat=2.6.4 - libffi=3.4.2 - libgfortran=5.0.0 - libgfortran5=13.2.0 - libiconv=1.17 - liblapack=3.9.0 - - libopenblas=0.3.27 - - libsqlite=3.46.1 - - libxml2=2.12.7 + - libopenblas=0.3.28 + - libsqlite=3.47.0 + - libxml2=2.13.5 - libxslt=1.1.39 - libzlib=1.3.1 - - llvm-openmp=18.1.8 + - llvm-openmp=19.1.4 - lxml=5.3.0 - markdown-it-py=3.0.0 - - markupsafe=2.1.5 + - markupsafe=3.0.2 - mccabe=0.7.0 + - mdit-py-plugins=0.4.2 - mdurl=0.1.2 + - multidict=6.1.0 - mypy_extensions=1.0.0 - myst-parser=4.0.0 - ncurses=6.5 - - openssl=3.3.2 - - packaging=24.1 - - pandas=2.2.2 + - openssl=3.4.0 + - packaging=24.2 + - pandas=2.2.3 - pathspec=0.12.1 - - pip=24.2 + - pip=24.3.1 + - platformdirs=4.3.6 - pluggy=1.5.0 + - propcache=0.2.0 - pycparser=2.22 + - pydantic=2.10.1 + - pydantic-core=2.27.1 - pygments=2.18.0 + - pylint=3.3.1 + - pyparsing=3.2.0 - pysocks=1.7.1 - - pytest-cov=5.0.0 + - pytest=8.3.3 + - pytest-cov=6.0.0 - pytest-mock=3.14.0 - python=3.11.10 - - python-tzdata=2024.1 + - python-dateutil=2.9.0.post0 + - python-gitlab=4.13.0 + - python-json-logger=2.0.7 + - python-semantic-release=9.14.0 + - python-tzdata=2024.2 - python_abi=3.11 + - pytz=2024.1 - pyyaml=6.0.2 - - rdflib=7.0.0 + - rdflib=7.1.1 - readline=8.2 - requests=2.32.3 - requests-toolbelt=1.0.0 + - rich=13.9.4 + - setuptools=75.6.0 - shellingham=1.5.4 - six=1.16.0 + - smmap=5.0.0 + - sniffio=1.3.1 - snowballstemmer=2.2.0 - - sphinx=8.0.2 + - sphinx=8.1.3 + - sphinx-autoapi=3.3.3 - sphinxcontrib-applehelp=2.0.0 - sphinxcontrib-devhelp=2.0.0 - sphinxcontrib-htmlhelp=2.1.0 - sphinxcontrib-jsmath=1.0.1 - sphinxcontrib-qthelp=2.0.0 + - sphinxcontrib-serializinghtml=1.1.10 + - stdlib-list=0.11.0 - tk=8.6.13 - toml=0.10.2 - - tomli=2.0.1 + - tomli=2.1.0 - tomlkit=0.13.2 - typing-extensions=4.12.2 - typing_extensions=4.12.2 - - tzdata=2024a - - 
wheel=0.44.0 + - tzdata=2024b + - wheel=0.45.1 - xz=5.2.6 - yaml=0.2.5 - - zipp=3.20.2 + - yarl=1.18.0 + - zipp=3.21.0 - zstandard=0.23.0 - zstd=1.5.6 - pip: - adeft==0.12.3 - aiofiles==24.1.0 - aiohappyeyeballs==2.4.3 - - aiohttp==3.10.10 + - aiohttp==3.11.7 - aiosignal==1.3.1 - airium==0.2.6 - aniso8601==9.0.1 - antlr4-python3-runtime==4.9.3 - - anyio==4.6.2.post1 - appdirs==1.4.4 - arrow==1.3.0 - attrs==24.2.0 - - babel==2.16.0 - bcp47==0.1.0 - beautifulsoup4==4.12.3 - bioc==2.1 - - blinker==1.8.2 - - boto3==1.35.54 - - botocore==1.35.54 + - blinker==1.9.0 + - boto3==1.35.69 + - botocore==1.35.69 - cachier==3.1.2 - cattrs==24.1.2 - - certifi==2024.8.30 - cfgraph==0.2.1 - chardet==5.2.0 - - class-resolver==0.5.2 + - class-resolver==0.5.4 - curies==0.9.0 - defusedxml==0.7.1 - - deprecated==1.2.14 + - deprecated==1.2.15 - deprecation==2.1.0 - diskcache==5.6.3 - distro==1.9.0 @@ -135,7 +161,7 @@ dependencies: - eutils==0.6.0 - fastobo==0.12.3 - filelock==3.16.1 - - flask==3.0.3 + - flask==3.1.0 - flask-restx==1.3.0 - fqdn==1.5.1 - frontend==0.0.3 @@ -143,21 +169,21 @@ dependencies: - fsspec==2024.10.0 - funowl==0.2.3 - gilda==1.4.0 + - graphviz==0.20.3 - h11==0.14.0 - hbreader==0.9.1 - html5lib==1.1 - - httpcore==1.0.6 + - httpcore==1.0.7 - httpx==0.27.2 - huggingface-hub==0.26.2 - - idna==3.10 - ijson==3.3.0 - - importlib-resources==6.4.5 + - importlib-metadata==8.5.0 - inflect==7.4.0 - inflection==0.5.1 - intervaltree==3.1.0 - isoduration==20.11.0 - itsdangerous==2.2.0 - - jiter==0.7.0 + - jiter==0.7.1 - jmespath==1.0.1 - joblib==1.4.2 - json-flattener==0.1.9 @@ -177,105 +203,83 @@ dependencies: - linkml-owl==0.3.0 - linkml-renderer==0.3.1 - linkml-runtime==1.8.3 - - litellm==1.51.2 - - mdit-py-plugins==0.4.2 + - litellm==1.52.16 - more-click==0.1.2 - more-itertools==10.5.0 - - multidict==6.1.0 - ndex2==3.9.0 - networkx==3.4.2 - nltk==3.9.1 - - numpy==2.1.1 + - numpy==2.0.2 - oaklib==0.6.18 - ols-client==0.1.4 - - ontogpt==1.0.6 + - ontogpt==1.0.8 - ontoportal-client==0.0.4 - - openai==1.53.0 + - openai==1.55.1 - openpyxl==3.1.5 - owlrl==6.0.2 - pansql==0.0.1 - parse==1.20.2 - - platformdirs==4.3.6 - ply==3.11 - - portalocker==2.10.1 + - portalocker==3.0.0 - prefixcommons==0.1.12 - prefixmaps==0.2.6 - - prettytable==3.11.0 + - prettytable==3.12.0 - pronto==2.5.8 - - propcache==0.2.0 - - pydantic==2.9.2 - - pydantic-core==2.23.4 - pyjsg==0.11.10 - - pylint==3.2.7 - - pymupdf==1.24.13 - - pyparsing==3.1.4 + - pymupdf==1.24.14 - pyshacl==0.26.0 - pyshex==0.8.1 - pyshexc==0.9.1 - pysolr==3.10.0 - - pystow==0.5.6 - - pytest==8.3.3 + - pystow==0.6.1 - pytest-logging==2015.11.4 - - python-dateutil==2.9.0.post0 - python-dotenv==1.0.1 - - python-gitlab==4.11.1 - - python-graphviz==0.20.3 - - python-semantic-release==9.8.8 - pytrie==0.4.0 - - pytz==2024.2 - ratelimit==2.2.1 - rdflib-jsonld==0.6.1 - rdflib-shim==1.0.3 - referencing==0.35.1 - - regex==2024.9.11 + - regex==2024.11.6 - requests-cache==1.2.1 - rfc3339-validator==0.1.4 - rfc3987==1.3.8 - - rich==13.8.1 - - rpds-py==0.20.1 + - rpds-py==0.21.0 - ruamel-yaml==0.18.6 - ruamel-yaml-clib==0.2.12 - - s3transfer==0.10.3 + - s3transfer==0.10.4 - scikit-learn==1.4.2 - scipy==1.14.1 - semsql==0.3.3 - - setuptools==75.3.0 - shexjsg==0.8.2 - - smmap==5.0.1 - - sniffio==1.3.1 - sortedcontainers==2.4.0 - - soso==0.2.0 - soupsieve==2.6 - sparqlslurper==0.5.1 - sparqlwrapper==2.0.0 - - sphinx-autoapi==3.3.1 - - sphinxcontrib-serializinghtml==2.0.0 - - spinneret==0.1.0 - sqlalchemy==2.0.36 - sqlalchemy-utils==0.38.3 - - sssom==0.4.12 + - sssom==0.4.13 - 
sssom-schema==1.0.0 - - starlette==0.41.2 + - starlette==0.41.3 - tenacity==9.0.0 - threadpoolctl==3.5.0 - - tiktoken==0.7.0 - - tokenizers==0.20.1 - - tqdm==4.66.6 - - typeguard==4.4.0 + - tiktoken==0.8.0 + - tokenizers==0.20.3 + - tqdm==4.67.1 + - typeguard==4.4.1 - types-python-dateutil==2.9.0.20241003 - unidecode==1.3.8 - uri-template==1.3.0 - url-normalize==1.4.3 - urllib3==1.26.20 - - uvicorn==0.32.0 + - uvicorn==0.32.1 - validators==0.34.0 - watchdog==6.0.0 - wcwidth==0.2.13 - - webcolors==24.8.0 + - webcolors==24.11.1 - webencodings==0.5.1 - - werkzeug==3.1.1 + - werkzeug==3.1.3 - wikipedia==1.4.0 - wikipedia-api==0.7.1 - - wrapt==1.16.0 - - yarl==1.17.1 + - wrapt==1.17.0 + - git+https://github.com/clnsmth/soso.git@main prefix: /opt/miniconda3/envs/spinneret diff --git a/poetry.lock b/poetry.lock index e4ae891..98a35ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -812,6 +812,24 @@ pandas = ["pandas"] rdflib = ["rdflib"] tests = ["coverage", "pytest", "requests"] +[[package]] +name = "daiquiri" +version = "3.2.5.1" +description = "Library to configure Python logging easily" +optional = false +python-versions = ">=3.8" +files = [ + {file = "daiquiri-3.2.5.1-py3-none-any.whl", hash = "sha256:4a3457f54fc077e12796b258dfdc7f16572177e41c95d84c54bf010a9fd371d3"}, + {file = "daiquiri-3.2.5.1.tar.gz", hash = "sha256:5f2e86d6fca8bc38d1e9adfa605184df6fdea3702e07ca02d16aa3d0043b2eec"}, +] + +[package.dependencies] +python-json-logger = "*" + +[package.extras] +systemd = ["systemd-python (>=234)"] +test = ["pytest"] + [[package]] name = "defusedxml" version = "0.7.1" @@ -3846,6 +3864,17 @@ autocompletion = ["argcomplete (>=1.10.0,<3)"] graphql = ["gql[httpx] (>=3.5.0,<4)"] yaml = ["PyYaml (>=6.0.1)"] +[[package]] +name = "python-json-logger" +version = "2.0.7" +description = "A python library adding a json log formatter" +optional = false +python-versions = ">=3.6" +files = [ + {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, + {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, +] + [[package]] name = "python-semantic-release" version = "9.8.8" @@ -5778,4 +5807,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "504cee6f72fc1b325a1c41af49eabb86803369006287185a9f1db856e18e7dc8" +content-hash = "c615df7205bca842a4fd1edb0ea32df5fa2ff3d72506cc28285f4d4a300e708f" diff --git a/pyproject.toml b/pyproject.toml index 5dbc5e6..0185814 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ rdflib = "^7.0.0" lxml = "^5.3.0" soso = {git = "https://github.com/clnsmth/soso.git", rev = "main"} ontogpt = "^1.0.6" +daiquiri = "^3.2.5.1" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" diff --git a/requirements.txt b/requirements.txt index ea61ee6..55de71b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,27 +1,27 @@ adeft==0.12.3 aiofiles==24.1.0 aiohappyeyeballs==2.4.3 -aiohttp==3.10.10 +aiohttp==3.11.7 aiosignal==1.3.1 airium==0.2.6 alabaster==1.0.0 aniso8601==9.0.1 annotated-types==0.7.0 antlr4-python3-runtime==4.9.3 -anyascii==0.3.2 anyio==4.6.2.post1 appdirs==1.4.4 arrow==1.3.0 -astroid==3.2.4 +astroid==3.3.5 attrs==24.2.0 babel==2.16.0 +backoff==2.2.1 bcp47==0.1.0 beautifulsoup4==4.12.3 bioc==2.1 -black==24.8.0 -blinker==1.8.2 -boto3==1.35.54 -botocore==1.35.54 +black==24.10.0 +blinker==1.9.0 +boto3==1.35.69 +botocore==1.35.69 Brotli==1.1.0 cachier==3.1.2 
cattrs==24.1.2 @@ -29,17 +29,18 @@ certifi==2024.8.30 cffi==1.17.1 CFGraph==0.2.1 chardet==5.2.0 -charset-normalizer==3.3.2 -class_resolver==0.5.2 +charset-normalizer==3.4.0 +class_resolver==0.5.4 click==8.1.7 click-option-group==0.5.6 colorama==0.4.6 -coverage==7.6.1 +coverage==7.6.8 curies==0.9.0 +daiquiri==3.0.0 defusedxml==0.7.1 -Deprecated==1.2.14 +Deprecated==1.2.15 deprecation==2.1.0 -dill==0.3.8 +dill==0.3.9 diskcache==5.6.3 distro==1.9.0 docopt==0.6.2 @@ -51,7 +52,7 @@ eutils==0.6.0 exceptiongroup==1.2.2 fastobo==0.12.3 filelock==3.16.1 -Flask==3.0.3 +Flask==3.1.0 flask-restx==1.3.0 fqdn==1.5.1 frontend==0.0.3 @@ -61,13 +62,15 @@ funowl==0.2.3 gilda==1.4.0 gitdb==4.0.11 GitPython==3.1.43 +gql==3.5.0 +graphql-core==3.2.5 graphviz==0.20.3 h11==0.14.0 h2==4.1.0 hbreader==0.9.1 hpack==4.0.0 html5lib==1.1 -httpcore==1.0.6 +httpcore==1.0.7 httpx==0.27.2 huggingface-hub==0.26.2 hyperframe==6.0.1 @@ -80,12 +83,12 @@ inflect==7.4.0 inflection==0.5.1 iniconfig==2.0.0 intervaltree==3.1.0 -isodate==0.6.1 +isodate==0.7.2 isoduration==20.11.0 isort==5.13.2 itsdangerous==2.2.0 Jinja2==3.1.4 -jiter==0.7.0 +jiter==0.7.1 jmespath==1.0.1 joblib==1.4.2 json-flattener==0.1.9 @@ -105,10 +108,10 @@ linkml-dataops==0.1.0 linkml-owl==0.3.0 linkml-renderer==0.3.1 linkml-runtime==1.8.3 -litellm==1.51.2 +litellm==1.52.16 lxml==5.3.0 markdown-it-py==3.0.0 -MarkupSafe==2.1.5 +MarkupSafe==3.0.2 mccabe==0.7.0 mdit-py-plugins==0.4.2 mdurl==0.1.2 @@ -120,126 +123,127 @@ myst-parser==4.0.0 ndex2==3.9.0 networkx==3.4.2 nltk==3.9.1 -numpy==2.1.1 +numpy==2.0.2 oaklib==0.6.18 ols-client==0.1.4 -ontogpt==1.0.6 +ontogpt==1.0.8 ontoportal-client==0.0.4 -openai==1.53.0 +openai==1.55.1 openpyxl==3.1.5 owlrl==6.0.2 -packaging==24.1 -pandas==2.2.2 +packaging==24.2 +pandas==2.2.3 pansql==0.0.1 parse==1.20.2 pathspec==0.12.1 -pip==24.2 +pip==24.3.1 platformdirs==4.3.6 pluggy==1.5.0 ply==3.11 -portalocker==2.10.1 +portalocker==3.0.0 prefixcommons==0.1.12 prefixmaps==0.2.6 -prettytable==3.11.0 +prettytable==3.12.0 pronto==2.5.8 propcache==0.2.0 pycparser==2.22 -pydantic==2.9.2 -pydantic_core==2.23.4 +pydantic==2.10.1 +pydantic_core==2.27.1 Pygments==2.18.0 PyJSG==0.11.10 -pylint==3.2.7 -PyMuPDF==1.24.13 -pyparsing==3.1.4 +pylint==3.3.1 +PyMuPDF==1.24.14 +pyparsing==3.2.0 pyshacl==0.26.0 PyShEx==0.8.1 PyShExC==0.9.1 PySocks==1.7.1 pysolr==3.10.0 -pystow==0.5.6 +pystow==0.6.1 pytest==8.3.3 -pytest-cov==5.0.0 +pytest-cov==6.0.0 pytest-logging==2015.11.4 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-gitlab==4.11.1 -python-semantic-release==9.8.8 +python-gitlab==4.13.0 +python-json-logger==2.0.7 +python-semantic-release==9.14.0 PyTrie==0.4.0 -pytz==2024.2 +pytz==2024.1 PyYAML==6.0.2 ratelimit==2.2.1 -rdflib==7.0.0 +rdflib==7.1.1 rdflib-jsonld==0.6.1 rdflib-shim==1.0.3 referencing==0.35.1 -regex==2024.9.11 +regex==2024.11.6 requests==2.32.3 requests-cache==1.2.1 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 rfc3987==1.3.8 -rich==13.8.1 -rpds-py==0.20.1 +rich==13.9.4 +rpds-py==0.21.0 ruamel.yaml==0.18.6 ruamel.yaml.clib==0.2.12 -s3transfer==0.10.3 +s3transfer==0.10.4 scikit-learn==1.4.2 scipy==1.14.1 semsql==0.3.3 -setuptools==75.3.0 +setuptools==75.6.0 shellingham==1.5.4 ShExJSG==0.8.2 six==1.16.0 -smmap==5.0.1 +smmap==5.0.0 sniffio==1.3.1 snowballstemmer==2.2.0 sortedcontainers==2.4.0 -soso==0.2.0 +soso @ git+https://github.com/clnsmth/soso.git@main soupsieve==2.6 sparqlslurper==0.5.1 SPARQLWrapper==2.0.0 -Sphinx==8.0.2 -sphinx-autoapi==3.3.1 +Sphinx==8.1.3 +sphinx-autoapi==3.3.3 
sphinxcontrib-applehelp==2.0.0 sphinxcontrib-devhelp==2.0.0 sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 -sphinxcontrib-serializinghtml==2.0.0 -spinneret==0.1.0 +sphinxcontrib-serializinghtml==1.1.10 SQLAlchemy==2.0.36 SQLAlchemy-Utils==0.38.3 -sssom==0.4.12 +sssom==0.4.13 sssom-schema==1.0.0 -starlette==0.41.2 +starlette==0.41.3 +stdlib-list==0.11.0 tenacity==9.0.0 threadpoolctl==3.5.0 -tiktoken==0.7.0 -tokenizers==0.20.1 +tiktoken==0.8.0 +tokenizers==0.20.3 toml==0.10.2 -tomli==2.0.1 +tomli==2.1.0 tomlkit==0.13.2 -tqdm==4.66.6 -typeguard==4.4.0 +tqdm==4.67.1 +typeguard==4.4.1 types-python-dateutil==2.9.0.20241003 typing_extensions==4.12.2 -tzdata==2024.1 +tzdata==2024.2 Unidecode==1.3.8 uri-template==1.3.0 url-normalize==1.4.3 urllib3==1.26.20 -uvicorn==0.32.0 +uvicorn==0.32.1 validators==0.34.0 watchdog==6.0.0 wcwidth==0.2.13 -webcolors==24.8.0 +webcolors==24.11.1 webencodings==0.5.1 -Werkzeug==3.1.1 -wheel==0.44.0 +Werkzeug==3.1.3 +wheel==0.45.1 wikipedia==1.4.0 Wikipedia-API==0.7.1 -wrapt==1.16.0 -yarl==1.17.1 -zipp==3.20.2 +wrapt==1.17.0 +yarl==1.18.0 +zipp==3.21.0 zstandard==0.23.0 From d667b3161d4cd533cb3ea8764dba26b56a85cd50 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 27 Nov 2024 06:48:04 -0800 Subject: [PATCH 06/24] feat: implement performance metric logging Add logging for performance metrics to enable in-depth analysis and optimization. - Create a context manager to log metrics of interest (runtime and memory usage). - Estimate tokens per LLM call using word count. --- src/spinneret/annotator.py | 29 +++++++++++++++++++++++++---- src/spinneret/benchmark.py | 35 +++++++++++++++++++++++++++++++++++ src/spinneret/main.py | 5 +++++ tests/test_benchmark.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 tests/test_benchmark.py diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 3fed6aa..1dd91e2 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -8,6 +8,7 @@ from requests import get, exceptions import pandas as pd from lxml import etree +from daiquiri import getLogger from spinneret.workbook import ( delete_annotations, initialize_workbook_row, @@ -25,6 +26,8 @@ expand_curie, ) +logger = getLogger(__name__) + # pylint: disable=too-many-lines @@ -85,6 +88,8 @@ def get_bioportal_annotation( key can be loaded as an environment variable from the configuration file (see `utilities.load_configuration`). """ + logger.info(f"Text contains {len(text.split())} words") + # Construct the query url = "https://data.bioontology.org/annotator" payload = { @@ -159,6 +164,7 @@ def annotate_workbook( path as the original workbook. """ print(f"Annotating workbook {workbook_path}") + logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] @@ -388,7 +394,9 @@ def add_qudt_annotations_to_workbook( :param output_path: The path to write the annotated workbook. :param overwrite: If True, overwrite existing `QUDT` annotations in the `workbook, so a fresh set may be created. - :returns: Workbook with QUDT annotations.""" + :returns: Workbook with QUDT annotations. + """ + logger.info("Annotating units") # Parameters for the function predicate = "uses standard" @@ -484,7 +492,9 @@ def add_dataset_annotations_to_workbook( workbook, so a fresh set may be created. 
:param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with dataset annotations.""" + :returns: Workbook with dataset annotations. + """ + logger.info("Annotating dataset") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -581,7 +591,9 @@ def add_measurement_type_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with measurement type annotations.""" + :returns: Workbook with measurement type annotations. + """ + logger.info("Annotating measurement type") # Parameters for the function predicate = "contains measurements of type" @@ -714,6 +726,8 @@ def get_ontogpt_annotation( is required to use this function. For more information, see: https://monarch-initiative.github.io/ontogpt/. """ + logger.info(f"Text contains {len(text.split())} words") + # OntoGPT transacts in files, so we write the input text to a temporary # file and receive the results as a JSON file. Once the results are parsed # we can discard the files. @@ -792,6 +806,7 @@ def add_process_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating process") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -907,6 +922,7 @@ def add_env_broad_scale_annotations_to_workbook( annotations using OntoGPT, which requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating broad scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1023,6 +1039,7 @@ def add_env_local_scale_annotations_to_workbook( annotations using OntoGPT, which requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating local scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1137,7 +1154,9 @@ def add_env_medium_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with environmental medium annotations.""" + :returns: Workbook with environmental medium annotations. + """ + logger.info("Annotating environmental medium") # Parameters for the function predicate = "environmental material" @@ -1255,6 +1274,7 @@ def add_research_topic_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating research topic") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1370,6 +1390,7 @@ def add_methods_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. 
""" + logger.info("Annotating methods") # Load the workbook and EML for processing wb = load_workbook(workbook) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index ddcc047..548aa26 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -1 +1,36 @@ """The benchmark module""" + +import time +import tracemalloc +from contextlib import contextmanager +from daiquiri import getLogger + + +logger = getLogger(__name__) + + +@contextmanager +def monitor(name: str) -> None: + """ + Context manager to monitor the duration and memory usage of a function + using the `daiquiri` package logger. + + :param name: The name of the function being monitored. + :return: None + """ + start_time = time.time() + tracemalloc.start() + logger.info(f"Starting function '{name}'") + try: + yield # The code inside the `with` block runs here + except Exception as e: + logger.error(f"Function '{name}' raised an exception: {e}") + raise + finally: + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + duration = time.time() - start_time + logger.info(f"Function '{name}' completed in {duration:.4f} seconds") + logger.info( + f"Memory usage: Current={current / 1024:.2f} KB; Peak={peak / 1024:.2f} KB" + ) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 80409dc..7579921 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -4,6 +4,7 @@ from pathlib import Path from requests import get, codes from rdflib import Graph +import daiquiri from soso.main import convert from soso.strategies.eml import EML, get_encoding_format from soso.utilities import delete_null_values, generate_citation_from_doi @@ -14,6 +15,9 @@ from spinneret.shadow import create_shadow_eml +logger = daiquiri.getLogger(__name__) + + def create_workbooks(eml_dir: str, workbook_dir: str) -> None: """Create workbooks for each EML file in a directory :param eml_dir: Directory of EML files @@ -101,6 +105,7 @@ def annotate_workbooks( continue # Create annotated workbook + logger.info(f"Creating annotated workbook for {workbook_file}") print(f"Creating annotated workbook for {workbook_file}") annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000..119bf2c --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,30 @@ +"""Test benchmark code""" + +import logging +import daiquiri +from spinneret.benchmark import monitor + + +def test_monitor(tmp_path): + """Test the monitor context manager""" + + def example_function(): # to call with monitor + return 1 + 1 + + log_file = tmp_path / "test.log" # set up daiquiri logger + daiquiri.setup( + level=logging.INFO, + outputs=( + daiquiri.output.File(log_file), + "stdout", + ), + ) + + with monitor("example_function"): # test with monitor context manager + example_function() + + with open(log_file, "r", encoding="utf-8") as file: + log = file.read() + assert "Starting function 'example_function'" in log + assert "Function 'example_function' completed in" in log + assert "Memory usage: Current=" in log From 23907c66d9af541ac233782e0b24d97f5e96499e Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 27 Nov 2024 07:19:42 -0800 Subject: [PATCH 07/24] refactor: replace print statements with logging Replace print statements with logging statements to enable more structured and persistent output. This change provides flexibility for capturing and analyzing runtime information. 
--- src/spinneret/annotator.py | 16 ++++++++-------- src/spinneret/main.py | 11 +++++------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 1dd91e2..78216a5 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -115,7 +115,7 @@ def get_bioportal_annotation( r = get(url, params=payload, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling https://data.bioontology.org/annotator: {e}") + logger.error(f"Error calling https://data.bioontology.org/annotator: {e}") return None # Parse the results @@ -126,7 +126,7 @@ def get_bioportal_annotation( r = get(self_link, params={"apikey": api_key}, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling {self_link}: {e}") + logger.error(f"Error calling {self_link}: {e}") return None uri = r.json().get("@id", None) label = r.json().get("prefLabel", None) @@ -163,14 +163,14 @@ def annotate_workbook( in the EML file. The annotated workbook is written back to the same path as the original workbook. """ - print(f"Annotating workbook {workbook_path}") + logger.info(f"Annotating workbook {workbook_path}") logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] eml_file = pid + ".xml" if eml_file not in eml_path: - print(f"EML file {eml_file} does not match workbook {workbook_path}") + logger.warning(f"EML file {eml_file} does not match workbook {workbook_path}") return None # Load the workbook and EML for processing @@ -366,14 +366,14 @@ def get_qudt_annotation(text: str) -> Union[list, None]: r = get(url, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling {url}: {e}") + logger.error(f"Error calling {url}: {e}") return None if r.text == "No_Match": return None try: # the service has a few JSON encoding bugs json = loads(r.text) except decoder.JSONDecodeError as e: - print(f"Error decoding JSON from {url}: {e}") + logger.error(f"Error decoding JSON from {url}: {e}") return None label = json["qudtLabel"] uri = json["qudtURI"] @@ -753,7 +753,7 @@ def get_ontogpt_annotation( os.system(f"rm -rf {cache_path}") os.system(cmd) except Exception as e: # pylint: disable=broad-exception-caught - print(f"Error calling OntoGPT: {e}") + logger.error(f"Error calling OntoGPT: {e}") return None # Parse the results @@ -761,7 +761,7 @@ def get_ontogpt_annotation( with open(output_file, "r", encoding="utf-8") as f: r = load(f) except FileNotFoundError as e: - print(f"Error reading OntoGPT output file: {e}") + logger.error(f"Error reading OntoGPT output file: {e}") return None named_entities = r.get("named_entities") if named_entities is None: # OntoGPT couldn't find any annotations diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 7579921..7a812ee 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -41,7 +41,7 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: continue # Create workbook - print(f"Creating workbook for {eml_file}") + logger.info(f"Creating workbook for {eml_file}") wb = workbook.create( eml_file=eml_dir + "/" + eml_file, elements=["dataset", "attribute"], @@ -101,12 +101,11 @@ def annotate_workbooks( eml_pid = workbook_file.split("_")[0] eml_file = eml_pid + ".xml" if not os.path.exists(eml_dir + "/" + eml_file): - print(f"Could not find EML file for {workbook_file}") + logger.warning(f"Could not 
find EML file for {workbook_file}") continue # Create annotated workbook logger.info(f"Creating annotated workbook for {workbook_file}") - print(f"Creating annotated workbook for {workbook_file}") annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, eml_path=eml_dir + "/" + eml_file, @@ -145,7 +144,7 @@ def annotate_eml_files(workbook_dir: str, eml_dir: str, output_dir: str) -> None continue # Create annotated EML file - print(f"Creating annotated EML file for {eml_path}") + logger.info(f"Creating annotated EML file for {eml_path}") annotate_eml( eml=eml_path, workbook=workbook_dir + "/" + workbook_file, @@ -176,7 +175,7 @@ def create_soso_files(eml_dir: str, output_dir: str) -> None: soso_file = eml_pid + ".json" if soso_file in soso_files: continue - print(f"Creating SOSO file for {eml_file}") + logger.info(f"Creating SOSO file for {eml_file}") # Add properties that can't be derived from the EML record scope, identifier, revision = eml_pid.split(".") @@ -292,7 +291,7 @@ def create_shadow_eml_files(eml_dir: str, output_dir: str) -> None: continue # Create shadow EML file - print(f"Creating shadow EML file for {eml_file}") + logger.info(f"Creating shadow EML file for {eml_file}") create_shadow_eml( eml_path=eml_dir + "/" + eml_file, output_path=output_dir + "/" + shadow_file, From f003b96237e7e97ea83b21b51612d2959f3f93fb Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 4 Dec 2024 15:20:46 -0800 Subject: [PATCH 08/24] feat: implement benchmark data collection and testing Add functionality to collect and analyze benchmark data, including a dedicated test suite to evaluate this routine. We have opted for a baseline comparison method to evaluate the performance of our algorithm across different parameterizations. This approach offers several advantages, including efficiency and interpretability. By directly comparing each parameterization to a fixed baseline, we can quickly assess its relative performance and identify the optimal configuration. While this method may not uncover subtle differences between parameterizations that are both better or worse than the baseline, it provides a practical and timely solution for our specific goals. 
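An illustrative invocation of the new routine, with directory names patterned after the test fixtures added in this patch (the CSV output path is a placeholder):

    from spinneret.benchmark import benchmark_against_standard

    results = benchmark_against_standard(
        standard_dir="tests/data/benchmark/standard",
        test_dirs=[
            "tests/data/benchmark/test_a",
            "tests/data/benchmark/test_b",
        ],
    )
    results.to_csv("benchmark_results.csv", index=False)  # placeholder path

Each row of the returned DataFrame compares the standard and test object_id sets for one predicate/element_xpath combination, so a single run evaluates every test condition against the same baseline.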
--- src/spinneret/benchmark.py | 340 ++++++++ src/spinneret/data/prefixmaps.csv | 753 ++++++++++++++++++ src/spinneret/utilities.py | 33 + tests/conftest.py | 37 + ...ntl.1.59_annotation_workbook_annotated.tsv | 78 ++ ...ntl.2.37_annotation_workbook_annotated.tsv | 45 ++ tests/data/benchmark/standard/notes.txt | 1 + .../termset_similarity_score_processed.json | 12 + .../termset_similarity_score_raw.json | 59 ++ ...ntl.1.59_annotation_workbook_annotated.tsv | 71 ++ ...ntl.2.37_annotation_workbook_annotated.tsv | 42 + tests/data/benchmark/test_a/notes.txt | 1 + ...ntl.1.59_annotation_workbook_annotated.tsv | 124 +++ ...ntl.2.37_annotation_workbook_annotated.tsv | 66 ++ tests/data/benchmark/test_b/notes.txt | 1 + tests/test_benchmark.py | 226 +++++- tests/test_utilities.py | 20 + 17 files changed, 1908 insertions(+), 1 deletion(-) create mode 100644 src/spinneret/data/prefixmaps.csv create mode 100644 tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/standard/notes.txt create mode 100644 tests/data/benchmark/termset_similarity_score_processed.json create mode 100644 tests/data/benchmark/termset_similarity_score_raw.json create mode 100644 tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_a/notes.txt create mode 100644 tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_b/notes.txt diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 548aa26..2cd8b69 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -1,9 +1,17 @@ """The benchmark module""" +import os +from typing import Union import time +from collections import OrderedDict +import tempfile import tracemalloc +from json import load from contextlib import contextmanager from daiquiri import getLogger +import pandas as pd +from spinneret.utilities import load_workbook, compress_uri +from spinneret.workbook import delete_duplicate_annotations logger = getLogger(__name__) @@ -34,3 +42,335 @@ def monitor(name: str) -> None: logger.info( f"Memory usage: Current={current / 1024:.2f} KB; Peak={peak / 1024:.2f} KB" ) + + +def benchmark_against_standard(standard_dir: str, test_dirs: list) -> pd.DataFrame: + """ + Benchmarks the performance of test data against a standard. Currently + supports select ontologies from the OBO Foundry. + + :param standard_dir: Directory containing the standard annotated workbook + files. + :param test_dirs: List of directories containing the test annotated + workbook files. Each directory represents a different test condition. + :return: A pandas DataFrame containing the benchmark results. Comparisons + are made between the standard and test data for each predicate and + element_xpath combination. The DataFrame contains the following + columns: + + - standard_dir: The directory containing the standard annotated + workbook files. + - test_dir: The directory containing the test annotated workbook files. + - standard_file: The name of the standard annotated workbook file. + - predicate_value: The value of the predicate column. 
+ - element_xpath_value: The value of the element_xpath column. + - standard_set: The set of object_ids from the standard data. + - test_set: The set of object_ids from the test data. + - average_score: The average termset similarity score between the + standard and test sets. + - best_score: The best termset similarity score between the standard + and test sets. + - average_jaccard_similarity: The average Jaccard similarity score + between the standard and test sets. + - best_jaccard_similarity: The best Jaccard similarity score between + the standard and test sets. + - average_phenodigm_score: The average Phenodigm score between the + standard and test sets. + - best_phenodigm_score: The best Phenodigm score between the standard + and test sets. + - average_standard_information_content: The average information content + score of the standard set. + - best_standard_information_content: The best information content + score of the standard set. + - average_test_information_content: The average information content + score of the test set. + - best_test_information_content: The best information content score of + the test set. + """ + res = [] + + for standard_file in os.listdir(standard_dir): + if not standard_file.endswith(".tsv"): # we are expecting tsv files + continue + standard_path = os.path.join(standard_dir, standard_file) + + for test_dir in test_dirs: + test_path = os.path.join(test_dir, standard_file) + if not os.path.exists(test_path): # we need a matching test file + continue + + standard = load_workbook(standard_path) + test = load_workbook(test_path) + + # Prepare the data for comparison + standard = clean_workbook(standard) + test = clean_workbook(test) + standard = group_object_ids(standard) + test = group_object_ids(test) + standard = compress_object_ids(standard) + test = compress_object_ids(test) + + for key, standard_set in standard.items(): + if key not in test: + continue + test_set = test[key] + + scores = get_termset_similarity(standard_set, test_set) + if scores is None: + continue + + # Parse the scores and add them to the results + r = OrderedDict() + r["standard_dir"] = standard_dir + r["test_dir"] = test_dir + r["standard_file"] = standard_file + r["predicate_value"] = key[0] + r["element_xpath_value"] = key[1] + r["standard_set"] = standard_set + r["test_set"] = test_set + r.update(scores) + res.append(r) + + return pd.DataFrame(res) + + +def get_termset_similarity(set1: list, set2: list) -> dict: + """ + Calculate the similarity between two sets of terms. + + :param set1: List of CURIEs for the first set of terms. + :param set2: List of CURIEs for the second set of terms. + :return: A dictionary containing termset similarity and information content + scores. Default values, defined in + `benchmark.default_similarity_scores` are returned if the similarity + scores cannot be calculated or if an error occurs. For more information + on scoring, see the `oaklib` documentation: + https://incatools.github.io/ontology-access-kit/guide/similarity.html. 
+ """ + res = default_similarity_scores() # a default ensures consistent returns + + # Clean the input sets in preparation for similarity scoring + set1 = [term for term in set1 if term is not None] # can't compare None + set2 = [term for term in set2 if term is not None] + set1 = delete_terms_from_unsupported_ontologies(set1) + set2 = delete_terms_from_unsupported_ontologies(set2) + + if not set1 or not set2: # can't calculate similarity of empty sets + logger.info("Cannot calculate similarity for empty sets") + return res + + db = get_shared_ontology(set1, set2) + if db is None: # can't compare terms from different ontologies + return res + + # Write output file to a temporary location to be read back in later. We + # do this because the output cannot be returned as an object. + with tempfile.TemporaryDirectory() as temp_dir: + output_file = os.path.join(temp_dir, "output.json") + + # Construct and run the termset-similarity command + cmd = ( + f"runoak -i {db} termset-similarity -o {output_file} -O json " + f"{' '.join(set1)} @ {' '.join(set2)}" + ) + try: + os.system(cmd) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error(f"Error running termset-similarity command: {e}") + return res + + # Read and parse the similarity scores + try: + with open(output_file, "r", encoding="utf-8") as f: + scores = load(f) + except FileNotFoundError as e: + logger.error(f"Error reading termset-similarity output file: {e}") + return res + res = parse_similarity_scores(scores) + return res + + +def default_similarity_scores() -> dict: + """ + :return: A dictionary containing default similarity scores. Values are set + following `oaklib` conventions. + """ + res = OrderedDict() + res["average_score"] = 0.0 + res["best_score"] = 0.0 + res["average_jaccard_similarity"] = pd.NA + res["best_jaccard_similarity"] = pd.NA + res["average_phenodigm_score"] = pd.NA + res["best_phenodigm_score"] = pd.NA + res["average_standard_information_content"] = pd.NA + res["best_standard_information_content"] = pd.NA + res["average_test_information_content"] = pd.NA + res["best_test_information_content"] = pd.NA + return res + + +def clean_workbook(workbook: pd.DataFrame) -> pd.DataFrame: + """ + Clean a workbook for benchmarking. + + :param workbook: The workbook to clean. + :return: The cleaned workbook. + """ + # Remove rows where the "object_id" is NaN. This is necessary because + # the termset similarity function cannot handle NaN values. + workbook = workbook.dropna(subset=["object_id"]) + + # Remove rows where the "object_id" starts with "AUTO:", these terms are + # not grounded to any ontology and therefore cannot be compared. + workbook = workbook[~workbook["object_id"].str.startswith("AUTO:")] + + # Remove duplicate annotations, so we don't inflate the similarity scores + # by comparing the same object multiple times. + workbook = delete_duplicate_annotations(workbook) + + return workbook + + +def group_object_ids(workbook: pd.DataFrame) -> dict: + """ + Group object_id values by predicate and element_xpath, i.e. the context + of the object_id values that we are comparing. + + :param workbook: The workbook to apply the grouping to. + :return: The grouped workbook as a dictionary, where the keys are tuples + of the workbook predicate and element_xpath values, and the dictionary + values are lists of object_id values. 
+ """ + # list_object_id_for_predicate_and_element_xpath + # Group data by predicate and element_xpath columns + series = workbook.groupby(["predicate", "element_xpath"]).apply( + lambda x: x.to_dict("records"), include_groups=False + ) + + # Only include the "object_id" values, these are what we want to compare + res = {key: [d["object_id"] for d in data] for key, data in series.items()} + return res + + +def compress_object_ids(object_id_groups: dict) -> dict: + """ + Convert object_ids to CURIEs for comparison. + + :param object_id_groups: The return value from `group_object_ids`. + :return: The object_id_groups dictionary with object_ids converted to + CURIEs. + """ + for key, data in object_id_groups.items(): + object_id_groups[key] = [compress_uri(d) if d else None for d in data] + return object_id_groups + + +def parse_similarity_scores(scores: list) -> dict: + """ + Parse similarity scores from the output of the `oaklib` termset-similarity + command into the format expected by the benchmarking function. + + :param scores: The output of the `oaklib` termset-similarity command. + :return: A dictionary containing the parsed similarity scores. + """ + res = default_similarity_scores() + + # Get the "termset similarity" scores + res["average_score"] = scores[0].get("average_score") + res["best_score"] = scores[0].get("best_score") + + # Get other similarity scores (i.e. information content, jaccard + # similarity, phenodigm score) + for key in scores[0].keys(): + + # Information content scores + if key == "subject_best_matches": # for the subject (i.e. "standard") + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["subject_information_content"] + r.append(s) + res["average_standard_information_content"] = sum(r) / len(r) + res["best_standard_information_content"] = max(r) + if key == "object_best_matches": # for the object (i.e. the "test") + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["object_information_content"] + r.append(s) + res["average_test_information_content"] = sum(r) / len(r) + res["best_test_information_content"] = max(r) + + # Jaccard similarity scores. Note, we can get this information from + # either the subject_best_matches or object_best_matches keys. Doing + # both is redundant. + if key == "subject_best_matches": + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["jaccard_similarity"] + r.append(s) + res["average_jaccard_similarity"] = sum(r) / len(r) + res["best_jaccard_similarity"] = max(r) + + # Phenodigm scores. Note, we can get this information from either the + # subject_best_matches or object_best_matches keys. Doing both is + # redundant. + if key == "subject_best_matches": + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["phenodigm_score"] + r.append(s) + res["average_phenodigm_score"] = sum(r) / len(r) + res["best_phenodigm_score"] = max(r) + + return res + + +def delete_terms_from_unsupported_ontologies(curies: list) -> list: + """ + Similarity scoring works for some ontologies and not others, so remove + terms that are not from supported ontologies. Supported ontologies are + hard-coded in this function. + + :param curies: List of CURIEs. + :return: List of CURIEs from supported ontologies. 
+ """ + supported_ontologies = ["ENVO", "ECSO", "ENVTHES"] + res = [ + term + for term in curies + if any(term.startswith(ontology + ":") for ontology in supported_ontologies) + ] + return res + + +def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: + """ + Get the most shared ontology of two sets based on the most frequently + occurring CURIE prefix. + + :param set1: List of CURIEs for the first set of terms. + :param set2: List of CURIEs for the second set of terms. + :return: The shared ontology. This value is returned as a string conforming + to the `oaklib` conventions for specifying the ontology database input + to the termset-similarity function. If no shared ontology is found, + None is returned. + """ + + prefixes1 = [term.split(":")[0] for term in set1] + prefixes2 = [term.split(":")[0] for term in set2] + + # Get the most common prefix in the intersection of the two sets + intersection = set(prefixes1) & set(prefixes2) + counts = {prefix: prefixes1.count(prefix) for prefix in intersection} + if len(intersection) == 0: + logger.info("Cannot find a common ontology for similarity scoring") + return None + prefix = max(counts, key=counts.get) + + # Map the prefix to the ontology database + if prefix == "ENVO": + db = "sqlite:obo:envo" + else: + logger.info(f"Ontology not supported: {prefix}") + return None + + return db diff --git a/src/spinneret/data/prefixmaps.csv b/src/spinneret/data/prefixmaps.csv new file mode 100644 index 0000000..2b9bfe9 --- /dev/null +++ b/src/spinneret/data/prefixmaps.csv @@ -0,0 +1,753 @@ +context,prefix,namespace,status +obo,AAO,http://purl.obolibrary.org/obo/AAO_,canonical +obo,ADO,http://purl.obolibrary.org/obo/ADO_,canonical +obo,ADW,http://purl.obolibrary.org/obo/ADW_,canonical +obo,AEO,http://purl.obolibrary.org/obo/AEO_,canonical +obo,AERO,http://purl.obolibrary.org/obo/AERO_,canonical +obo,AfPO,http://purl.obolibrary.org/obo/AfPO_,canonical +obo,AGRO,http://purl.obolibrary.org/obo/AGRO_,canonical +obo,AISM,http://purl.obolibrary.org/obo/AISM_,canonical +obo,AMPHX,http://purl.obolibrary.org/obo/AMPHX_,canonical +obo,APO,http://purl.obolibrary.org/obo/APO_,canonical +obo,APOLLO_SV,http://purl.obolibrary.org/obo/APOLLO_SV_,canonical +obo,ARO,http://purl.obolibrary.org/obo/ARO_,canonical +obo,ATO,http://purl.obolibrary.org/obo/ATO_,canonical +obo,BCGO,http://purl.obolibrary.org/obo/BCGO_,canonical +obo,BCO,http://purl.obolibrary.org/obo/BCO_,canonical +obo,BFO,http://purl.obolibrary.org/obo/BFO_,canonical +obo,BILA,http://purl.obolibrary.org/obo/BILA_,canonical +obo,BOOTSTREP,http://purl.obolibrary.org/obo/BOOTSTREP_,canonical +obo,BSPO,http://purl.obolibrary.org/obo/BSPO_,canonical +obo,BTO,http://purl.obolibrary.org/obo/BTO_,canonical +obo,CARO,http://purl.obolibrary.org/obo/CARO_,canonical +obo,CDAO,http://purl.obolibrary.org/obo/CDAO_,canonical +obo,CDNO,http://purl.obolibrary.org/obo/CDNO_,canonical +obo,CEPH,http://purl.obolibrary.org/obo/CEPH_,canonical +obo,CHEBI,http://purl.obolibrary.org/obo/CHEBI_,canonical +obo,CHEMINF,http://purl.obolibrary.org/obo/CHEMINF_,canonical +obo,CHIRO,http://purl.obolibrary.org/obo/CHIRO_,canonical +obo,CHMO,http://purl.obolibrary.org/obo/CHMO_,canonical +obo,CIDO,http://purl.obolibrary.org/obo/CIDO_,canonical +obo,CIO,http://purl.obolibrary.org/obo/CIO_,canonical +obo,CL,http://purl.obolibrary.org/obo/CL_,canonical +obo,CLAO,http://purl.obolibrary.org/obo/CLAO_,canonical +obo,CLO,http://purl.obolibrary.org/obo/CLO_,canonical +obo,CLYH,http://purl.obolibrary.org/obo/CLYH_,canonical 
+obo,CMF,http://purl.obolibrary.org/obo/CMF_,canonical +obo,CMO,http://purl.obolibrary.org/obo/CMO_,canonical +obo,COB,http://purl.obolibrary.org/obo/COB_,canonical +obo,COLAO,http://purl.obolibrary.org/obo/COLAO_,canonical +obo,CRO,http://purl.obolibrary.org/obo/CRO_,canonical +obo,CTENO,http://purl.obolibrary.org/obo/CTENO_,canonical +obo,CTO,http://purl.obolibrary.org/obo/CTO_,canonical +obo,CVDO,http://purl.obolibrary.org/obo/CVDO_,canonical +obo,DC_CL,http://purl.obolibrary.org/obo/DC_CL_,canonical +obo,DDANAT,http://purl.obolibrary.org/obo/DDANAT_,canonical +obo,DDPHENO,http://purl.obolibrary.org/obo/DDPHENO_,canonical +obo,DIDEO,http://purl.obolibrary.org/obo/DIDEO_,canonical +obo,DINTO,http://purl.obolibrary.org/obo/DINTO_,canonical +obo,DISDRIV,http://purl.obolibrary.org/obo/DISDRIV_,canonical +obo,DOID,http://purl.obolibrary.org/obo/DOID_,canonical +obo,DRON,http://purl.obolibrary.org/obo/DRON_,canonical +obo,DUO,http://purl.obolibrary.org/obo/DUO_,canonical +obo,ECAO,http://purl.obolibrary.org/obo/ECAO_,canonical +obo,ECO,http://purl.obolibrary.org/obo/ECO_,canonical +obo,ECOCORE,http://purl.obolibrary.org/obo/ECOCORE_,canonical +obo,ECTO,http://purl.obolibrary.org/obo/ECTO_,canonical +obo,EHDA,http://purl.obolibrary.org/obo/EHDA_,canonical +obo,EHDAA,http://purl.obolibrary.org/obo/EHDAA_,canonical +obo,EHDAA2,http://purl.obolibrary.org/obo/EHDAA2_,canonical +obo,EMAP,http://purl.obolibrary.org/obo/EMAP_,canonical +obo,EMAPA,http://purl.obolibrary.org/obo/EMAPA_,canonical +obo,ENVO,http://purl.obolibrary.org/obo/ENVO_,canonical +obo,EO,http://purl.obolibrary.org/obo/EO_,canonical +obo,EPIO,http://purl.obolibrary.org/obo/EPIO_,canonical +obo,EPO,http://purl.obolibrary.org/obo/EPO_,canonical +obo,ERO,http://purl.obolibrary.org/obo/ERO_,canonical +obo,EUPATH,http://purl.obolibrary.org/obo/EUPATH_,canonical +obo,EV,http://purl.obolibrary.org/obo/EV_,canonical +obo,ExO,http://purl.obolibrary.org/obo/ExO_,canonical +obo,FAO,http://purl.obolibrary.org/obo/FAO_,canonical +obo,FBbi,http://purl.obolibrary.org/obo/FBbi_,canonical +obo,FBbt,http://purl.obolibrary.org/obo/FBbt_,canonical +obo,FBcv,http://purl.obolibrary.org/obo/FBcv_,canonical +obo,FBdv,http://purl.obolibrary.org/obo/FBdv_,canonical +obo,FBSP,http://purl.obolibrary.org/obo/FBSP_,canonical +obo,FIDEO,http://purl.obolibrary.org/obo/FIDEO_,canonical +obo,FIX,http://purl.obolibrary.org/obo/FIX_,canonical +obo,FLOPO,http://purl.obolibrary.org/obo/FLOPO_,canonical +obo,FLU,http://purl.obolibrary.org/obo/FLU_,canonical +obo,FMA,http://purl.obolibrary.org/obo/FMA_,canonical +obo,FOBI,http://purl.obolibrary.org/obo/FOBI_,canonical +obo,FOODON,http://purl.obolibrary.org/obo/FOODON_,canonical +obo,FOVT,http://purl.obolibrary.org/obo/FOVT_,canonical +obo,FYPO,http://purl.obolibrary.org/obo/FYPO_,canonical +obo,GALLONT,http://purl.obolibrary.org/obo/GALLONT_,canonical +obo,GAZ,http://purl.obolibrary.org/obo/GAZ_,canonical +obo,GECKO,http://purl.obolibrary.org/obo/GECKO_,canonical +obo,GENEPIO,http://purl.obolibrary.org/obo/GENEPIO_,canonical +obo,GENO,http://purl.obolibrary.org/obo/GENO_,canonical +obo,GEO,http://purl.obolibrary.org/obo/GEO_,canonical +obo,GNO,http://purl.obolibrary.org/obo/GNO_,canonical +obo,GO,http://purl.obolibrary.org/obo/GO_,canonical +obo,GRO,http://purl.obolibrary.org/obo/GRO_,canonical +obo,GSSO,http://purl.obolibrary.org/obo/GSSO_,canonical +obo,HABRONATTUS,http://purl.obolibrary.org/obo/HABRONATTUS_,canonical +obo,HANCESTRO,http://purl.obolibrary.org/obo/HANCESTRO_,canonical 
+obo,HAO,http://purl.obolibrary.org/obo/HAO_,canonical +obo,HOM,http://purl.obolibrary.org/obo/HOM_,canonical +obo,HP,http://purl.obolibrary.org/obo/HP_,canonical +obo,HsapDv,http://purl.obolibrary.org/obo/HsapDv_,canonical +obo,HSO,http://purl.obolibrary.org/obo/HSO_,canonical +obo,HTN,http://purl.obolibrary.org/obo/HTN_,canonical +obo,IAO,http://purl.obolibrary.org/obo/IAO_,canonical +obo,ICEO,http://purl.obolibrary.org/obo/ICEO_,canonical +obo,ICO,http://purl.obolibrary.org/obo/ICO_,canonical +obo,IDO,http://purl.obolibrary.org/obo/IDO_,canonical +obo,IDOMAL,http://purl.obolibrary.org/obo/IDOMAL_,canonical +obo,IEV,http://purl.obolibrary.org/obo/IEV_,canonical +obo,IMR,http://purl.obolibrary.org/obo/IMR_,canonical +obo,INO,http://purl.obolibrary.org/obo/INO_,canonical +obo,IPR,http://purl.obolibrary.org/obo/IPR_,canonical +obo,KISAO,http://purl.obolibrary.org/obo/KISAO_,canonical +obo,LABO,http://purl.obolibrary.org/obo/LABO_,canonical +obo,LEPAO,http://purl.obolibrary.org/obo/LEPAO_,canonical +obo,LIPRO,http://purl.obolibrary.org/obo/LIPRO_,canonical +obo,LOGGERHEAD,http://purl.obolibrary.org/obo/LOGGERHEAD_,canonical +obo,MA,http://purl.obolibrary.org/obo/MA_,canonical +obo,MAMO,http://purl.obolibrary.org/obo/MAMO_,canonical +obo,MAO,http://purl.obolibrary.org/obo/MAO_,canonical +obo,MAT,http://purl.obolibrary.org/obo/MAT_,canonical +obo,MAXO,http://purl.obolibrary.org/obo/MAXO_,canonical +obo,MCO,http://purl.obolibrary.org/obo/MCO_,canonical +obo,MCRO,http://purl.obolibrary.org/obo/MCRO_,canonical +obo,MF,http://purl.obolibrary.org/obo/MF_,canonical +obo,MFMO,http://purl.obolibrary.org/obo/MFMO_,canonical +obo,MFO,http://purl.obolibrary.org/obo/MFO_,canonical +obo,MFOEM,http://purl.obolibrary.org/obo/MFOEM_,canonical +obo,MFOMD,http://purl.obolibrary.org/obo/MFOMD_,canonical +obo,MI,http://purl.obolibrary.org/obo/MI_,canonical +obo,MIAPA,http://purl.obolibrary.org/obo/MIAPA_,canonical +obo,MICRO,http://purl.obolibrary.org/obo/MICRO_,canonical +obo,MIRNAO,http://purl.obolibrary.org/obo/MIRNAO_,canonical +obo,MIRO,http://purl.obolibrary.org/obo/MIRO_,canonical +obo,MMO,http://purl.obolibrary.org/obo/MMO_,canonical +obo,MmusDv,http://purl.obolibrary.org/obo/MmusDv_,canonical +obo,MO,http://purl.obolibrary.org/obo/MO_,canonical +obo,MOD,http://purl.obolibrary.org/obo/MOD_,canonical +obo,MONDO,http://purl.obolibrary.org/obo/MONDO_,canonical +obo,MOP,http://purl.obolibrary.org/obo/MOP_,canonical +obo,MP,http://purl.obolibrary.org/obo/MP_,canonical +obo,MPATH,http://purl.obolibrary.org/obo/MPATH_,canonical +obo,MPIO,http://purl.obolibrary.org/obo/MPIO_,canonical +obo,MRO,http://purl.obolibrary.org/obo/MRO_,canonical +obo,MS,http://purl.obolibrary.org/obo/MS_,canonical +obo,NBO,http://purl.obolibrary.org/obo/NBO_,canonical +obo,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_,canonical +obo,NCIT,http://purl.obolibrary.org/obo/NCIT_,canonical +obo,NCRO,http://purl.obolibrary.org/obo/NCRO_,canonical +obo,NGBO,http://purl.obolibrary.org/obo/NGBO_,canonical +obo,NIF_CELL,http://purl.obolibrary.org/obo/NIF_CELL_,canonical +obo,NIF_DYSFUNCTION,http://purl.obolibrary.org/obo/NIF_DYSFUNCTION_,canonical +obo,NIF_GROSSANATOMY,http://purl.obolibrary.org/obo/NIF_GROSSANATOMY_,canonical +obo,NMR,http://purl.obolibrary.org/obo/NMR_,canonical +obo,NOMEN,http://purl.obolibrary.org/obo/NOMEN_,canonical +obo,OAE,http://purl.obolibrary.org/obo/OAE_,canonical +obo,OARCS,http://purl.obolibrary.org/obo/OARCS_,canonical +obo,OBA,http://purl.obolibrary.org/obo/OBA_,canonical 
+obo,OBCS,http://purl.obolibrary.org/obo/OBCS_,canonical +obo,OBI,http://purl.obolibrary.org/obo/OBI_,canonical +obo,OBIB,http://purl.obolibrary.org/obo/OBIB_,canonical +obo,OBO_REL,http://purl.obolibrary.org/obo/OBO_REL_,canonical +obo,OCCO,http://purl.obolibrary.org/obo/OCCO_,canonical +obo,OGG,http://purl.obolibrary.org/obo/OGG_,canonical +obo,OGI,http://purl.obolibrary.org/obo/OGI_,canonical +obo,OGMS,http://purl.obolibrary.org/obo/OGMS_,canonical +obo,OGSF,http://purl.obolibrary.org/obo/OGSF_,canonical +obo,OHD,http://purl.obolibrary.org/obo/OHD_,canonical +obo,OHMI,http://purl.obolibrary.org/obo/OHMI_,canonical +obo,OHPI,http://purl.obolibrary.org/obo/OHPI_,canonical +obo,OlatDv,http://purl.obolibrary.org/obo/OlatDv_,canonical +obo,OMIABIS,http://purl.obolibrary.org/obo/OMIABIS_,canonical +obo,OMIT,http://purl.obolibrary.org/obo/OMIT_,canonical +obo,OMO,http://purl.obolibrary.org/obo/OMO_,canonical +obo,OMP,http://purl.obolibrary.org/obo/OMP_,canonical +obo,OMRSE,http://purl.obolibrary.org/obo/OMRSE_,canonical +obo,ONE,http://purl.obolibrary.org/obo/ONE_,canonical +obo,ONS,http://purl.obolibrary.org/obo/ONS_,canonical +obo,ONTOAVIDA,http://purl.obolibrary.org/obo/ONTOAVIDA_,canonical +obo,ONTONEO,http://purl.obolibrary.org/obo/ONTONEO_,canonical +obo,OOSTT,http://purl.obolibrary.org/obo/OOSTT_,canonical +obo,OPL,http://purl.obolibrary.org/obo/OPL_,canonical +obo,OPMI,http://purl.obolibrary.org/obo/OPMI_,canonical +obo,ORNASEQ,http://purl.obolibrary.org/obo/ORNASEQ_,canonical +obo,OVAE,http://purl.obolibrary.org/obo/OVAE_,canonical +obo,PAO,http://purl.obolibrary.org/obo/PAO_,canonical +obo,PATO,http://purl.obolibrary.org/obo/PATO_,canonical +obo,PCL,http://purl.obolibrary.org/obo/PCL_,canonical +obo,PCO,http://purl.obolibrary.org/obo/PCO_,canonical +obo,PD_ST,http://purl.obolibrary.org/obo/PD_ST_,canonical +obo,PDRO,http://purl.obolibrary.org/obo/PDRO_,canonical +obo,PdumDv,http://purl.obolibrary.org/obo/PdumDv_,canonical +obo,PECO,http://purl.obolibrary.org/obo/PECO_,canonical +obo,PGDSO,http://purl.obolibrary.org/obo/PGDSO_,canonical +obo,PHIPO,http://purl.obolibrary.org/obo/PHIPO_,canonical +obo,PLANA,http://purl.obolibrary.org/obo/PLANA_,canonical +obo,PLANP,http://purl.obolibrary.org/obo/PLANP_,canonical +obo,PLO,http://purl.obolibrary.org/obo/PLO_,canonical +obo,PO,http://purl.obolibrary.org/obo/PO_,canonical +obo,PORO,http://purl.obolibrary.org/obo/PORO_,canonical +obo,PPO,http://purl.obolibrary.org/obo/PPO_,canonical +obo,PR,http://purl.obolibrary.org/obo/PR_,canonical +obo,PROCO,http://purl.obolibrary.org/obo/PROCO_,canonical +obo,PROPREO,http://purl.obolibrary.org/obo/PROPREO_,canonical +obo,PSDO,http://purl.obolibrary.org/obo/PSDO_,canonical +obo,PSO,http://purl.obolibrary.org/obo/PSO_,canonical +obo,PW,http://purl.obolibrary.org/obo/PW_,canonical +obo,RBO,http://purl.obolibrary.org/obo/RBO_,canonical +obo,RESID,http://purl.obolibrary.org/obo/RESID_,canonical +obo,REX,http://purl.obolibrary.org/obo/REX_,canonical +obo,RNAO,http://purl.obolibrary.org/obo/RNAO_,canonical +obo,RO,http://purl.obolibrary.org/obo/RO_,canonical +obo,RS,http://purl.obolibrary.org/obo/RS_,canonical +obo,RXNO,http://purl.obolibrary.org/obo/RXNO_,canonical +obo,SAO,http://purl.obolibrary.org/obo/SAO_,canonical +obo,SBO,http://purl.obolibrary.org/obo/SBO_,canonical +obo,SCDO,http://purl.obolibrary.org/obo/SCDO_,canonical +obo,SEP,http://purl.obolibrary.org/obo/SEP_,canonical +obo,SEPIO,http://purl.obolibrary.org/obo/SEPIO_,canonical +obo,SIBO,http://purl.obolibrary.org/obo/SIBO_,canonical 
+obo,SLSO,http://purl.obolibrary.org/obo/SLSO_,canonical +obo,SO,http://purl.obolibrary.org/obo/SO_,canonical +obo,SOPHARM,http://purl.obolibrary.org/obo/SOPHARM_,canonical +obo,SPD,http://purl.obolibrary.org/obo/SPD_,canonical +obo,STATO,http://purl.obolibrary.org/obo/STATO_,canonical +obo,SWO,http://purl.obolibrary.org/obo/SWO_,canonical +obo,SYMP,http://purl.obolibrary.org/obo/SYMP_,canonical +obo,T4FS,http://purl.obolibrary.org/obo/T4FS_,canonical +obo,TADS,http://purl.obolibrary.org/obo/TADS_,canonical +obo,TAHE,http://purl.obolibrary.org/obo/TAHE_,canonical +obo,TAHH,http://purl.obolibrary.org/obo/TAHH_,canonical +obo,TAO,http://purl.obolibrary.org/obo/TAO_,canonical +obo,TAXRANK,http://purl.obolibrary.org/obo/TAXRANK_,canonical +obo,TGMA,http://purl.obolibrary.org/obo/TGMA_,canonical +obo,TO,http://purl.obolibrary.org/obo/TO_,canonical +obo,TRANS,http://purl.obolibrary.org/obo/TRANS_,canonical +obo,TTO,http://purl.obolibrary.org/obo/TTO_,canonical +obo,TXPO,http://purl.obolibrary.org/obo/TXPO_,canonical +obo,UBERON,http://purl.obolibrary.org/obo/UBERON_,canonical +obo,UO,http://purl.obolibrary.org/obo/UO_,canonical +obo,UPA,http://purl.obolibrary.org/obo/UPA_,canonical +obo,UPHENO,http://purl.obolibrary.org/obo/UPHENO_,canonical +obo,VariO,http://purl.obolibrary.org/obo/VariO_,canonical +obo,VBO,http://purl.obolibrary.org/obo/VBO_,canonical +obo,VHOG,http://purl.obolibrary.org/obo/VHOG_,canonical +obo,VO,http://purl.obolibrary.org/obo/VO_,canonical +obo,VSAO,http://purl.obolibrary.org/obo/VSAO_,canonical +obo,VT,http://purl.obolibrary.org/obo/VT_,canonical +obo,VTO,http://purl.obolibrary.org/obo/VTO_,canonical +obo,WBbt,http://purl.obolibrary.org/obo/WBbt_,canonical +obo,WBls,http://purl.obolibrary.org/obo/WBls_,canonical +obo,WBPhenotype,http://purl.obolibrary.org/obo/WBPhenotype_,canonical +obo,XAO,http://purl.obolibrary.org/obo/XAO_,canonical +obo,XCO,http://purl.obolibrary.org/obo/XCO_,canonical +obo,XLMOD,http://purl.obolibrary.org/obo/XLMOD_,canonical +obo,XPO,http://purl.obolibrary.org/obo/XPO_,canonical +obo,YPO,http://purl.obolibrary.org/obo/YPO_,canonical +obo,ZEA,http://purl.obolibrary.org/obo/ZEA_,canonical +obo,ZECO,http://purl.obolibrary.org/obo/ZECO_,canonical +obo,ZFA,http://purl.obolibrary.org/obo/ZFA_,canonical +obo,ZFS,http://purl.obolibrary.org/obo/ZFS_,canonical +obo,ZP,http://purl.obolibrary.org/obo/ZP_,canonical +bioportal,ABD,http://brd.bsvgateway.org/api/,canonical +bioportal,ACESO,http://www.semanticweb.org/cbmi/ontologies/2018/10/aceso#,canonical +bioportal,ACGT-MO,http://www.ifomis.org/acgt/1.0#,canonical +bioportal,AD-DROP,http://www.semanticweb.org/AD-DROP#,canonical +bioportal,ADALAB-META,http://rdf.adalab-project.org/ontology/adalab-meta/,canonical +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-core.owl#,canonical +bioportal,ADAR,http://purl.org/autism-ontology/1.0/CA_ADOS1-2001.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/Interests_and_behaviors_phenotype#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/assessment-result.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-merged.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-rules.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_adi-2003.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_ados4_2001.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_vinelandsurvey-2005.owl#,prefix_alias 
+bioportal,ADCAD,https://purl.dataone.org/odo/ADCAD_,canonical +bioportal,ADHER_INTCARE_EN,http://www.semanticweb.org/parracarlos/ontologies/2019/3/untitled-ontology-31#,canonical +bioportal,ADMO,http://www.semanticweb.org/ADMO#,canonical +bioportal,ADO,http://scai.fraunhofer.de/AlzheimerOntology#,canonical +bioportal,AGROCYMAC,http://www.semanticweb.org/yali/ontologies/2019/0/cultivos#,canonical +bioportal,AGROMOP,http://www.semanticweb.org/vera/ontologies/2020/1/untitled-ontology-5#,canonical +bioportal,AHOL,http://opendata.inra.fr/AHOL/AHOL_,canonical +bioportal,AHSO,https://w3id.org/ahso#,canonical +bioportal,AIRBUS,http://protege.stanford.edu/thesauri/aero/,canonical +bioportal,AISM,OBO:AISM_,canonical +bioportal,AMINO-ACID,http://www.co-ode.org/ontologies/amino-acid/2006/05/18/amino-acid.owl#,canonical +bioportal,AO,http://childhealthservicemodels.eu/asthma#,canonical +bioportal,APAOCUEMPLOY,http://www.semanticweb.org/ontologies/2015/0/ocupationalemploymentcluster.owl#,canonical +bioportal,APATREATMENT,http://www.semanticweb.org/ontologies/2015/0/treatmentcluster.owl#,canonical +bioportal,araport,OBO:Araport_,canonical +bioportal,ARCRC,http://purl.dataone.org/odo/ARCRC_,canonical +bioportal,ASDPTO,http://cbmi.med.harvard.edu/asdphenotype#,canonical +bioportal,ASPECT,http://purl.org/aspect/,canonical +bioportal,ATC,http://purl.bioontology.org/ontology/ATC/,canonical +bioportal,ATOL,http://opendata.inra.fr/ATOL/ATOL_,canonical +bioportal,AURA,http://www.projecthalo.com/aura#,canonical +bioportal,BAO,http://www.bioassayontology.org/bao#BAO_,canonical +bioportal,BCI-O,https://w3id.org/BCI-ontology#,canonical +bioportal,BCTT,http://purl.bioontology.org/ontology/BCTT#,canonical +bioportal,BFO,http://www.ifomis.org/bfo/1.1/snap#,canonical +bioportal,BHN,http://chu-rouen.fr/cismef/BHN#,canonical +bioportal,BIBFRAME,http://id.loc.gov/ontologies/bibframe/,canonical +bioportal,BIBLIOTEK-O,http://bibliotek-o.org/1.1/ontology/,canonical +bioportal,BIBLIOTEK-O,http://bibliotek-o.org/ontology/,prefix_alias +bioportal,BIM,http://cbakerlab.unbsj.ca/unbvps/BIM#,canonical +bioportal,BIM,http://cbakerlab.unbsj.ca:8080/ontologies/BIM.owl#,prefix_alias +bioportal,BIM,http://cbakerlab.unbsj.ca:8080/sebi/BIM.owl#,prefix_alias +bioportal,BIN,http://purl.bioontology.org/ontology/BIN/,canonical +bioportal,BIRNLEX,http://bioontology.org/projects/ontologies/birnlex#,canonical +bioportal,BNO,http://www.owl-ontologies.com/Ontology1361987617.owl#,canonical +bioportal,BOF,http://www.owl-ontologies.com/BiodiversityOntologyFull.owl#,canonical +bioportal,BP,http://www.biopax.org/release/biopax-level3.owl#,canonical +bioportal,BRCT,http://www.semanticweb.org/latitude_user/ontologies/2014/8/untitled-ontology-7#,canonical +bioportal,BRCT,http://www.semanticweb.org/ontologies/2009/9/Ontology1255357986125.owl#,prefix_alias +bioportal,BRIDG,http://www.bridgmodel.org/owl#,canonical +bioportal,BRO,http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#,canonical +bioportal,BRSO,http://purl.jp/bio/10/brso/,canonical +bioportal,BRSO,http://purl.org/brso/BiologicalResourceStatus#,prefix_alias +bioportal,BRSO,http://purl.org/brso/BiologicalResourceType#,prefix_alias +bioportal,BSAO,OBO:BSA_,canonical +bioportal,BT,http://purl.org/biotop/biotop.owl#,canonical +bioportal,CABRO,http://www.semanticweb.org/dimitrios/ontologies/2013/2/untitled-ontology-2#,canonical +bioportal,CARELEX,http://www.CareLex.org/2012/carelex.owl#,canonical +bioportal,CARRE,file:/Users/allanthird/Work/CARRE/CARREOntology/carre-ontology.xml#,canonical 
+bioportal,CARRE,file:/Users/allanthird/Work/CARRE/CARREOntology/carre-sensors.xml#,prefix_alias +bioportal,CASE-BASE-ONTO,http://www.semanticweb.org/hsh/ontologies/2019/7/CBRDystempOnto#,canonical +bioportal,CCTOO,OBO:CCTO_,canonical +bioportal,CDPEO,http://www.semanticweb.org/ontologies/chronic-diease-patient-education-ontology#,canonical +bioportal,CEDARVS,http://www.semanticweb.org/jgraybeal/ontologies/2015/7/cedarvaluesets#,canonical +bioportal,cgnc,OBO:CGNC_,canonical +bioportal,CHD,http://homes.esat.kuleuven.be/~bioiuser/chdwiki/index.php/CHD:CaseReport?id=,canonical +bioportal,CHEMINF,http://semanticscience.org/resource/CHEMINF_,canonical +bioportal,CIDOC-CRM,http://www.cidoc-crm.org/cidoc-crm/,canonical +bioportal,CKDO,http://clininf.eu/ckdo#,canonical +bioportal,CMDO,http://purl.bioontology.org/ontology/CMDO/,canonical +bioportal,CMR-QA,http://www.semanticweb.org/ukbiobank/ocmr_isg/CMR-QA#,canonical +bioportal,CN,http://mmisw.org/ont/Technology/ComputerNetworks/,canonical +bioportal,CN,http://mmisw.org/ont/Technology/ComputerNetworks#,prefix_alias +bioportal,CO-WHEAT,OBO:CO_321_,canonical +bioportal,CODO,http://www.isibang.ac.in/ns/codo#,canonical +bioportal,COGAT,file:/srv/ncbo/repository/COGAT/8/,canonical +bioportal,COGPO,http://www.cogpo.org/ontologies/COGPO_,canonical +bioportal,COGPO,http://www.cogpo.org/ontologies/CogPOver1.owl#,prefix_alias +bioportal,COGPO,http://www.cogpo.org/ontologies/CogPOver2010.owl#COGPO_,prefix_alias +bioportal,COID,https://github.com/sap218/coid/blob/master/coid.owl#,canonical +bioportal,COKPME,http://www.iiitdwd.ac.in/ACB/COKPME#,canonical +bioportal,COMODI,http://purl.uni-rostock.de/comodi/comodi#,canonical +bioportal,COSTART,http://purl.bioontology.org/ontology/CST/,canonical +bioportal,CPRO,http://purl.org/cpr/,canonical +bioportal,CRISP,http://purl.bioontology.org/ontology/CSP/,canonical +bioportal,CRYOEM,http://scipion.i2pc.es/ontology/CRYOEM_,canonical +bioportal,CSSO,http://purl.jp/bio/11/csso/CSSO_,canonical +bioportal,CU-VO,http://www.semanticweb.org/jdr2160/ontologies/2015/5/venom_ontology#,canonical +bioportal,CVAO,http://www.semanticweb.org/ontologies/2015/11/CVAO#,canonical +bioportal,CWD,http://www.semanticweb.org/jbagwell/ontologies/2017/9/untitled-ontology-6#,canonical +bioportal,CYTO,http://www.semanticweb.org/demetrios/ontologies/2014/5/,canonical +bioportal,DATACITE,http://purl.org/spar/datacite/,canonical +bioportal,DCM,http://dicom.nema.org/resources/ontology/DCM/,canonical +bioportal,DEB,http://www.semanticweb.org/osnathakimi/ontologies/deb#,canonical +bioportal,DERMO,OBO:DERMO_,canonical +bioportal,DFO,https://w3id.org/dfo/,canonical +bioportal,DIAB,http://purl.bioontology.org/ontology/DIAB/,canonical +bioportal,DIKB,http://purl.org/net/drug-interaction-knowledge-base/DIKB_evidence_ontology.owl#,canonical +bioportal,DISDRIV,OBO:DISDRIV_,canonical +bioportal,DLO,https://w3id.org/dlo/,canonical +bioportal,DLORO,http://www.semanticweb.org/alan/ontologies/2013/8/untitled-ontology-9#,canonical +bioportal,DOCCC,http://www.semanticweb.org/hll/ontologies/2013/8/untitled-ontology-2#,canonical +bioportal,DOID,http://purl.org/obo/owl/DOID#,canonical +bioportal,DOREMUS-KEYS,http://data.doremus.org/vocabulary/key/,canonical +bioportal,DRANPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2019/8/nonpharmacological-intervention-for-agitation-in-dementia-ontology/,canonical +bioportal,DREAMDNPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2021/DREAMDNPTO#,canonical 
+bioportal,DRPSNPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2020/DRPSNPTO/,canonical +bioportal,DSEO,http://bigdatau.org/dseo#,canonical +bioportal,DTO,http://www.drugtargetontology.org/dto/DTO_,canonical +bioportal,EBP,http://www.semanticweb.org/tswheeler/ontologies/2016/3/EmpowerBP#,canonical +bioportal,ECAO,OBO:ECAO_,canonical +bioportal,ECG,http://www.cvrgrid.org/files/ECGOntologyv1.owl#ECG_,canonical +bioportal,ECP,http://iris.med.duth.gr/research/ecp/ontology/eCP.owl#,canonical +bioportal,ECSO,http://purl.dataone.org/odo/ECSO_,canonical +bioportal,EDAM,http://edamontology.org/,canonical +bioportal,EFO,http://www.ebi.ac.uk/efo/EFO_,canonical +bioportal,ELECTRICA,http://purl.org/ELECTRICA/,canonical +bioportal,ELIG,http://www.semanticweb.org/ontologies/2012/8/Ontology1348158066194.owl#,canonical +bioportal,ELTER_CL,http://vocabs.lter-europe.net/eLTER_CL/,canonical +bioportal,EMO,http://www.semanticweb.org/ontologies/2011/1/14/EMO.owl/,canonical +bioportal,ensembl.bacteria,OBO:EnsemblBacteria#_,canonical +bioportal,ENVS_VARIABLES,http://purl.org/m4m-dk-2/variables/,canonical +bioportal,ENVTHES,http://vocabs.lter-europe.net/EnvThes/,canonical +bioportal,EO,http://www.semanticweb.org/ethnicityOntology#,canonical +bioportal,EOL,http://purl.org/obo/owlEOL_,canonical +bioportal,EP,http://www.cvrgrid.org/ontologies/Electrophysiology#,canonical +bioportal,EPIE,https://pat.nichd.nih.gov/patepigeneticentity/,canonical +bioportal,EPILONT,http://www.semanticweb.org/ontologies/2009/3/EpilepsyOntology.owl#,canonical +bioportal,EPIO,https://bio.scai.fraunhofer.de/ontology/epilepsy#,canonical +bioportal,EPIP,https://pat.nichd.nih.gov/patepigeneticprocess/,canonical +bioportal,EPISEM,http://www.semanticweb.org/danielhier/ontologies/2019/3/untitled-ontology-57/,canonical +bioportal,EPO,http://www.semanticweb.org/ontologies/epo.owl#,canonical +bioportal,EPSO,http://www.case.edu/EpilepsyOntology.owl#,canonical +bioportal,ESSO,http://www.semanticweb.org/rjyy/ontologies/2015/5/ESSO#,canonical +bioportal,ETHANC,https://github.com/VODANA/Controlled-vocabulary/ethanc/,canonical +bioportal,EXACT,http://www.owl-ontologies.com/Ontology1184060740.owl#,canonical +bioportal,EXTRACT,http://purl.org/extract/,canonical +bioportal,FBbi,http://purl.org/obo/owl/FBbi#FBbi_,canonical +bioportal,FCC1,http://www.semanticweb.org/diwaleva/ontologies/2019/9/fcc-ontology#,canonical +bioportal,FDC-GDMT,http://vocab.fairdatacollective.org/gdmt/,canonical +bioportal,FG,https://w3id.org/fair-genomes/ontology/,canonical +bioportal,FHHO,http://www.owl-ontologies.com/Ontology1172270693.owl#,canonical +bioportal,FIRE,http://cerrado.linkeddata.es/ecology/fire#,canonical +bioportal,FISH-AST,http://purl.org/heritagedata/schemes/560/concepts/,canonical +bioportal,FISHO,http://bioportal.bioontology.org/ontologies/FISHO#,canonical +bioportal,FISHO,http://mybiodiversityontologies.um.edu.my/FO.owl#,prefix_alias +bioportal,FLYGLYCODB,http://www.flyglycodb.org/ontologies/2015/,canonical +bioportal,FMA,http://purl.org/sig/ont/fma/,canonical +bioportal,FMA,http://purl.org/obo/owlapi/fma#,prefix_alias +bioportal,FMA,http://sig.uw.edu/fma#,prefix_alias +bioportal,GALEN,http://www.co-ode.org/ontologies/galen#,canonical +bioportal,GAMUTS,http://www.gamuts.net/entity#,canonical +bioportal,GBOL,http://gbol.life/0.1/,canonical +bioportal,GCO,http://rdf.biosemantics.org/ontologies/genomecomponents#,canonical +bioportal,GECKO,OBO:GECKO_,canonical +bioportal,GENE-CDS,http://www.genomic-cds.org/ont/genomic-cds.owl#,canonical 
+bioportal,GEOSPARQL,http://www.opengis.net/ont/geosparql#,canonical +bioportal,GEOSPECIES,http://rdf.geospecies.org/ont/geospecies#,canonical +bioportal,GEOSPECIES,http://rdf.geospecies.org/ont/geospecies.owl#,prefix_alias +bioportal,GFFO,https://raw.githubusercontent.com/mpievolbio-scicomp/GenomeFeatureFormatOntology/main/gffo#,canonical +bioportal,GFO,http://www.onto-med.de/ontologies/gfo.owl#,canonical +bioportal,GFO,http://www.onto-med.de/ontologies/gfo-basic.owl#,prefix_alias +bioportal,GFO-BIO,http://onto.eva.mpg.de/ontologies/gfo-bio.owl#,canonical +bioportal,GFVO,https://www.codamono.com/biointerchange/gfvo#,canonical +bioportal,GLYCO,http://glycomics.ccrc.uga.edu/ontologies/GlycO#,canonical +bioportal,GLYCOCOO,http://purl.jp/bio/12/glyco/,canonical +bioportal,GML,http://www.opengis.net/ont/gml#,canonical +bioportal,GML,http://loki.cae.drexel.edu/~wbs/ontology/2004/09/ogc-gml#,prefix_alias +bioportal,GMO,http://purl.jp/bio/10/gmo/GMO_,canonical +bioportal,GNO,OBO:GNO_,canonical +bioportal,GO,http://purl.org/obo/owl/GO#GO_,canonical +bioportal,GRO,http://www.bootstrep.eu/ontology/GRO#,canonical +bioportal,GSSO,OBO:GSSO_,canonical +bioportal,GVO,http://genome-variation.org/resource/gvo#,canonical +bioportal,HASCO,http://hadatac.org/ont/hasco/,canonical +bioportal,HCDR,http://www.semanticweb.org/m14067/ontologies/2020/0/untitled-ontology-5#,canonical +bioportal,HCPCS,http://purl.bioontology.org/ontology/HCPCS/,canonical +bioportal,HEIO,http://whistl.uwaterloo.ca/heio.owl#,canonical +bioportal,HFO,http://bmi.utah.edu/ontologies/hfontology/,canonical +bioportal,HGNC,http://ncicb.nci.nih.gov/xml/owl/EVS/Hugo.owl#HGNC_,canonical +bioportal,HGNC,OBO:HGNC_,prefix_alias +bioportal,HIVO0004,http://bioportal/bioontology.org/ontologies/HIVO0004#,canonical +bioportal,HL7,http://purl.bioontology.org/ontology/HL7/,canonical +bioportal,HLA,http://purl.org/stemnet/HLA#,canonical +bioportal,HMIS033B,http://vocab.vodana.org/hmis033b/,canonical +bioportal,HNS,http://www.humannervousystem.org/KAnOE/2014/dave86#,canonical +bioportal,HRDO,http://www.limics.org/hrdo/HRDO.owl#,canonical +bioportal,HSO,https://w3id.org/hso#,canonical +bioportal,HUPSON,http://scai.fraunhofer.de/HuPSON#,canonical +bioportal,I-ADOPT,https://w3id.org/iadopt/ont/,canonical +bioportal,I2SV,https://i2insights.org/index/integration-and-implementation-sciences-vocabulary#,canonical +bioportal,IAML-MOP,http://data.doremus.org/vocabulary/iaml/mop/,canonical +bioportal,IBO,http://www.semanticweb.org/eamdouni/ontologies/2015/5/IBO#,canonical +bioportal,ICD10,http://purl.bioontology.org/ontology/ICD10/,canonical +bioportal,ICD10,https://cdn.rawgit.com/laiasubirats/rarediseasesontology/master/ICD10_1.0.owl#,prefix_alias +bioportal,ICD10CM,http://purl.bioontology.org/ontology/ICD10CM/,canonical +bioportal,ICD10PCS,http://purl.bioontology.org/ontology/ICD10PCS/,canonical +bioportal,ICD11-BODYSYSTEM,http://who.int/bodysystem.owl#,canonical +bioportal,ICD9CM,http://purl.bioontology.org/ontology/ICD9CM/,canonical +bioportal,ICECI,http://who.int/iceci#,canonical +bioportal,ICECI,http://who.int/iceci.owl#,prefix_alias +bioportal,ICF,http://who.int/icf#,canonical +bioportal,ICNP,http://www.icn.ch/icnp#,canonical +bioportal,ICPC2P,http://purl.bioontology.org/ontology/ICPC2P/,canonical +bioportal,ICPS,http://www.ICPS/ontologies/,canonical +bioportal,ID-AMR,http://purl.org/zonmw/id-amr/,canonical +bioportal,IDEM,http://purl.org/idem/,canonical +bioportal,IDG_GL,http://druggablegenome.net/,canonical 
+bioportal,IDODEN,http://purl.bioontology.org/ontology/IDODEN_,canonical +bioportal,IMGT-ONTOLOGY,http://www.imgt.org/download/IMGT-ONTOLOGY/IMGT-ONTOLOGY-v1-0-3.owl#,canonical +bioportal,INBIO,http://www.semanticweb.org/rs/ontologies/INBIO#,canonical +bioportal,INBIODIV,http://www.semanticweb.org/mca/ontologies/2018/8/untitled-ontology-47#,canonical +bioportal,INCENTIVE,http://purl.org/incentive/,canonical +bioportal,INCENTIVE-VARS,http://purl.org/incentive/variables/,canonical +bioportal,INFRARISK,https://www.infrarisk-fp7.eu/vocabs/#,canonical +bioportal,INSECTH,http://neuromorpho.org/ontologies/insectH.owl#,canonical +bioportal,INSNAME,https://www.vodan-totafrica.info/vocs/institutions/,canonical +bioportal,IntAct,http://identifiers.org/intact/,canonical +bioportal,INTO,http://www.semanticweb.org/Terrorism#,canonical +bioportal,INVERSEROLES,http://rds.posccaesar.org/2008/02/OWL/ISO-15926-2_2003#,canonical +bioportal,IRD,http://www.semanticweb.org/msh/ontologies/2019/9/untitled-ontology-3#,canonical +bioportal,IRDG,http://www.semanticweb.org/IRDGuyamazon#,canonical +bioportal,ISO-15926-2_2003,http://rds.posccaesar.org/2008/02/OWL/ISO-15926-2_2003#,namespace_alias +bioportal,ISO19108TO,http://def.isotc211.org/iso19108/2006/,canonical +bioportal,ISO19110,http://def.isotc211.org/iso19110/2005/,canonical +bioportal,ISO19115,http://loki.cae.drexel.edu/~wbs/ontology/2004/09/iso-19115#,canonical +bioportal,ISO19115CC,http://def.isotc211.org/iso19115/-1/2014/CommonClasses/code/,canonical +bioportal,ISO19115ID,http://def.isotc211.org/iso19115/2003/,canonical +bioportal,ISO19115PR,http://www.geosciml.org/vocabularies/iso-19115-codelists.owl#,canonical +bioportal,ISO19115PR,http://www.geosciml.org/vocabularies/iso-19115-codes.owl#,prefix_alias +bioportal,ISSVA,http://purl.bioontology.org/ontology/ISSVA/,canonical +bioportal,JERM,http://jermontology.org/ontology/JERMOntology#,canonical +bioportal,KISAO,http://www.biomodels.net/kisao/KISAO#KISAO_,canonical +bioportal,KORO,http://www.knowledgegrid.org/koro#,canonical +bioportal,KORO,http://www.knowledgegrid.org/koro/1.0.0/koro.owl#,prefix_alias +bioportal,LAND-SURFACE,http://anzsoil.org/def/au/asls/land-surface/,canonical +bioportal,LANDFORM,http://anzsoil.org/def/au/asls/landform/,canonical +bioportal,LDA,http://www.semanticweb.org/ontologies/2008/10/languageacquisition_autism.owl#,canonical +bioportal,LEGALAPA,http://www.semanticweb.org/ontologies/2014/11/legal.owl#,canonical +bioportal,LEGALAPATEST2,http://www.semanticweb.org/ontologies/2014/11/legal-2.owl#,canonical +bioportal,LICO,http://vavlab.ee.boun.edu.tr/carera/khaos/lico.owl#,canonical +bioportal,LOINC,http://purl.bioontology.org/ontology/LNC/,canonical +bioportal,LONGCOVID,http://www.semanticweb.org/orchid/ontologies/2021/Long-Covid-Phenotype-Ontology#,canonical +bioportal,LUNGMAP_H_CELL,OBO:LMHA_,canonical +bioportal,LUNGMAP_M_CELL,OBO:LMMA_,canonical +bioportal,M4M19-SUBS,http://purl.org/m4m19/subjects/,canonical +bioportal,M4M19-VARS,http://purl.org/m4m19/variables#,canonical +bioportal,MATRELEMENT,http://sweet.jpl.nasa.gov/2.3/matrElement.owl#,canonical +bioportal,MATRROCK,http://sweet.jpl.nasa.gov/2.3/matrRock.owl#,canonical +bioportal,MATRROCKIGNEOUS,http://sweet.jpl.nasa.gov/2.3/matrRockIgneous.owl#,canonical +bioportal,MCBCC,OBO:MCBCC_,canonical +bioportal,MCCL,http://purl.bioontology.org/ontology/MCCL/CL__,canonical +bioportal,MCCL,http://www.semanticweb.org/pallabi.d/ontologies/2014/2/untitled-ontology-11#,prefix_alias 
+bioportal,MDDB,http://purl.bioontology.org/ontology/MDDB/,canonical +bioportal,MEDLINEPLUS,http://purl.bioontology.org/ontology/MEDLINEPLUS/,canonical +bioportal,MEDO,http://www.ebi.ac.uk/efo/medo/MEDO_,canonical +bioportal,MEDO,http://www.ebi.ac.uk/medo/,prefix_alias +bioportal,MEO,http://purl.jp/bio/11/meo/,canonical +bioportal,MESH,http://purl.bioontology.org/ontology/MESH/,canonical +bioportal,MHC,http://purl.org/stemnet/MHC#,canonical +bioportal,MIM,http://purl.bioontology.org/ontology/MIM#,canonical +bioportal,MIRO,OBO:miro#,canonical +bioportal,MIXS,https://w3id.org/mixs/terms/,canonical +bioportal,MIXS,https://w3id.org/mixs/vocab/,prefix_alias +bioportal,MOC,http://sweet.jpl.nasa.gov/2.3/matrOrganicCompound.owl#,canonical +bioportal,MODSCI,https://w3id.org/skgo/modsci#,canonical +bioportal,MONO,http://www.owl-ontologies.com/MO.owl#,canonical +bioportal,MOSAIC,https://purl.dataone.org/odo/MOSAIC_,canonical +bioportal,MSO,http://scai.fraunhofer.de/MSOntology#,canonical +bioportal,MSTDE,http://purl.bioontology.org/ontology/MSTDE/,canonical +bioportal,MSTDE-FRE,http://purl.bioontology.org/ontology/MSTDE-FRE/,canonical +bioportal,MSV,http://purl.jp/bio/11/msv/,canonical +bioportal,NCBIGene,http://identifiers.org/ncbigene/,canonical +bioportal,NCBIGene,OBO:NCBIGene_,prefix_alias +bioportal,NCBITAXON,http://purl.bioontology.org/ontology/NCBITAXON/,canonical +bioportal,NCBITAXON,http://www.ncbi.nlm.nih.gov/taxonomy/,prefix_alias +bioportal,NCBITAXON,OBO:NCBITAXON_,prefix_alias +bioportal,NCCNEHR,http://www.semanticweb.org/lamb/ontologies/NCCN-EHR#,canonical +bioportal,NCCO,http://www.semanticweb.org/vanessa/ontologies/2012/7/untitled-ontology-33#,canonical +bioportal,NCIT,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical +bioportal,NDDF,http://purl.bioontology.org/ontology/NDDF/,canonical +bioportal,NDDO,http://www.purl.org/NDDO/,canonical +bioportal,NDFRT,http://purl.bioontology.org/ontology/NDFRT/,canonical +bioportal,NDFRT,http://evs.nci.nih.gov/ftp1/NDF-RT/NDF-RT.owl#,prefix_alias +bioportal,NEICBEER,http://ontology.deic.dk/cv/beer-ontology/,canonical +bioportal,NEOMARK3,http://www.neomark.eu/ontologies/,canonical +bioportal,NEOMARK4,http://neomark.owl#,canonical +bioportal,NEUDIGS,http://bmkeg.isi.edu/neuDIGs#,canonical +bioportal,NEUMORE,http://neumore.cis.usouthal.edu/ontologies/NeuMORE-v0.1.owl#,canonical +bioportal,NIFCELL,http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Cell.owl#,canonical +bioportal,NIHSS,https://mre.zcu.cz/ontology/nihss.owl#,canonical +bioportal,NMOBR,http://neuromorpho.org/ontologies/NMOSp.owl#,canonical +bioportal,NMR,http://nmrML.org/nmrCV#,canonical +bioportal,NPI,http://purl.bioontology.org/ontology/NPI#,canonical +bioportal,NPO,http://purl.bioontology.org/ontology/npo#,canonical +bioportal,OA,http://www.w3.org/ns/oa#,canonical +bioportal,OBOE,http://ecoinformatics.org/oboe/oboe.1.2/,canonical +bioportal,OBOREL,http://www.obofoundry.org/ro/ro.owl#,canonical +bioportal,OBOREL,OBO:OBO_REL#_,prefix_alias +bioportal,OBS,http://www.semanticweb.org/bito2/ontologies/2021/3/untitled-ontology-11#,canonical +bioportal,OCIMIDO,https://github.com/sap218/ocimido/blob/master/ontology/ocimido.owl#,canonical +bioportal,OCRE,http://purl.org/net/OCRe/,canonical +bioportal,OGR,http://www.owl-ontologies.com/GeographicalRegion.owl#,canonical +bioportal,OGROUP,http://protozoadb.biowebdb.org/22/ogroup#,canonical +bioportal,OM,http://www.ontology-of-units-of-measure.org/resource/,canonical 
+bioportal,OMIM,http://purl.bioontology.org/ontology/OMIM/,canonical +bioportal,OMIM,http://identifiers.org/omim/,prefix_alias +bioportal,OMV,http://omv.ontoware.org/2005/05/ontology#,canonical +bioportal,ONL-DP,http://neurolog.unice.fr/ontoneurolog/v3.0/ontoneurolog-dataset-processing.owl#,canonical +bioportal,ONL-TASKS,http://www.semanticweb.org/bakaev/ontologies/2020/3/untitled-ontology-25#,canonical +bioportal,ONLIRA,http://vavlab.ee.boun.edu.tr/carera/onlira.owl#,canonical +bioportal,ONSTR,http://onstr.googlecode.com/svn/tags/currentRelease/2014-09-03/ONSTR.owl#ONSTR_,canonical +bioportal,ONTOAD,http://doe-generated-ontology.com/OntoAD#,canonical +bioportal,ONTODM,http://www.ontodm.com/OntoDM-core/OntoDM_,canonical +bioportal,ONTODM,http://kt.ijs.si/panovp/OntoDM#OntoDM_,prefix_alias +bioportal,ONTODT,http://ontodm.com/OntoDT#,canonical +bioportal,ONTODT,http://www.ontodm.com/OntoDT#,prefix_alias +bioportal,ONTOLURGENCES,http://www.limics.fr/ontologies/ontolurgences#,canonical +bioportal,ONTOPBM,http://w3id.org/ontopbm#OntoPBM_,canonical +bioportal,ONTOPNEUMO,http://doe-generated-ontology.com/OntoPneumo#,canonical +bioportal,ONTOSIM,http://www.semanticweb.org/DIASUS/OntoSIM#,canonical +bioportal,ONTOSINASC,http://www.semanticweb.org/DIASUS/OntoSINASC#,canonical +bioportal,ONTOTOX,http://OntoTox.owl#,canonical +bioportal,OOEVV,http://bmkeg.isi.edu/ooevv/,canonical +bioportal,OPB,http://bhi.washington.edu/OPB#OPB_,canonical +bioportal,OPB,http://bhi.washington.edu/OPB##,prefix_alias +bioportal,OPDE,http://www.mudhc.edu.et/template-vocabulary#,canonical +bioportal,OPE,http://www.semanticweb.org/ontologies/2013/2/OPE.owl#,canonical +bioportal,OPTION-ONTOLOGY,http://w3id.org/ontoopt/,canonical +bioportal,ORCS,OBO:ORCS_,canonical +bioportal,ORTH,http://purl.org/net/orth#,canonical +bioportal,OSM,https://saudeconectada.org/saude_mental.owl#,canonical +bioportal,PANDA,http://purl.bioontology.org/net/brunel/panda#,canonical +bioportal,PANET,http://purl.org/pan-science/PaNET/,canonical +bioportal,PATCT,https://pat.nichd.nih.gov/placentalcelltype/,canonical +bioportal,PATEL,http://www.semanticweb.org/ambrish/ontologies/2020/10/untitled-ontology-24#,canonical +bioportal,PATGV,https://pat.nichd.nih.gov/patgeneticvariance/,canonical +bioportal,PATHLEX,http://www.semanticweb.org/david/ontologies/2013/0/pathLex.owl#,canonical +bioportal,PATIT,https://pat.nichd.nih.gov/patinvestigativetechniques/,canonical +bioportal,PATMHC,http://pat.nichd.nih.gov/maternalconditions/,canonical +bioportal,PATO,http://purl.obolibrary.org/obo/PATO_,canonical +bioportal,PCALION,http://www.semanticweb.org/ontologies/Prostate_cancer#,canonical +bioportal,PDO,http://purl.jp/bio/11/pdo/,canonical +bioportal,PDON,http://www.semanticweb.org/ontologies/2011/1/Ontology1296772722296.owl#,canonical +bioportal,PDQ,http://purl.bioontology.org/ontology/PDQ/,canonical +bioportal,PDRO,OBO:PDRO/PDRO.owl#,canonical +bioportal,PDUMDV,OBO:PdumDv_,canonical +bioportal,PE,http://bmi.utah.edu/ontologies/peontology/,canonical +bioportal,PE-O,http://www.pepathway.org/peo/1.2#,canonical +bioportal,PE-O,http://www.pepathway.org/peo/1.1#,prefix_alias +bioportal,PEDTERM,http://www.owl-ontologies.com/Ontology1358660052.owl#,canonical +bioportal,PEO,http://knoesis.wright.edu/ParasiteExperiment.owl#,canonical +bioportal,PESONT,http://www.semanticweb.org/patienceusip/ontologies/2021/7/untitled-ontology-23#,canonical +bioportal,PGXO,http://pgxo.loria.fr/,canonical +bioportal,PHENX,http://purl.bioontology.org/ontology/phenX/,canonical 
+bioportal,PHYLONT,http://www.semanticweb.org/ontologies/2011/7/Ontology1314368515010.owl#,canonical +bioportal,PIERO,http://reactionontology.org/piero/,canonical +bioportal,PLIO,http://www.semanticweb.org/ontologies/2010/3/Ontology1271664172453.owl#,canonical +bioportal,PLOSTHES,http://localhost/plosthes.2017-1#,canonical +bioportal,PMD,http://www.onto-med.de/ontologies/gfo-persian-medicine-diseases.owl#,canonical +bioportal,PMDO,http://www.case.edu/PMDO#,canonical +bioportal,PMO,http://performedmusicontology.org/ontology/,canonical +bioportal,PMO-SPEED,http://performedmusicontology.org/ontologies/vocabularies/playing_speed/,canonical +bioportal,PMR,http://purl.bioontology.org/ontology/PMR.owl#,canonical +bioportal,PP,https://bitbucket.org/PlantExpAssay/ontology/raw/v0.1/PipelinePatterns.owl#P,canonical +bioportal,PR,OBO:pr#,canonical +bioportal,PREGONTO,http://www.clininf.eu/pregnancy#,canonical +bioportal,PREO,http://presence-ontology.org/ontology//,canonical +bioportal,PROCCHEMICAL,http://sweet.jpl.nasa.gov/2.3/propChemical.owl#,canonical +bioportal,PROJ,http://linked.data.gov.au/def/project/,canonical +bioportal,PROPREO,http://lsdis.cs.uga.edu/projects/glycomics/propreo#,canonical +bioportal,PROVO,http://www.w3.org/ns/prov-o#,canonical +bioportal,PROVO,http://www.w3.org/ns/prov-o-20130312#,prefix_alias +bioportal,PSO_2,http://ontorion.com/PSO#,canonical +bioportal,QUDT,http://qudt.org/schema/,canonical +bioportal,QUDT,http://qudt.org/2.1/schema/,prefix_alias +bioportal,RADLEX,http://radlex.org/RID/,canonical +bioportal,RADLEX,http://www.radlex.org/RID/,prefix_alias +bioportal,RADXTT-MVREASONS,https://radx.orgx/vocs/missing-value-reason/,canonical +bioportal,RCD,http://purl.bioontology.org/ontology/RCD/,canonical +bioportal,RCTONT,http://www.owl-ontologies.com/RCTOntology.owl#,canonical +bioportal,RCTV2,http://purl.bioontology.org/ontology/RCTV2/,canonical +bioportal,RDA-CONTENT,http://rdaregistry.info/termList/RDAContentType#,canonical +bioportal,REPO,http://purl.bioontology.org/ontology/REPO.owl#,canonical +bioportal,RH-MESH,http://phenomebrowser.net/ontologies/mesh/mesh.owl#,canonical +bioportal,RO,http://www.radiomics.org/RO/,canonical +bioportal,ROLEO,OBO:RoleO_,canonical +bioportal,ROO,http://www.cancerdata.org/roo/,canonical +bioportal,ROS,urn:absolute:RadiationOncologyStructuresOntology#,canonical +bioportal,RPO,http://www.semanticweb.org/ontologies/2012/5/Ontology1338526551855.owl#,canonical +bioportal,RSA,http://rdf.biosemantics.org/ontologies/rsa#,canonical +bioportal,RVO,http://w3id.org/rv-ontology#,canonical +bioportal,SAO,http://ccdb.ucsd.edu/SAO/1.2#,canonical +bioportal,SARSMUTONTO,file://C/Users/Jamal/Desktop/SARSMutOnto.owl#,canonical +bioportal,SBO,http://purl.bioontology.org/ontology/SBO/SBO_,canonical +bioportal,SBO,http://biomodels.net/SBO/SBO_,prefix_alias +bioportal,SBOL,OBO:SBOL_,canonical +bioportal,SCHEMA,http://schema.org/,canonical +bioportal,SCHEMA,http://meta.schema.org/,prefix_alias +bioportal,SCHEMA,http://www.w3.org/wiki/WebSchemas/,prefix_alias +bioportal,SCHEMA,https://www.w3.org/wiki/WebSchemas/,prefix_alias +bioportal,SCIO,http://psink.de/scio/,canonical +bioportal,SD3,http://www.wiser.pitt.edu/ontologies/SimulationScenarioDeviations.owl#,canonical +bioportal,SDO,http://mimi.case.edu/ontologies/2009/1/SDO.owl#,canonical +bioportal,SEDI,http://semantic-dicom.org/dcm#,canonical +bioportal,SENSO,http://purl.dataone.org/odo/SENSO_,canonical +bioportal,SEQ,http://www.ontologydesignpatterns.org/cp/owl/sequence.owl#,canonical 
+bioportal,SHR,http://www.shojaee.com/shr/shr.owl#,canonical +bioportal,SITBAC,http://www.semanticweb.org/ontologies/2008/1/Ontology1204037102846.owl#,canonical +bioportal,SK,http://www.semanticweb.org/sandeepak/digitalforensic#,canonical +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/smash-ontology#,canonical +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/biomarker.owl#,prefix_alias +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/physical-activity.owl#,prefix_alias +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/social-activity.owl#,prefix_alias +bioportal,SNMI,http://purl.bioontology.org/ontology/SNMI/,canonical +bioportal,SNOMEDCT,http://purl.bioontology.org/ontology/SNOMEDCT/,canonical +bioportal,SNPO,http://www.loria.fr/~coulet/ontology/snpontology/version1.6/snpontology_full.owl#,canonical +bioportal,SO,http://purl.org/obo/owl/SO#SO_,canonical +bioportal,SOCPRES,http://www.semanticweb.org/social-prescribing#,canonical +bioportal,SOPHARM,http://www.loria.fr/~coulet/sopharm/SOPHARM_,canonical +bioportal,SOY,OBO:SOY_,canonical +bioportal,SP,http://purl.org/net/SMARTprotocol#,canonical +bioportal,SPO,http://www.semanticweb.org/ontologies/2008/8/MultiscaleSkinPhysiologyOntology.owl#,canonical +bioportal,SPO,http://www.semanticweb.org/ontologies/2008/8/SPO_lightweight_merged.owl#,prefix_alias +bioportal,SPTO,OBO:SP_,canonical +bioportal,SSN,http://www.w3.org/ns/ssn/,canonical +bioportal,SSO,http://surveillance.mcgill.ca/sso/syndromes.owl#,canonical +bioportal,SSO,http://www.medicine.mcgill.ca/epidemiology/buckeridge/syndromes.owl#,prefix_alias +bioportal,STMSO,https://bioportal.bioontology.org/ontologies/STMSO#,canonical +bioportal,STY,http://purl.bioontology.org/ontology/STY/,canonical +bioportal,SURGICAL,http://www.cablesat.com.au/research/,canonical +bioportal,SWEET,http://sweetontology.net/,canonical +bioportal,SWO,http://www.ebi.ac.uk/swo/SWO_,canonical +bioportal,SWO,http://www.ebi.ac.uk/efo/swo/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/algorithm/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/data/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/interface/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/license/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/objective/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/organization/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/version/SWO_,prefix_alias +bioportal,TAXRANK,OBO:taxrank.owl#,canonical +bioportal,TCDO,http://OntoTCM.org.cn/ontologies/TCDO_,canonical +bioportal,TCO,http://www.semanticweb.org/hx-jta/ontologies/thyroid_cancer_ontology#,canonical +bioportal,TDWGSPEC,http://rs.tdwg.org/ontology/voc/Specimen#,canonical +bioportal,TEDDY,http://identifiers.org/teddy/TEDDY_,canonical +bioportal,TEO,http://informatics.mayo.edu/TEO.owl#TEO_,canonical +bioportal,TESTEX,https://bioportal.databiology.com/test1.owl#,canonical +bioportal,TIME,http://www.w3.org/2006/time#,canonical +bioportal,TIMEBANK,https://w3id.org/timebank#,canonical +bioportal,TM-CONST,http://who.int/ictm/constitution#,canonical +bioportal,TM-MER,http://who.int/ictm/meridians#,canonical +bioportal,TM-SIGNS-AND-SYMPTS,http://who.int/ictm/signsAndSymptoms#,canonical +bioportal,TMA,http://bioontology.org/ontologies/tma-minimal#,canonical +bioportal,TMO,http://www.w3.org/2001/sw/hcls/ns/transmed/,canonical +bioportal,TOK,http://cui.unige.ch/isi/onto/tok/TOK.owl#,canonical 
+bioportal,TOP-MENELAS,http://www.limics.fr/ontologies/menelastop#,canonical +bioportal,TRAK,OBO:TRAK_,canonical +bioportal,TRIAGE,http://www.semanticweb.org/philshields/ontologies/2015/4/untitled-ontology-59#,canonical +bioportal,TRON,OBO:TrOn_,canonical +bioportal,TXPO,OBO:TXPO_,canonical +bioportal,TYPON,http://purl.phyloviz.net/ontology/typon#,canonical +bioportal,UMMS,https://w3id.org/umms/ekg/onto01/,canonical +bioportal,UNITSONT,http://mimi.case.edu/ontologies/2009/1/UnitsOntology#,canonical +bioportal,UPA,OBO:UPa_,canonical +bioportal,VANDF,http://purl.bioontology.org/ontology/VANDF/,canonical +bioportal,VARIO,OBO:VariO_,canonical +bioportal,VDOT,http://www.ifomis.org/vdot/vdot_core.owl#vdot_,canonical +bioportal,VEO,http://sbmi.uth.tmc.edu/ontology/VEO#,canonical +bioportal,VIDO,OBO:VIDO_,canonical +bioportal,VODANADISEASES,http://vocab.vodan-totafrica.info/vodana-terms/vdiseases/,canonical +bioportal,VODANAMFLCODE,http://vocab.vodana.org/vmfl/,canonical +bioportal,WB-LS,OBO:WBls_,canonical +bioportal,WC,OBO:WC_,canonical +bioportal,WEAR,http://purl.org/wear/,canonical +bioportal,WEAVE,http://purl.org/weave/,canonical +bioportal,WETAXTOPICS,http://purl.org/neat/,canonical +bioportal,WIKIPATHWAYS,http://vocabularies.wikipathways.org/wp#,canonical +bioportal,WIKIPATHWAYS,http://vocabularies.wikipathways.org/wpTypes#,prefix_alias +bioportal,WSIO,OBO:WSIO_,canonical +bioportal,WSIO,OBO:http://wsio.org#,prefix_alias +bioportal,XEO,OBO:XEO_,canonical +bioportal,XLMOD,OBO:XLMOD_,canonical +bioportal,XPO,OBO:XPO_,canonical +bioportal,XREF-FUNDER-REF,http://data.crossref.org/fundingdata/vocabulary/Label-,canonical +bioportal,ZONMW-ADMIN-MD,http://www.fair-data-collective.com/zonmw/projectadmin/,canonical +bioportal,ZONMW-CONTENT,http://purl.org/zonmw/covid19/,canonical +bioportal,ZONMW-GENERIC,http://purl.org/zonmw/generic/,canonical \ No newline at end of file diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 1aa1648..922691a 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -2,6 +2,7 @@ from os import environ from typing import Union +import importlib from urllib.parse import urlparse from json import load @@ -111,3 +112,35 @@ def expand_curie(curie: str) -> str: } prefix, suffix = curie.split(":") return f"{mapping[prefix]}{suffix}" + + +def compress_uri(uri: str) -> str: + """ + Compress a URI into a CURIE based on the prefix mappings in the OBO and + BioPortal converters. + + :param uri: The URI to be compressed into a CURIE. + :returns: The compressed CURIE. Returns the original URI if the prefix + does not have a mapping. + :notes: This is a wrapper function around the `prefixmaps` and `curies` + libraries. + """ + prefixmaps = load_prefixmaps() + match = prefixmaps[prefixmaps["namespace"].apply(lambda x: x in uri)] + if not match.empty: + prefix = match["prefix"].values[0] + suffix = uri.replace(match["namespace"].values[0], "") + return f"{prefix}:{suffix}" + return uri + + +def load_prefixmaps() -> dict: + """ + Load ontology prefix maps. To be used with `expand_curie` and + `compress_uri`. 
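+ For example, one illustrative row maps the prefix "ENVO" to the namespace
+ "http://purl.obolibrary.org/obo/ENVO_"; `compress_uri` uses such rows to
+ shorten full term URIs into CURIEs.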
+
+    :returns: The ontology prefix maps
+    """
+    file = str(importlib.resources.files("spinneret.data")) + "/prefixmaps.csv"
+    prefixmaps = pd.read_csv(file)
+    return prefixmaps
diff --git a/tests/conftest.py b/tests/conftest.py
index 8943e6a..012eee8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 """Configure the test suite"""
 
+from json import load
 import pytest
 
 from spinneret.utilities import load_workbook
@@ -18,3 +19,39 @@ def annotated_workbook():
     """Return a fixture for an annotated workbook"""
     wb = load_workbook("tests/edi.3.9_annotation_workbook_annotated.tsv")
     return wb
+
+
+@pytest.fixture(name="termset_similarity_score_raw")
+def termset_similarity_score_raw():
+    """Return a fixture for raw termset similarity scores returned by the
+    `runoak -i {db} termset-similarity` command."""
+    score_file = "tests/data/benchmark/termset_similarity_score_raw.json"
+    with open(score_file, "r", encoding="utf-8") as file:
+        return load(file)
+
+
+@pytest.fixture(name="termset_similarity_score_processed")
+def termset_similarity_score_processed():
+    """Return a fixture for processed termset similarity scores returned by
+    the get_termset_similarity function."""
+    score_file = "tests/data/benchmark/termset_similarity_score_processed.json"
+    with open(score_file, "r", encoding="utf-8") as file:
+        return load(file)
+
+
+@pytest.fixture(name="termset_similarity_score_fields")
+def termset_similarity_score_fields():
+    """Return a fixture for the fields expected in the termset similarity
+    scores"""
+    return [
+        "average_score",
+        "best_score",
+        "average_jaccard_similarity",
+        "best_jaccard_similarity",
+        "average_phenodigm_score",
+        "best_phenodigm_score",
+        "average_standard_information_content",
+        "best_standard_information_content",
+        "average_test_information_content",
+        "best_test_information_content",
+    ]
diff --git a/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv
new file mode 100644
index 0000000..200ba37
--- /dev/null
+++ b/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv
@@ -0,0 +1,78 @@
+package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater lake biome http://purl.obolibrary.org/obo/ENVO_01000252 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ oligotrophic lake http://purl.obolibrary.org/obo/ENVO_01000774 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ eutrophic lake http://purl.obolibrary.org/obo/ENVO_01000548 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ mesotrophic lake http://purl.obolibrary.org/obo/ENVO_01000775 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ humic lake http://purl.obolibrary.org/obo/ENVO_01001021 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ dimictic lake http://purl.obolibrary.org/obo/ENVO_01000286 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ polymictic lake http://purl.obolibrary.org/obo/ENVO_01000287 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 biogeochemical cycling http://purl.obolibrary.org/obo/ENVO_02500009 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 acidification of an aquatic environment http://purl.obolibrary.org/obo/ENVO_01000630 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod environmental monitoring http://vocabs.lter-europe.net/EnvThes/21335 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod field measurement http://vocabs.lter-europe.net/EnvThes/20223 https://orcid.org/0000-0003-2261-9932 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod long term monitoring http://vocabs.lter-europe.net/EnvThes/21337 https://orcid.org/0000-0003-2261-9933 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod observational study http://vocabs.lter-europe.net/EnvThes/20243 https://orcid.org/0000-0003-2261-9934 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod time series http://vocabs.lter-europe.net/EnvThes/20285 https://orcid.org/0000-0003-2261-9935 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod water quality monitoring http://vocabs.lter-europe.net/EnvThes/21339 https://orcid.org/0000-0003-2261-9936 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod mooring http://vocabs.lter-europe.net/EnvThes/20304 https://orcid.org/0000-0003-2261-9937 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod combustion analysis https://www.wikidata.org/wiki/Q591867 https://orcid.org/0000-0003-2261-9938 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod colorimetric assay https://www.wikidata.org/wiki/Q5149058 https://orcid.org/0000-0003-2261-9939 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66ec8e6f-5747-4540-a605-a2f5881869f1 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType site identifier http://purl.dataone.org/odo/ECSO_00002997 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 89e8a721-37cc-4a68-8cd5-81b517bc8348 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType year of measurement http://purl.dataone.org/odo/ECSO_00002050 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7a1c02cb-743d-433e-b799-30e15f21fc42 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType day of year http://purl.dataone.org/odo/ECSO_00002058 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4f92fc26-bff5-43ad-83ba-e9eb78cdde32 
/eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType date http://purl.dataone.org/odo/ECSO_00002051 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ce72e831-a964-4e5b-a48f-4e736fe8cbde /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType depth http://purl.dataone.org/odo/ECSO_00000515 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f97b7698-2e00-4648-ae73-22660b2f7522 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType replicate identifier http://purl.dataone.org/odo/ECSO_00002989 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6eeef47d-8fb8-45a4-8ea7-e2f12ce508f7 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType station identifier http://purl.dataone.org/odo/ECSO_00002393 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0e9015d5-6bb8-46d3-81f8-254f4e018e56 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sampling occasion http://purl.dataone.org/odo/ECSO_00002620 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c1cdf313-99c1-4306-aea0-80d589f28dfb /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType pH http://purl.dataone.org/odo/ECSO_00001645 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4dd2235c-d46d-429c-b74c-f1fa536d8f71 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType pH http://purl.dataone.org/odo/ECSO_00001645 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 34820bf1-ec29-450c-a70e-970f4a1cd9cf /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total alkalinity http://purl.dataone.org/odo/ECSO_00001752 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0a54166f-7a3e-453e-b083-da01e2e26c16 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType dissolved inorganic carbon concentration in freshwater http://purl.dataone.org/odo/ECSO_00002103 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 64541338-8971-4633-b18f-61c39e493921 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Freshwater Total Inorganic Carbon Concentration http://purl.dataone.org/odo/ECSO_00001122 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute eccddf6b-6a90-424f-b6ba-acaf7cbc31a7 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType dissolved organic carbon concentration in water http://purl.dataone.org/odo/ECSO_00002143 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0e6a7f1a-c817-4fd6-8e61-34e615a98a0a /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Aquatic Total Organic Carbon Concentration http://purl.dataone.org/odo/ECSO_00001118 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d5d5cb58-5fc4-4801-9a78-1d98203c3ee4 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 10d6081f-3c52-4323-a470-c2cdc799ffce /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute dc57229d-7528-47b2-b38b-461a4614a3a7 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ammonium concentration in water http://purl.dataone.org/odo/ECSO_00001760 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fbb79bdd-873f-44e5-a4aa-42bdf8072ba0 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf contains measurements 
of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1d24561a-fdad-4a16-8e35-8bfb480db6ab /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f1a8c896-e4c9-4c60-b61c-6ee9dc8f3115 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 269c37a4-59d5-44ec-9438-7a8bccd4e98b /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7e390f5c-2cce-4d19-a95e-bdaecd8a9a90 /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee934a41-29dc-4a17-bdb8-edb0c586869e /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 862be757-1124-4d61-8b7e-b9976d4348d2 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 860d9039-f249-4f03-a36f-47d54a167192 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType suspended particulate material concentration in water http://purl.dataone.org/odo/ECSO_00002646 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
b9717e81-a1f5-4cca-b3bf-2bac7e874602 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fead55f1-5384-4b80-8949-6e44fa31cf6c /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. of Hygiene no3no2_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 30c173fb-13a4-4dbf-b90b-672de6773227 /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ammonium concentration in water http://purl.dataone.org/odo/ECSO_00001760 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e2c8a15a-1f29-4c84-a2db-25f1d8ecac05 /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e8120d58-6652-41c6-91b1-a0938a9640cb /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 81903d24-fd91-49fb-86a9-92a04b2c63e6 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5b5b6b66-28ff-4303-84f8-4919b82bea0c /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. 
of Hygiene drsif_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2f31fe01-d430-45c4-a3f4-b49e422b55c9 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e7a232ae-12c0-42f6-9a24-60127051d584 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001721 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 44886cc4-edab-42b4-a626-a05e656bdc2f /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001722 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 89f8f176-d741-4ff3-b216-271e7bf86075 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001723 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 565ebbb5-20aa-4760-b9fb-8e113eb9daac /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001724 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fc378152-97d6-4332-ae71-a643a6c81157 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001725 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 488d182b-9f28-4995-b575-c4677f6ceb98 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001726 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 215c3c0d-1bf4-48dd-8848-0b267964852a /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001727 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4d7f18b9-adf4-4db2-8a32-d9a66cfa1621 /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001728 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a02c4fb7-7349-4c81-b19a-48de51099874 /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001729 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 73679553-319b-4cd2-bc55-b1a37a1b0f4f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001730 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f70fb9a4-34d2-4bd5-922b-41c83914b896 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001731 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute cd8180f3-ed1c-4efd-acdd-6433e3f80681 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001732 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d83e548a-85a7-4472-87d7-33fa5205c666 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001733 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7f301922-dc5a-42aa-aaa2-0ee176d15c30 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf contains measurements of type 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001734 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 21768e22-4e2f-4b8a-9d36-6b29008d5775 /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001735 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fffeadc3-f4a3-497c-9a54-bb5b598a89b2 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001736 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6231932-1584-4319-8cec-24069735ed36 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001737 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1ec2a1cf-3ee6-4247-a202-0a3f2c7236da /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001738 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 21b168c0-5006-48a2-b1db-1776a5d94287 /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001739 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeb36290-9c30-478f-b09b-237b7316584a /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001740 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6e4ac687-b845-4547-b427-474a65c77a7f /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001741 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 65b6fee7-108e-4fb7-b099-36438fe46493 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001742 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f5f1cd3f-d1cf-4109-8c42-9aee6cc7da77 /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001743 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute beb62153-b695-4365-b8d0-21a0e10495a1 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001744 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c756e4b3-3c07-4e35-a967-d2e9f0a785b1 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001745 https://orcid.org/0000-0003-2261-9931 2014-10-17 \ No newline at end of file diff --git a/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..6ccc514 --- /dev/null +++ b/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,45 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater lake biome http://purl.obolibrary.org/obo/ENVO_01000252 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ oligotrophic lake http://purl.obolibrary.org/obo/ENVO_01000774 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ eutrophic lake http://purl.obolibrary.org/obo/ENVO_01000548 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ mesotrophic lake http://purl.obolibrary.org/obo/ENVO_01000775 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ humic lake http://purl.obolibrary.org/obo/ENVO_01001021 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ dimictic lake http://purl.obolibrary.org/obo/ENVO_01000286 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ polymictic lake http://purl.obolibrary.org/obo/ENVO_01000287 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 biogeochemical cycling http://purl.obolibrary.org/obo/ENVO_02500009 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod environmental monitoring http://vocabs.lter-europe.net/EnvThes/21335 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod field measurement http://vocabs.lter-europe.net/EnvThes/20223 https://orcid.org/0000-0003-2261-9932 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod long term monitoring http://vocabs.lter-europe.net/EnvThes/21337 https://orcid.org/0000-0003-2261-9933 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod observational study http://vocabs.lter-europe.net/EnvThes/20243 https://orcid.org/0000-0003-2261-9934 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod time series http://vocabs.lter-europe.net/EnvThes/20285 https://orcid.org/0000-0003-2261-9935 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod water quality monitoring http://vocabs.lter-europe.net/EnvThes/21339 https://orcid.org/0000-0003-2261-9936 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod mooring http://vocabs.lter-europe.net/EnvThes/20304 https://orcid.org/0000-0003-2261-9937 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod probe http://vocabs.lter-europe.net/EnvThes/20095 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod chromatography https://www.wikidata.org/wiki/Q170050 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod spectrophotometry https://www.wikidata.org/wiki/Q332084 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0897a396-1034-4848-bf0b-56fa200d9697 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType site identifier http://purl.dataone.org/odo/ECSO_00002997 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 74405050-dfa2-4375-b5b0-5aed73b7f01c /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType year of measurement http://purl.dataone.org/odo/ECSO_00002050 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1138b76-ef27-4107-a8e9-b139d50206e4 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType day of year http://purl.dataone.org/odo/ECSO_00002058 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b9d082a3-e19c-46fa-97dc-f7f6267158d1 
/eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType date http://purl.dataone.org/odo/ECSO_00002051 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e4c5511f-0dfc-4e6f-a7fc-91a0f87acb5b /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType depth http://purl.dataone.org/odo/ECSO_00000515 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 46744b39-ffcf-4b61-b0ab-c549c2e37061 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType replicate identifier http://purl.dataone.org/odo/ECSO_00002989 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 76314844-0b99-4e8a-990d-9512c61cea35 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType station identifier http://purl.dataone.org/odo/ECSO_00002393 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 4e32bc02-bd87-43fd-8ed9-2c7f88aef3aa /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sampling occasion http://purl.dataone.org/odo/ECSO_00002620 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c54df277-23a3-4d55-8718-5cf8ff10ed13 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType chlorine concentration in water http://purl.dataone.org/odo/ECSO_00002033 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute edbba2ef-ddce-4cfc-b4b7-ea3937907ae9 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sulfate concentration in water http://purl.dataone.org/odo/ECSO_00002389 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c7616c28-1bcb-4de9-b4fc-72febcbf198e /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType calcium concentration in lake water http://purl.dataone.org/odo/ECSO_00001773 https://orcid.org/0000-0003-2261-9931 2014-10-18 
+knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d3e9dc49-d8ce-4c36-9b9e-8616b748edcc /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType magnesium concentration in lake water http://purl.dataone.org/odo/ECSO_00001791 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e67ff433-e91c-4d19-9fc3-c8838277ef23 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sodium concentration in lake water http://purl.dataone.org/odo/ECSO_00001805 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0db9cae7-415f-4f29-856e-ca14ca224117 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType potassium concentration in lake water http://purl.dataone.org/odo/ECSO_00001799 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute aa922788-30fe-4e1c-8626-6d8bfd1c7b9e /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType iron concentration in water http://purl.dataone.org/odo/ECSO_00001785 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 493815f4-610d-4f81-892d-60b63771f1cb /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType manganese concentration in lake water http://purl.dataone.org/odo/ECSO_00001793 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 06afb28f-ccdc-4899-9e66-4a27e7a6790e /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType conductivity http://purl.dataone.org/odo/ECSO_00001534 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 3dd95ddd-484f-4b85-a194-443d632b33ca /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute efff83dc-e171-4945-b370-d61d631a0e78 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 contains measurements of 
type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b4c29470-51af-4366-938f-851f77b8ac71 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b9dd6cfa-c7dd-4490-9ed8-72c5ff7c312a /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a5f01d3f-51cd-406d-93af-7e59043d5e04 /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 4709caa6-d841-481f-abd3-062bfd18dbd3 /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 50a83e04-14e2-4870-86b7-984f8ab061f8 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 7ade0294-adf6-482a-9e21-70eed67eb59b /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0d4536b0-94ba-40ea-b1c3-8ca895425ad2 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 \ No newline at end of file diff --git a/tests/data/benchmark/standard/notes.txt 
b/tests/data/benchmark/standard/notes.txt new file mode 100644 index 0000000..5ec11fc --- /dev/null +++ b/tests/data/benchmark/standard/notes.txt @@ -0,0 +1 @@ +These workbooks were annotated with expert human curation. These comprise the standard to test automated annotation methods against. \ No newline at end of file diff --git a/tests/data/benchmark/termset_similarity_score_processed.json b/tests/data/benchmark/termset_similarity_score_processed.json new file mode 100644 index 0000000..6e8bb58 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_score_processed.json @@ -0,0 +1,12 @@ +{ + "average_score": 9.196397212803504, + "best_score": 9.196397212803504, + "average_jaccard_similarity": 0.8450704225352113, + "best_jaccard_similarity": 0.8450704225352113, + "average_phenodigm_score": 2.7877595445851306, + "best_phenodigm_score": 2.7877595445851306, + "average_standard_information_content": 11.196397212803504, + "best_standard_information_content": 11.196397212803504, + "average_test_information_content": 10.459431618637298, + "best_test_information_content": 10.459431618637298 +} diff --git a/tests/data/benchmark/termset_similarity_score_raw.json b/tests/data/benchmark/termset_similarity_score_raw.json new file mode 100644 index 0000000..1d86609 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_score_raw.json @@ -0,0 +1,59 @@ +[ + { + "subject_termset": { + "ENVO:01000252": { + "id": "ENVO:01000252", + "label": "freshwater lake biome" + } + }, + "object_termset": { + "ENVO:01000253": { + "id": "ENVO:01000253", + "label": "freshwater river biome" + } + }, + "subject_best_matches": { + "ENVO:01000252": { + "match_source": "ENVO:01000252", + "score": 9.196397212803504, + "similarity": { + "subject_id": "ENVO:01000252", + "object_id": "ENVO:01000253", + "ancestor_id": "ENVO:00000873", + "ancestor_label": "freshwater biome", + "object_information_content": 10.459431618637298, + "subject_information_content": 11.196397212803504, + "ancestor_information_content": 9.196397212803504, + "jaccard_similarity": 0.8450704225352113, + "phenodigm_score": 2.7877595445851306 + }, + "match_source_label": "freshwater lake biome", + "match_target": "ENVO:01000253", + "match_target_label": "freshwater river biome" + } + }, + "object_best_matches": { + "ENVO:01000253": { + "match_source": "ENVO:01000253", + "score": 9.196397212803504, + "similarity": { + "subject_id": "ENVO:01000252", + "object_id": "ENVO:01000253", + "ancestor_id": "ENVO:00000873", + "ancestor_label": "freshwater biome", + "object_information_content": 10.459431618637298, + "subject_information_content": 11.196397212803504, + "ancestor_information_content": 9.196397212803504, + "jaccard_similarity": 0.8450704225352113, + "phenodigm_score": 2.7877595445851306 + }, + "match_source_label": "freshwater river biome", + "match_target": "ENVO:01000252", + "match_target_label": "freshwater lake biome" + } + }, + "average_score": 9.196397212803504, + "best_score": 9.196397212803504, + "@type": "TermSetPairwiseSimilarity" + } +] \ No newline at end of file diff --git a/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..22e1328 --- /dev/null +++ b/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv @@ -0,0 +1,71 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment 
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ inland waters AUTO:inland%20waters spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.409195 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. 
Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ aquatic environment http://purl.obolibrary.org/obo/ENVO_01000317 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408893 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater AUTO:freshwater spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408576 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lake AUTO:lake spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408274 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407974 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lakes AUTO:lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407670 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ northern lakes AUTO:northern%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407361 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ southern lakes AUTO:southern%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407045 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bog http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.406700 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset 1d8fcbb8-fc0e-4432-a5d7-f12ac6b5989b /eml:eml/dataset knb-lter-ntl.1.59 "Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11 + Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received. chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry" dataset +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ff88d1e6-dcaa-47f0-8599-4611bccb59a9 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 90ba6f24-ef50-4322-9f6e-2615bad77a23 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute bb088d87-c6e7-4952-b333-350944605412 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1b3c5c94-4448-4d74-9176-cb159da849a9 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7d427f25-53a5-4bbd-bb3c-a2fa501b3330 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 79351da1-ddde-4bb2-80b1-88a72643c195 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a9946a61-0f90-4d59-9d07-75f22dcab406 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5c22fc18-2b88-4c95-9f59-cd64d00bbb62 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
68931cdd-8d38-4af8-baee-aaf0fe0468b6 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute b8dcce57-8597-43a2-981a-fde2e32a243a /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c2aa8715-3aa6-4bfb-b752-798f35fd456f /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a3c6dcfc-1ab5-45ef-b543-692235c26d70 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5a017432-2075-428c-a03f-1b87cf2fe0a4 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 800c66bc-0b8b-4496-b425-68a136215a55 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4353b35d-5e66-4d33-b176-cbd95a03e972 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5986fb78-2525-4cfe-857d-7d38583e3ecc /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 797c9db2-2702-42ac-a04c-c744751b6a35 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 58ec98c3-f584-4504-82f4-ed7a6ac971d3 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d597c578-231c-4431-807c-42be84e0d582 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 315863bf-7381-417b-bfa5-4ce5a044210b /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a4ab66d7-90b5-49f9-83c0-2f5ea904abf0 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a1fb2bee-9c2f-44a0-958c-5834938604fd /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a57c9579-845a-4711-9c29-a6ad5ac3d66b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a97531d3-56ae-405d-bc12-f1f2a1d328a0 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8b70ba6b-ca4e-4b3a-9d89-04e345017ff9 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6a8c7ee0-7c2b-4543-a5f4-1a921bea6119 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8849481f-897d-4e80-8c33-816038580e02 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2ca00ca9-11cb-442a-93c7-acf8ae4d88fd /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. of Hygiene no3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8c1f92d3-eae3-45dd-b6bd-93a36c854aaf /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3c588629-11f9-4fb9-9a8d-3bc4263468ff /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeca1b6b-a69f-45c1-945b-10e97713a1e6 /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 612f756e-b959-49ba-8ea9-70b9ba4f6183 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 694813a5-75db-4a58-ba2b-32c440e93651 /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. 
of Hygiene drsif_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 03450955-1c82-4372-84ec-88a65a1fef63 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1c724750-20be-47f4-a7fb-ce260d732944 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 55d6f8a5-7568-4799-b35e-72d62a49c12b /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 41aadd4c-83d6-41d7-9220-cc2732485a35 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 47aa51ce-f9cf-4a28-bc2e-67f4ec7d6c44 /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d733078a-193d-41a6-8b37-78be3cac73a2 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 83c23a71-cc7c-4394-99b9-8d4b707554b0 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6d5766e-6eca-4d0c-80c8-e08f48302122 /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 765c99ec-8b08-489f-b521-e6f3102f22eb /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c8a48b80-b980-4b03-8749-c1412f4b96cc /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 415123b6-ae86-40f9-91ba-2703ab1e390f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 626bcdd6-e16e-4f3a-b2fe-d8c8216f2471 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7847f72d-6405-433a-91c0-db80d29648b7 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee08a1b1-6a37-4b59-b8fd-0fcc837b6cf0 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c34fafd5-058b-4cf7-a9b5-57e9b1027da9 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e228b33d-1154-4d43-ada9-3e758f7c4c9f /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d153c8fb-cbb0-42de-8f1e-67274dd8c949 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 737ab156-01d0-4e61-b961-ce1cf90388e9 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e9240b0f-5ee5-40cc-b65f-41789f1198dd /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1860cbb7-5ebb-4a6f-8ff2-3bc449d65a2f /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3d7de8ea-4311-492c-942a-e18b374b7f97 /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4b270237-bcec-4e78-97e7-6f0b5342e254 /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4ba154a2-0677-4a99-8e16-b24b8a9dea29 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66a23414-8dce-467b-ab1d-1b82b0c33f9b /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 231d3b44-1f7d-48d2-b9ff-8004bc332103 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 00350a07-c335-4c2e-bd53-e35cb1cc4316 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh diff --git a/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..4c2ba06 --- /dev/null +++ b/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,42 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 
dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ ponds AUTO:ponds spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.269472 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ wetlands http://purl.obolibrary.org/obo/ENVO_00000035 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.269178 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268873 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ rivers AUTO:rivers spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268577 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lakes AUTO:lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268276 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ Bog waters AUTO:Bog%20waters spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267973 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ Freshwater lakes AUTO:Freshwater%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267668 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater AUTO:freshwater spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267352 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ inland water AUTO:inland%20water spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267033 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bog http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.266699 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lake AUTO:lake spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.266192 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset b761e76b-4888-4a68-8fd1-d192feb87fbe /eml:eml/dataset knb-lter-ntl.2.37 "Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects. + Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C. + 1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly. 
+ Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11 chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity" dataset +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a8be3bdb-ba9b-4462-ac00-4c195d8d6271 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2ee0ed01-2526-4d06-abf0-f0baa8bf0405 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2935f46c-c6d6-4b56-8ca4-21b4f53874d3 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 78518f7c-bfb0-4e6f-be63-b942a8b09072 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 8324a47b-fd8b-4477-9751-e662d0f24d87 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c9006d36-b13f-471a-9c65-165bbba29d3b /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 9466e002-fe89-47c4-87ec-b322c55376c4 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6d59ec94-117f-4378-917b-7e0c785cdc9c /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 5d666d0c-7be9-4d1f-84b2-a5b572d819db /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a59d2c08-669a-436e-bfe9-6c3196215166 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 40ef2724-7200-4cbf-8663-0eaef6df880b /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 63021500-f274-4f23-8bcf-520f22cc53bb /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b2094513-bba1-4671-9555-d376c5333c5a /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6ae30df9-7862-4488-ad9e-f0f8316081e7 
/eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 1b5a4f2e-58db-40e1-a044-38e4451180a0 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e9537242-efd6-4a83-a64a-1b2fd7fc36d8 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute cf23b883-857c-4573-9723-401e2f2fd2e0 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f95c44fe-7bdd-47a4-85c6-1462f6b9545a /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1e21362-d37a-4d18-9656-ee3d94d74cff /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 58bb291c-de47-4bbe-b5c4-93816f8f5c77 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 56db3ae8-db1c-4792-83b3-5c96a883c732 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b3a5a6d2-96b7-4096-bf6f-7967e443f0bc /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f68e300b-31ef-4c38-b470-0988cfdc268b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ee010787-3226-4c28-bf1f-fa08f9764f6f /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute bdde0f42-1414-4355-bb23-73037fcef5ff /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ae7d8e74-98f7-4a04-a30b-c1d42ee514cf /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond diff --git a/tests/data/benchmark/test_a/notes.txt b/tests/data/benchmark/test_a/notes.txt new file mode 100644 index 0000000..ee404ce --- /dev/null +++ b/tests/data/benchmark/test_a/notes.txt @@ -0,0 +1 @@ +This run increases sample size to 3 to test if overall grounding improves. 
\ No newline at end of file diff --git a/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..d7f8724 --- /dev/null +++ b/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv @@ -0,0 +1,124 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:59.477115 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:59.137550 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.792060 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.446345 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.105312 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. 
of Hygiene no3no2_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.702475 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.353412 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.008575 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.669128 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.332559 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.003383 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:55.672201 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:55.333889 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter 
http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.981935 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.634614 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.289731 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.948702 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.597860 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.230680 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.881694 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.536776 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.190681 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
/eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Unitless http://qudt.org/vocab/unit/UNITLESS spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:51.239842 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Unitless http://qudt.org/vocab/unit/UNITLESS spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:50.896707 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Meter http://qudt.org/vocab/unit/M spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:50.550432 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:03:43.774995 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:03:02.121445 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:02:23.134680 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:02:23.134159 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:50.967139 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:42.468349 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Groundwater http://purl.obolibrary.org/obo/ENVO_01001004 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:35.275643 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:35.275102 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:57.262942 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:45.680608 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:40.920276 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:40.919648 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:36.165647 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:14.598300 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 
spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:43.876018 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:39.301191 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:39.300685 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 seawater http://purl.obolibrary.org/obo/ENVO_00002149 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:34.709185 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:30.715581 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:25.940691 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:18.595722 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:18.595206 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.003423 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 
spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.003026 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.002437 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:05.980897 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:05.980359 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total organic carbon http://purl.dataone.org/odo/ECSO_00000329 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:57:12.186743 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Turbidity http://purl.dataone.org/odo/ECSO_00002359 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:56:26.356521 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Conductivity http://purl.dataone.org/odo/ECSO_00001534 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:56:26.355963 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ## depth at which the sample or measurement was taken http://purl.dataone.org/odo/ECSO_00001250 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:54:24.082809 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. 
Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod cadmium reduction method and absorption measurement at 520 nm on Technicon segmented flow autoanalyzer or Astoria-Pacific Astoria II segmented flow autoanalyzer http://vocabs.lter-europe.net/EnvThes/20803 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.614269 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod automated colorimetric spectrophotometry using segmented flow autoanalyzer http://vocabs.lter-europe.net/EnvThes/10375 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613955 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod Heteropoly Blue Method and absorption measurement at 820 nm on Bausch and Lomb Spectrophotometer, Technicon AutoAnalyzer II, or Astoria-Pacific Astoria II AutoAnalyzer http://vocabs.lter-europe.net/EnvThes/20104 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613636 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. 
Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod combustion analysis on Shimadzu TOC-V-csh or TOC-L-cph analyzer http://vocabs.lter-europe.net/EnvThes/22297 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613298 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod direct determination of Total N http://vocabs.lter-europe.net/EnvThes/10328 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.612801 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:52:47.887392 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset 1d8fcbb8-fc0e-4432-a5d7-f12ac6b5989b /eml:eml/dataset knb-lter-ntl.1.59 "Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11 + Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received. 
chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry" dataset +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ff88d1e6-dcaa-47f0-8599-4611bccb59a9 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 90ba6f24-ef50-4322-9f6e-2615bad77a23 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute bb088d87-c6e7-4952-b333-350944605412 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1b3c5c94-4448-4d74-9176-cb159da849a9 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7d427f25-53a5-4bbd-bb3c-a2fa501b3330 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 79351da1-ddde-4bb2-80b1-88a72643c195 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a9946a61-0f90-4d59-9d07-75f22dcab406 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5c22fc18-2b88-4c95-9f59-cd64d00bbb62 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 68931cdd-8d38-4af8-baee-aaf0fe0468b6 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute b8dcce57-8597-43a2-981a-fde2e32a243a /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c2aa8715-3aa6-4bfb-b752-798f35fd456f /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a3c6dcfc-1ab5-45ef-b543-692235c26d70 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5a017432-2075-428c-a03f-1b87cf2fe0a4 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 800c66bc-0b8b-4496-b425-68a136215a55 
/eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4353b35d-5e66-4d33-b176-cbd95a03e972 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5986fb78-2525-4cfe-857d-7d38583e3ecc /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 797c9db2-2702-42ac-a04c-c744751b6a35 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 58ec98c3-f584-4504-82f4-ed7a6ac971d3 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d597c578-231c-4431-807c-42be84e0d582 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 315863bf-7381-417b-bfa5-4ce5a044210b /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a4ab66d7-90b5-49f9-83c0-2f5ea904abf0 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a1fb2bee-9c2f-44a0-958c-5834938604fd /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a57c9579-845a-4711-9c29-a6ad5ac3d66b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a97531d3-56ae-405d-bc12-f1f2a1d328a0 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8b70ba6b-ca4e-4b3a-9d89-04e345017ff9 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6a8c7ee0-7c2b-4543-a5f4-1a921bea6119 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8849481f-897d-4e80-8c33-816038580e02 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2ca00ca9-11cb-442a-93c7-acf8ae4d88fd /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite 
Nitrogen from WI State Lab. of Hygiene no3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8c1f92d3-eae3-45dd-b6bd-93a36c854aaf /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3c588629-11f9-4fb9-9a8d-3bc4263468ff /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeca1b6b-a69f-45c1-945b-10e97713a1e6 /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 612f756e-b959-49ba-8ea9-70b9ba4f6183 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 694813a5-75db-4a58-ba2b-32c440e93651 /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 03450955-1c82-4372-84ec-88a65a1fef63 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1c724750-20be-47f4-a7fb-ce260d732944 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 55d6f8a5-7568-4799-b35e-72d62a49c12b /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 41aadd4c-83d6-41d7-9220-cc2732485a35 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 47aa51ce-f9cf-4a28-bc2e-67f4ec7d6c44 /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d733078a-193d-41a6-8b37-78be3cac73a2 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 83c23a71-cc7c-4394-99b9-8d4b707554b0 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6d5766e-6eca-4d0c-80c8-e08f48302122 /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 765c99ec-8b08-489f-b521-e6f3102f22eb /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c8a48b80-b980-4b03-8749-c1412f4b96cc /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 415123b6-ae86-40f9-91ba-2703ab1e390f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 626bcdd6-e16e-4f3a-b2fe-d8c8216f2471 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7847f72d-6405-433a-91c0-db80d29648b7 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee08a1b1-6a37-4b59-b8fd-0fcc837b6cf0 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c34fafd5-058b-4cf7-a9b5-57e9b1027da9 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e228b33d-1154-4d43-ada9-3e758f7c4c9f /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d153c8fb-cbb0-42de-8f1e-67274dd8c949 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 737ab156-01d0-4e61-b961-ce1cf90388e9 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e9240b0f-5ee5-40cc-b65f-41789f1198dd /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1860cbb7-5ebb-4a6f-8ff2-3bc449d65a2f /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3d7de8ea-4311-492c-942a-e18b374b7f97 /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4b270237-bcec-4e78-97e7-6f0b5342e254 /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
4ba154a2-0677-4a99-8e16-b24b8a9dea29 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66a23414-8dce-467b-ab1d-1b82b0c33f9b /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 231d3b44-1f7d-48d2-b9ff-8004bc332103 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 00350a07-c335-4c2e-bd53-e35cb1cc4316 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh diff --git a/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..47c10f1 --- /dev/null +++ b/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,66 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microsiemens Per Centimeter http://qudt.org/vocab/unit/MicroS-PER-CentiM spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.796261 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.393846 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.054581 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.709779 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.364092 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 
/eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.022712 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:46.681542 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:46.337658 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:45.996986 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Meter http://qudt.org/vocab/unit/M spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:45.655668 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:44.266991 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:44.266472 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:39.586762 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:39.586271 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:27.924876 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:27.924366 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.011442 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.011081 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.010566 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:09.334418 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:34.749614 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:31.070666 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:31.070162 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.628417 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.628068 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.627556 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:09.589054 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType specific_conductivity http://purl.dataone.org/odo/ECSO_00001534 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:20:42.642322 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType potassium concentration http://purl.dataone.org/odo/ECSO_00001120 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:19:39.657275 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Inorganic Nutrients http://vocabs.lter-europe.net/EnvThes/USLterCV_266 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.731211 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Water Chemistry http://vocabs.lter-europe.net/EnvThes/USLterCV_619 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.730710 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Limnology http://vocabs.lter-europe.net/EnvThes/21749 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.730199 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_local_scale https://genomicsstandardsconsortium.github.io/mixs/0000013/ road http://purl.obolibrary.org/obo/ENVO_00000064 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:14.046509 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_local_scale https://genomicsstandardsconsortium.github.io/mixs/0000013/ lake http://purl.obolibrary.org/obo/ENVO_00000020 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:14.046007 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:06.439216 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset b761e76b-4888-4a68-8fd1-d192feb87fbe /eml:eml/dataset knb-lter-ntl.2.37 "Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects. 
+ Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C. + 1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly. + Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11 chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity" dataset +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a8be3bdb-ba9b-4462-ac00-4c195d8d6271 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2ee0ed01-2526-4d06-abf0-f0baa8bf0405 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2935f46c-c6d6-4b56-8ca4-21b4f53874d3 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 78518f7c-bfb0-4e6f-be63-b942a8b09072 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 8324a47b-fd8b-4477-9751-e662d0f24d87 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c9006d36-b13f-471a-9c65-165bbba29d3b /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 9466e002-fe89-47c4-87ec-b322c55376c4 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6d59ec94-117f-4378-917b-7e0c785cdc9c /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 5d666d0c-7be9-4d1f-84b2-a5b572d819db /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a59d2c08-669a-436e-bfe9-6c3196215166 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 40ef2724-7200-4cbf-8663-0eaef6df880b /eml:eml/dataset/dataTable/attributeList/attribute[11] 
ntl2_9.csv calcium concentration ca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 63021500-f274-4f23-8bcf-520f22cc53bb /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b2094513-bba1-4671-9555-d376c5333c5a /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6ae30df9-7862-4488-ad9e-f0f8316081e7 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 1b5a4f2e-58db-40e1-a044-38e4451180a0 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e9537242-efd6-4a83-a64a-1b2fd7fc36d8 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute cf23b883-857c-4573-9723-401e2f2fd2e0 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f95c44fe-7bdd-47a4-85c6-1462f6b9545a /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1e21362-d37a-4d18-9656-ee3d94d74cff /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 58bb291c-de47-4bbe-b5c4-93816f8f5c77 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 56db3ae8-db1c-4792-83b3-5c96a883c732 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b3a5a6d2-96b7-4096-bf6f-7967e443f0bc /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f68e300b-31ef-4c38-b470-0988cfdc268b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ee010787-3226-4c28-bf1f-fa08f9764f6f /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute bdde0f42-1414-4355-bb23-73037fcef5ff /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ae7d8e74-98f7-4a04-a30b-c1d42ee514cf 
/eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond diff --git a/tests/data/benchmark/test_b/notes.txt b/tests/data/benchmark/test_b/notes.txt new file mode 100644 index 0000000..dedf848 --- /dev/null +++ b/tests/data/benchmark/test_b/notes.txt @@ -0,0 +1 @@ +This run uses the default parameterization. \ No newline at end of file diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 119bf2c..8b10a28 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,7 +2,22 @@ import logging import daiquiri -from spinneret.benchmark import monitor +import pandas as pd +import pytest + +from spinneret.benchmark import ( + monitor, + benchmark_against_standard, + get_termset_similarity, + default_similarity_scores, + clean_workbook, + group_object_ids, + compress_object_ids, + parse_similarity_scores, + delete_terms_from_unsupported_ontologies, + get_shared_ontology, +) +from spinneret.utilities import is_url def test_monitor(tmp_path): @@ -28,3 +43,212 @@ def example_function(): # to call with monitor assert "Starting function 'example_function'" in log assert "Function 'example_function' completed in" in log assert "Memory usage: Current=" in log + + +@pytest.mark.parametrize("use_mock", [True]) # False calculates similarity scores +def test_benchmark_against_standard( + mocker, + use_mock, + termset_similarity_score_fields, + termset_similarity_score_processed, +): + """Test the benchmark_against_standard function""" + + if use_mock: + mocker.patch( + "spinneret.benchmark.get_termset_similarity", + return_value=termset_similarity_score_processed, + ) + + res = benchmark_against_standard( + standard_dir="tests/data/benchmark/standard", + test_dirs=["tests/data/benchmark/test_a", "tests/data/benchmark/test_b"], + ) + assert ( + res.columns.tolist() + == [ + "standard_dir", + "test_dir", + "standard_file", + "predicate_value", + "element_xpath_value", + "standard_set", + "test_set", + ] + + termset_similarity_score_fields + ) + + +def test_get_termset_similarity(termset_similarity_score_fields): + """Test the get_termset_similarity function""" + + # Get similarity scores for two sets of terms that are closely related. + r = get_termset_similarity( + set1={"ENVO:01000252"}, # freshwater lake biome + set2={"ENVO:01000253"}, # freshwater river biome + ) + assert isinstance(r, dict) + assert r.keys() == set(termset_similarity_score_fields) + for _, v in r.items(): + assert isinstance(v, float) + + # We expect lower similarity scores when we change one of the term sets to + # a less related set of terms. + r2 = get_termset_similarity( + set1={"ENVO:01000252"}, # freshwater lake biome + set2={"ENVO:01000182"}, # temperate desert biome + ) + assert r2["average_score"] < r["average_score"] + assert r2["best_score"] < r["best_score"] + + +def test_get_termset_similarity_with_empty_input_sets(): + """Test the get_termset_similarity function with empty input sets. 
The + function should return default score values.""" + + # Set 1 is empty + r = get_termset_similarity(set1=[], set2=["ENVO:01000253"]) + assert r == default_similarity_scores() + + # Set 2 is empty + r = get_termset_similarity(set1=["ENVO:01000252"], set2=[]) + assert r == default_similarity_scores() + + # Both sets are empty + r = get_termset_similarity(set1=[], set2=[]) + assert r == default_similarity_scores() + + +def test_default_similarity_scores(termset_similarity_score_fields): + """Test the default similarity scores return expected fields and values""" + + r = default_similarity_scores() + assert isinstance(r, dict) + assert set(r.keys()) == set(termset_similarity_score_fields) + for k, v in r.items(): + if k in ["average_score", "best_score"]: + assert v == 0.0 + else: + assert isinstance(v, type(pd.NA)) + + +def test_clean_workbook(annotated_workbook): + """Test the clean_workbook function""" + wb = annotated_workbook + + # Dirty-up the workbook by adding NA values and ungrounded terms in the + # "object_id" column + wb.loc[0, "object_id"] = pd.NA + assert wb["object_id"].isna().any() + wb.loc[1, "object_id"] = "AUTO:1234" + assert wb["object_id"].str.startswith("AUTO:").any() + + # After cleaning, the NA values and ungrounded terms will be gone + wb_cleaned = clean_workbook(wb) + assert not wb_cleaned["object_id"].isna().any() + assert not wb_cleaned["object_id"].str.startswith("AUTO:").any() + + +def test_group_object_ids(annotated_workbook): + """Test the group_object_ids function""" + wb = annotated_workbook + + # Group the workbook by predicate and element_xpath + grouped = group_object_ids(wb) + assert isinstance(grouped, dict) + + # The keys are tuples composed of the predicate and element_xpath values + assert isinstance(list(grouped.keys())[0], tuple) + + # Each value is a list of object_ids corresponding to the predicate and + # element_xpath grouping + assert isinstance(list(grouped.values())[0], list) + assert isinstance(list(grouped.values())[1][0], str) + assert is_url(list(grouped.values())[1][0]) + + +def test_compress_object_ids(annotated_workbook): + """The test_compress_object_ids function""" + + # Create grouped dictionary for testing + wb = annotated_workbook + grouped = group_object_ids(wb) + + # Grouped dictionary values are URI strings before compression + for _, values in grouped.items(): + for v in values: + if not v: # skip empty lists + continue + assert is_url(v) + + # After compression, the values are lists of CURIES + compressed = compress_object_ids(grouped) + for _, values in compressed.items(): + for v in values: + if not v: # skip empty lists + continue + assert not is_url(v) + assert len(v.split(":")) == 2 + + +def test_parse_similarity_scores( + termset_similarity_score_raw, termset_similarity_score_fields +): + """Test the parse_similarity_scores function""" + + # The parsed result should be a dictionary with the expected keys + r = parse_similarity_scores(termset_similarity_score_raw) + assert isinstance(r, dict) + assert set(r.keys()) == set(termset_similarity_score_fields) + + +def test_delete_terms_from_unsupported_ontologies(): + """Test the delete_terms_from_unsupported_ontologies function""" + + # Terms (CURIES) from supported ontologies are retained + supported_terms = ["ENVO:01000252", "ECSO:01000253", "ENVTHES:0000002"] + r = delete_terms_from_unsupported_ontologies(supported_terms) + assert r == supported_terms + + # Terms from unsupported ontologies are removed + mixed_term_list = supported_terms + ["AUTO:1234", "FOO:5678"] + r 
= delete_terms_from_unsupported_ontologies(mixed_term_list) + assert r == supported_terms + + +def test_get_shared_ontology(): + """Test the get_shared_ontology function""" + + # An ontology is returned when the two sets share the same ontology + set1 = ["ENVO:01000252", "ENVO:01000253"] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db == "sqlite:obo:envo" + + set1 = ["ENVO:01000252", "ECSO:01000253"] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db == "sqlite:obo:envo" + + # None is returned for unsupported ontologies + set1 = ["ECSO:01000253"] + set2 = ["ECSO:01000253"] + db = get_shared_ontology(set1, set2) + assert db is None + + # None is returned when the two sets do not share a common ontology + set1 = ["ENVO:01000252", "ENVO:01000253"] + set2 = ["ECSO:01000252"] + db = get_shared_ontology(set1, set2) + assert db is None + + # None is returned when one or both sets are empty + set1 = [] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db is None + + set1 = [] + set2 = [] + db = get_shared_ontology(set1, set2) + assert db is None diff --git a/tests/test_utilities.py b/tests/test_utilities.py index b8b9492..7c8edb9 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -12,6 +12,8 @@ write_workbook, write_eml, expand_curie, + compress_uri, + load_prefixmaps, ) from spinneret.datasets import get_example_eml_dir @@ -101,3 +103,21 @@ def test_expand_curie(): ) # Ungrounded CURIES should return the original CURIE assert expand_curie("AUTO:00001203") == "AUTO:00001203" + + +def test_compress_uri(): + """Test that a URI is compressed to a CURIE""" + + # Return a CURIE if the URI is in the mapping + r = compress_uri("http://purl.obolibrary.org/obo/ENVO_00001203") + assert r == "ENVO:00001203" + + # Return the original URI if the URI is not in the mapping + r = compress_uri("http://example.com/00001203") + assert r == "http://example.com/00001203" + + +def test_load_prefixmaps(): + """Test that the prefixmaps are loaded""" + prefixmaps = load_prefixmaps() + assert isinstance(prefixmaps, pd.DataFrame) From 31a5ff4605c02fd1c8a49e7a98d8ff376233f5ce Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 10:48:55 -0800 Subject: [PATCH 09/24] fix: correct OntoGPT command construction Remove an extra space from the OntoGPT `extract` command construction to prevent potential errors and ensure the command executes as expected. --- src/spinneret/annotator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 78216a5..925eff9 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -746,7 +746,7 @@ def get_ontogpt_annotation( f"--output-format json -o {output_file}" ) if local_model is not None: - cmd += f" -m ollama/{local_model}" + cmd += f" -m ollama/{local_model}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" From 2a46e3388bd2b4013848c83309ef773dc6e29fa0 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 10:55:53 -0800 Subject: [PATCH 10/24] perf: optimize OntoGPT calls using `ollama_chat` Optimize OntoGPT calls by specifying the `ollama_chat` model within the `extract` command, leveraging performance improvements recommended by the `litellm` package. 
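
For illustration, a minimal sketch of how the assembled command changes with this patch. This is not the project's code: the base command, template, input, and model names are placeholders, and only the appended model flag mirrors the diff below.

    # Minimal sketch, assuming a hypothetical local Ollama model name.
    local_model = "llama3.2"
    cmd = "ontogpt extract ... --output-format json -o output.json"
    if local_model is not None:
        # ollama_chat/<model> routes the request through Ollama's chat
        # endpoint, the faster path recommended by the litellm package.
        cmd += f" -m ollama_chat/{local_model}"  # previously: ollama/<model>
    print(cmd)
    # ontogpt extract ... --output-format json -o output.json -m ollama_chat/llama3.2
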
--- src/spinneret/annotator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 925eff9..8452c8d 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -746,7 +746,7 @@ def get_ontogpt_annotation( f"--output-format json -o {output_file}" ) if local_model is not None: - cmd += f" -m ollama/{local_model}" + cmd += f" -m ollama_chat/{local_model}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" From 44ac7d61598c3f363ff71ee4b66622d1322dbe7b Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 11:22:53 -0800 Subject: [PATCH 11/24] feat: introduce `temperature` parameter for OntoGPT calls Add a `temperature` parameter to OntoGPT calls, allowing users to control the model's behavior and adjust the level of creativity or randomness in the generated output. --- src/spinneret/annotator.py | 48 +++++++++++++++++++++++++++++++++++++- src/spinneret/main.py | 5 ++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 8452c8d..ea6127e 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -140,6 +140,7 @@ def annotate_workbook( output_path: str, annotator: str, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> None: @@ -154,6 +155,8 @@ def annotate_workbook( the `get_ontogpt_annotation` function. Similarly, BioPortal requires an API key and is described in the `get_bioportal_annotation` function. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If None, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -188,6 +191,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -195,6 +199,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -202,6 +207,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -209,6 +215,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -216,6 +223,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -224,6 +232,7 @@ def annotate_workbook( eml, annotator="ontogpt", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -231,6 +240,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -570,6 +580,7 @@ def add_measurement_type_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -587,6 +598,8 @@ def add_measurement_type_annotations_to_workbook( annotations in the workbook, so a fresh set may be created. :param local_model: Required if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -647,6 +660,7 @@ def add_measurement_type_annotations_to_workbook( text=attribute_description, template="contains_measurement_of_type", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -705,7 +719,11 @@ def add_measurement_type_annotations_to_workbook( def get_ontogpt_annotation( - text: str, template: str, local_model: str = None, return_ungrounded: bool = False + text: str, + template: str, + local_model: str = None, + temperature: Union[float, None] = None, + return_ungrounded: bool = False, ) -> Union[list, None]: """ :param text: The text to be annotated. @@ -716,6 +734,8 @@ def get_ontogpt_annotation( https://ollama.com/library) and should be installed locally. If `None`, the configured remote model will be used. See the OntoGPT documentation for more information. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: If True, return ungrounded annotations. 
These may be useful in identifying potential concepts to add to a vocabulary, or to identify concepts that a human curator may be capable of @@ -747,6 +767,8 @@ def get_ontogpt_annotation( ) if local_model is not None: cmd += f" -m ollama_chat/{local_model}" + if temperature is not None: + cmd += f" --temperature {temperature}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" @@ -785,6 +807,7 @@ def add_process_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -797,6 +820,8 @@ def add_process_annotations_to_workbook( :param overwrite: If True, overwrite existing `process` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -858,6 +883,7 @@ def add_process_annotations_to_workbook( text=element_description, template="contains_process", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -901,6 +927,7 @@ def add_env_broad_scale_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -913,6 +940,8 @@ def add_env_broad_scale_annotations_to_workbook( :param overwrite: If True, overwrite existing `broad scale environmental context` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -973,6 +1002,7 @@ def add_env_broad_scale_annotations_to_workbook( text=element_description, template=predicate, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1018,6 +1048,7 @@ def add_env_local_scale_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1030,6 +1061,8 @@ def add_env_local_scale_annotations_to_workbook( :param overwrite: If True, overwrite existing `local scale environmental context` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -1092,6 +1125,7 @@ def add_env_local_scale_annotations_to_workbook( text=element_description, template=predicate, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1137,6 +1171,7 @@ def add_env_medium_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1150,6 +1185,8 @@ def add_env_medium_annotations_to_workbook( annotations in the workbook, so a fresh set may be created. :param local_model: Required if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -1210,6 +1247,7 @@ def add_env_medium_annotations_to_workbook( text=attribute_description, template="env_medium", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1253,6 +1291,7 @@ def add_research_topic_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1265,6 +1304,8 @@ def add_research_topic_annotations_to_workbook( :param overwrite: If True, overwrite existing `research topic` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -1326,6 +1367,7 @@ def add_research_topic_annotations_to_workbook( text=element_description, template="research_topic", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1369,6 +1411,7 @@ def add_methods_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1381,6 +1424,8 @@ def add_methods_annotations_to_workbook( :param overwrite: If True, overwrite existing `methods` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -1445,6 +1490,7 @@ def add_methods_annotations_to_workbook( text=element_description, template="uses_method", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 7a812ee..d8d4fd3 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from typing import Union from requests import get, codes from rdflib import Graph import daiquiri @@ -56,6 +57,7 @@ def annotate_workbooks( output_dir: str, config_path: str, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> None: @@ -70,6 +72,8 @@ def annotate_workbooks( :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -112,6 +116,7 @@ def annotate_workbooks( annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) From 1c7926037e76f9137ba25548a030e26e55598f7c Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 11:44:51 -0800 Subject: [PATCH 12/24] fix: update OntoGPT templates to improve grounding Update templates to improve ontology grounding, specifically: 1. Improve template prompts to produce more accurate and precise results. 2. Relax vocabulary branch constraints to enable broader capture of concepts outside of the target branch due to relevant concepts appearing in multiple branches within the vocabulary. Do this for all templates except `contains_process` and `env_medium`, where concepts are sufficiently constrained to a single branch. By doing this we increase our reliance on effective prompts to guide the LLM to extract relevant concepts without extracting irrelevant concepts. The issue of irrelevant concepts may be addressed downstream in an additional post processing step that trims out these concepts. Note vocabulary constraints don't seem to work in vocabularies using the BioPortal API. 3. Replace semantically descriptive labels (e.g., `measurement_type`) in templates with less semantically related labels (e.g., `output`). This change mitigates the risk of the LLM misinterpreting labels as placeholders for extracted values, leading to parsing errors and incorrect results. 
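
To make point 3 concrete, here is a hypothetical before/after of an extracted object as it might appear in the JSON output that `get_ontogpt_annotation` parses. The structure, values, and the small helper below are invented for illustration only; the point is that a neutral slot name such as `output` keeps the model from echoing the label back as a value.

    # Hypothetical extractions, invented for illustration only; the real
    # JSON output is handled in get_ontogpt_annotation().
    before = {"measurement_type": ["measurement type"]}  # label echoed back
    after = {"output": ["chloride concentration", "specific conductance"]}

    def usable_values(extraction: dict) -> list:
        """Drop values that merely restate the slot label."""
        return [
            value
            for slot, values in extraction.items()
            for value in values
            if value.replace(" ", "_") != slot
        ]

    print(usable_values(before))  # []
    print(usable_values(after))   # ['chloride concentration', 'specific conductance']

A value that only repeats the slot label cannot be grounded to a vocabulary concept, which is the failure mode the renaming is meant to avoid.
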
--- .../contains_measurement_of_type.yaml | 11 ++++--- .../ontogpt/templates/contains_process.yaml | 7 +++-- .../ontogpt/templates/env_broad_scale.yaml | 31 ++++++++++--------- .../ontogpt/templates/env_local_scale.yaml | 22 ++----------- .../data/ontogpt/templates/env_medium.yaml | 6 ++-- .../ontogpt/templates/research_topic.yaml | 8 ++--- .../data/ontogpt/templates/uses_method.yaml | 10 +++--- 7 files changed, 40 insertions(+), 55 deletions(-) diff --git a/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml b/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml index f13f06f..6272214 100644 --- a/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml +++ b/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml @@ -7,6 +7,7 @@ description: >- license: https://creativecommons.org/publicdomain/zero/1.0/ prefixes: rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# +# rdfs: http://www.w3.org/2000/01/rdf-schema# ECSO: http://purl.dataone.org/odo/ECSO_ envmd: http://w3id.org/ontogpt/contains_measurement_of_type linkml: https://w3id.org/linkml/ @@ -22,16 +23,16 @@ classes: Dataset: tree_root: true attributes: - measurement_type: - description: the type of scientific measurement (or variable) described in the dataset + output: + description: >- + The measurement type or variable of a dataset annotations: - prompt: semicolon-separated list of the type of scientific measurement (or variable) described in the dataset + prompt: >- + semicolon-separated list of dataset variable names described by the text range: Measurement multivalued: true Measurement: is_a: NamedEntity - id_prefixes: - - ECSO annotations: annotators: bioportal:ECSO diff --git a/src/spinneret/data/ontogpt/templates/contains_process.yaml b/src/spinneret/data/ontogpt/templates/contains_process.yaml index 6d97e4e..2b8e18e 100644 --- a/src/spinneret/data/ontogpt/templates/contains_process.yaml +++ b/src/spinneret/data/ontogpt/templates/contains_process.yaml @@ -25,10 +25,11 @@ classes: Dataset: tree_root: true attributes: - contains_process: - description: the environmental process, biological process, or planned process investigated in the study + output: + description: The environmental or anthropogenic processes of the dataset annotations: - prompt: semicolon-separated list of the environmental process, biological process, or planned process investigated in the study + prompt: >- + semicolon-separated list of environmental processes or anthropogenic processes described by the text range: ContainsProcess multivalued: true diff --git a/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml b/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml index 5a3dda3..8a9e0cd 100644 --- a/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml +++ b/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml @@ -24,27 +24,28 @@ classes: Dataset: tree_root: true attributes: - env_broad_scale: - description: the broad environmental context in which the study was conducted + output: + description: The broad environmental context of the dataset annotations: - prompt: semicolon-separated list of broad environmental contexts in which the study was conducted + prompt: >- + semicolon-separated list of the large scale environmental systems (e.g. 
ecosystem, biome) range: EnvBroadScale multivalued: true EnvBroadScale: is_a: NamedEntity - id_prefixes: - - ENVO +# id_prefixes: +# - ENVO annotations: annotators: sqlite:obo:envo - slot_usage: - id: - values_from: - - EnvoEnvironmentalSystem +# slot_usage: +# id: +# values_from: +# - EnvoEnvironmentalSystem -enums: - EnvoEnvironmentalSystem: - reachable_from: - source_ontology: obo:envo - source_nodes: - - ENVO:01000254 # environmental system +#enums: +# EnvoEnvironmentalSystem: +# reachable_from: +# source_ontology: obo:envo +# source_nodes: +# - ENVO:01000254 # environmental system diff --git a/src/spinneret/data/ontogpt/templates/env_local_scale.yaml b/src/spinneret/data/ontogpt/templates/env_local_scale.yaml index 88f8758..109b595 100644 --- a/src/spinneret/data/ontogpt/templates/env_local_scale.yaml +++ b/src/spinneret/data/ontogpt/templates/env_local_scale.yaml @@ -24,31 +24,15 @@ classes: Dataset: tree_root: true attributes: - env_local_scale: - description: the local environmental context in which the study was conducted + output: + description: The local environmental context of the dataset annotations: - prompt: semicolon-separated list of local environmental contexts in which the study was conducted + prompt: semicolon-separated list of the local scale environmental features range: EnvLocalScale multivalued: true EnvLocalScale: is_a: NamedEntity - id_prefixes: - - ENVO annotations: annotators: sqlite:obo:envo - slot_usage: - id: - values_from: - - EnvoMaterialEntity -enums: - EnvoMaterialEntity: - reachable_from: - source_ontology: obo:envo - source_nodes: # a selection of nodes from the ENVO `material entity` branch - - ENVO:01000813 # astronomical body part - - ENVO:01001813 # construction - - ENVO:01000408 # environmental zone - - ENVO:01003020 # fiat part of an astronomical object - - ENVO:01000281 # layer diff --git a/src/spinneret/data/ontogpt/templates/env_medium.yaml b/src/spinneret/data/ontogpt/templates/env_medium.yaml index 9d14428..8ab18ed 100644 --- a/src/spinneret/data/ontogpt/templates/env_medium.yaml +++ b/src/spinneret/data/ontogpt/templates/env_medium.yaml @@ -23,10 +23,10 @@ classes: Dataset: tree_root: true attributes: - env_medium: - description: the environmental material(s) immediately surrounding the sample or specimen at the time of sampling + output: + description: The environmental material(s) immediately surrounding the measurement variable at the time of sampling annotations: - prompt: semicolon-separated list of the environmental material(s) immediately surrounding the sample or specimen at the time of sampling + prompt: semicolon-separated list of the environmental material(s) immediately surrounding the measurement variable at the time of sampling range: EnvironmentalMedium multivalued: true diff --git a/src/spinneret/data/ontogpt/templates/research_topic.yaml b/src/spinneret/data/ontogpt/templates/research_topic.yaml index b152a3b..7b5d03f 100644 --- a/src/spinneret/data/ontogpt/templates/research_topic.yaml +++ b/src/spinneret/data/ontogpt/templates/research_topic.yaml @@ -22,16 +22,14 @@ classes: Dataset: tree_root: true attributes: - topic: - description: the general scientific area of study concerning the sample(s) + output: + description: The scientific areas of study of the dataset annotations: - prompt: semicolon-separated list of scientific areas of study concerning the sample(s) + prompt: semicolon-separated list of scientific areas of study described by the text range: Topic multivalued: true Topic: is_a: NamedEntity - id_prefixes: - 
- ENVTHES annotations: annotators: bioportal:ENVTHES diff --git a/src/spinneret/data/ontogpt/templates/uses_method.yaml b/src/spinneret/data/ontogpt/templates/uses_method.yaml index 5ccfaf2..77b8534 100644 --- a/src/spinneret/data/ontogpt/templates/uses_method.yaml +++ b/src/spinneret/data/ontogpt/templates/uses_method.yaml @@ -22,16 +22,16 @@ classes: Dataset: tree_root: true attributes: - method: - description: the type of method or technique used to gather data + output: + description: >- + The type of method or technique used to create the dataset annotations: - prompt: semicolon-separated list of the type of method or technique used to gather data + prompt: >- + semicolon-separated list of the type of method or technique used to create the dataset range: Method multivalued: true Method: is_a: NamedEntity - id_prefixes: - - ENVTHES annotations: annotators: bioportal:ENVTHES From 5a09e7ed8c17e56083e960e08769f1ed08781b56 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 16:33:10 -0800 Subject: [PATCH 13/24] feat: enhance CURIE expansion with expanded prefix map Updated the `expand_curie` function to utilize a significantly larger prefix map, enabling the expansion of a wider range of CURIEs. --- src/spinneret/utilities.py | 20 ++++++++++---------- tests/test_utilities.py | 7 +++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 922691a..6d56f2a 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -97,21 +97,21 @@ def write_eml(eml: etree._ElementTree, output_path: str) -> None: def expand_curie(curie: str) -> str: """ + Expand a CURIE into a URI based on the prefix mappings in the OBO and + BioPortal converters. + :param curie: The CURIE to be expanded. :returns: The expanded CURIE. Returns the original CURIE if the prefix does not have a mapping. + :notes: This is a wrapper function around the `prefixmaps` and `curies` + libraries.
""" - mapping = { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "linkml": "https://w3id.org/linkml/", - "ECSO": "http://purl.dataone.org/odo/ECSO_", - "ENVO": "http://purl.obolibrary.org/obo/ENVO_", - "BFO": "http://purl.obolibrary.org/obo/BFO_", - "ENVTHES": "http://vocabs.lter-europe.net/EnvThes/", - "AUTO": "AUTO:", # return ungrounded CURIEs as is - } + prefixmaps = load_prefixmaps() prefix, suffix = curie.split(":") - return f"{mapping[prefix]}{suffix}" + namespace = prefixmaps[prefixmaps["prefix"] == prefix]["namespace"] + if len(namespace) > 0: + return f"{namespace.to_string(index=False).strip()}{suffix}" + return curie def compress_uri(uri: str) -> str: diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 7c8edb9..6a566d7 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -93,6 +93,8 @@ def test_write_eml(tmp_path): def test_expand_curie(): """Test that a CURIE is expanded to a URL""" + + # Recognized CURIES should return the corresponding URI assert expand_curie("ECSO:00001203") == "http://purl.dataone.org/odo/ECSO_00001203" assert ( expand_curie("ENVO:00001203") == "http://purl.obolibrary.org/obo/ENVO_00001203" ) @@ -101,6 +103,11 @@ def test_expand_curie(): expand_curie("ENVTHES:00001203") == "http://vocabs.lter-europe.net/EnvThes/00001203" ) + assert ( + expand_curie("OBOE:00001203") + == "http://ecoinformatics.org/oboe/oboe.1.2/00001203" + ) + # Ungrounded CURIES should return the original CURIE assert expand_curie("AUTO:00001203") == "AUTO:00001203" From 31d0e9cf12ae01017038b253c74b7093b0282593 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 13 Dec 2024 06:50:34 -0800 Subject: [PATCH 14/24] fix: handle multiple colons in CURIE expansion Correct the expand_curie function to handle CURIEs containing more than one colon, preventing the ValueError: too many values to unpack error. --- src/spinneret/utilities.py | 15 ++++++++++++++- tests/test_utilities.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 6d56f2a..80ad65c 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -5,10 +5,13 @@ import importlib from urllib.parse import urlparse from json import load +import daiquiri import pandas as pd from lxml import etree +logger = daiquiri.getLogger(__name__) + def load_configuration(config_file: str) -> None: """Loads the configuration file as global environment variables for use @@ -107,7 +110,17 @@ def expand_curie(curie: str) -> str: libraries. """ prefixmaps = load_prefixmaps() - prefix, suffix = curie.split(":") + + # On rare occasion we encounter a CURIE with multiple colons, so we need + # to use exception handling and issue a warning. + try: + prefix, suffix = curie.split(":") + except ValueError: + logger.warning( + f"Warning: {curie} is not recognized. Returning the original string." + ) + return curie + namespace = prefixmaps[prefixmaps["prefix"] == prefix]["namespace"] if len(namespace) > 0: return f"{namespace.to_string(index=False).strip()}{suffix}" diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 6a566d7..3baa6f4 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -112,6 +112,16 @@ def test_expand_curie(): assert expand_curie("AUTO:00001203") == "AUTO:00001203" +def test_expand_curie_handles_multiple_colons(): + """Test that a CURIE with multiple colons does not raise an error + + This is an unusual case that has occurred in past integration tests.
Not sure + what the source of this issue is but are testing for it here. + """ + curie = "ENVO:PATO:00001203" + assert expand_curie(curie) == curie + + def test_compress_uri(): """Test that a URI is compressed to a CURIE""" From 8fd9962ac308279f6c61fea0ea697d0d79e01145 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Thu, 19 Dec 2024 11:00:48 -0500 Subject: [PATCH 15/24] feat: visualize grounding rates across OntoGPT configurations Implement a visualization to assess the grounding success rates of different OntoGPT configurations. This visualization utilizes a 100% stacked bar chart to compare and contrast the performance of various configurations. --- environment-min.yml | 4 +- environment.yml | 154 +++++++------ poetry.lock | 435 ++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + requirements.txt | 130 ++++++----- src/spinneret/benchmark.py | 116 +++++++++- tests/test_benchmark.py | 35 +++ 7 files changed, 748 insertions(+), 127 deletions(-) diff --git a/environment-min.yml b/environment-min.yml index 3983356..3f48ce9 100644 --- a/environment-min.yml +++ b/environment-min.yml @@ -17,7 +17,5 @@ dependencies: - sphinx - sphinx-autoapi - daiquiri - - pip - - pip: - - git+https://github.com/clnsmth/soso.git@main + - matplotlib prefix: /opt/miniconda3/envs/spinneret diff --git a/environment.yml b/environment.yml index e417283..d7c40db 100644 --- a/environment.yml +++ b/environment.yml @@ -6,25 +6,25 @@ dependencies: - alabaster=1.0.0 - annotated-types=0.7.0 - anyio=4.6.2.post1 - - astroid=3.3.5 - babel=2.16.0 - backoff=2.2.1 - - black=24.10.0 + - brotli=1.1.0 + - brotli-bin=1.1.0 - brotli-python=1.1.0 - bzip2=1.0.8 - - ca-certificates=2024.8.30 - - certifi=2024.8.30 + - ca-certificates=2024.12.14 + - certifi=2024.12.14 - cffi=1.17.1 - - charset-normalizer=3.4.0 - click=8.1.7 - click-option-group=0.5.6 - colorama=0.4.6 - - coverage=7.6.8 - - daiquiri=3.0.0 - - dill=0.3.9 + - contourpy=1.3.1 + - cycler=0.12.1 - docutils=0.21.2 - dotty-dict=1.3.1 - exceptiongroup=1.2.2 + - fonttools=4.55.3 + - freetype=2.12.1 - gitdb=4.0.11 - gitpython=3.1.43 - gql=3.5.0 @@ -37,122 +37,129 @@ dependencies: - importlib-resources=6.4.5 - importlib_resources=6.4.5 - iniconfig=2.0.0 - - isodate=0.7.2 - isort=5.13.2 - jinja2=3.1.4 + - kiwisolver=1.4.7 + - lcms2=2.16 + - lerc=4.0.0 - libblas=3.9.0 + - libbrotlicommon=1.1.0 + - libbrotlidec=1.1.0 + - libbrotlienc=1.1.0 - libcblas=3.9.0 - libcxx=19.1.4 + - libdeflate=1.23 - libexpat=2.6.4 - libffi=3.4.2 - libgfortran=5.0.0 - libgfortran5=13.2.0 - libiconv=1.17 + - libjpeg-turbo=3.0.0 - liblapack=3.9.0 + - liblzma=5.6.3 - libopenblas=0.3.28 + - libpng=1.6.44 - libsqlite=3.47.0 + - libtiff=4.7.0 + - libwebp-base=1.4.0 + - libxcb=1.17.0 - libxml2=2.13.5 - libxslt=1.1.39 - libzlib=1.3.1 - llvm-openmp=19.1.4 - lxml=5.3.0 - markdown-it-py=3.0.0 - - markupsafe=3.0.2 + - matplotlib=3.10.0 + - matplotlib-base=3.10.0 - mccabe=0.7.0 - mdit-py-plugins=0.4.2 - mdurl=0.1.2 - multidict=6.1.0 + - munkres=1.1.4 - mypy_extensions=1.0.0 - myst-parser=4.0.0 - ncurses=6.5 + - openjpeg=2.5.3 - openssl=3.4.0 - - packaging=24.2 - - pandas=2.2.3 - pathspec=0.12.1 + - pillow=11.0.0 - pip=24.3.1 - platformdirs=4.3.6 - pluggy=1.5.0 - propcache=0.2.0 + - pthread-stubs=0.4 - pycparser=2.22 - - pydantic=2.10.1 - - pydantic-core=2.27.1 - pygments=2.18.0 - - pylint=3.3.1 - - pyparsing=3.2.0 - pysocks=1.7.1 - pytest=8.3.3 - - pytest-cov=6.0.0 - pytest-mock=3.14.0 - python=3.11.10 - python-dateutil=2.9.0.post0 - - python-gitlab=4.13.0 - python-json-logger=2.0.7 - - 
python-semantic-release=9.14.0 - - python-tzdata=2024.2 - python_abi=3.11 - - pytz=2024.1 - pyyaml=6.0.2 - - rdflib=7.1.1 + - qhull=2020.2 - readline=8.2 - requests=2.32.3 - requests-toolbelt=1.0.0 - - rich=13.9.4 - - setuptools=75.6.0 - shellingham=1.5.4 - six=1.16.0 - - smmap=5.0.0 - sniffio=1.3.1 - snowballstemmer=2.2.0 - - sphinx=8.1.3 - - sphinx-autoapi=3.3.3 - sphinxcontrib-applehelp=2.0.0 - sphinxcontrib-devhelp=2.0.0 - sphinxcontrib-htmlhelp=2.1.0 - sphinxcontrib-jsmath=1.0.1 - sphinxcontrib-qthelp=2.0.0 - - sphinxcontrib-serializinghtml=1.1.10 - stdlib-list=0.11.0 - tk=8.6.13 - toml=0.10.2 - tomli=2.1.0 - tomlkit=0.13.2 + - tornado=6.4.2 - typing-extensions=4.12.2 - typing_extensions=4.12.2 - - tzdata=2024b + - unicodedata2=15.1.0 - wheel=0.45.1 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 - xz=5.2.6 - yaml=0.2.5 - - yarl=1.18.0 - - zipp=3.21.0 - zstandard=0.23.0 - zstd=1.5.6 - pip: - adeft==0.12.3 - aiofiles==24.1.0 - aiohappyeyeballs==2.4.3 - - aiohttp==3.11.7 + - aiohttp==3.10.10 - aiosignal==1.3.1 - airium==0.2.6 - aniso8601==9.0.1 - antlr4-python3-runtime==4.9.3 - appdirs==1.4.4 - arrow==1.3.0 + - astroid==3.2.4 - attrs==24.2.0 - bcp47==0.1.0 - beautifulsoup4==4.12.3 - bioc==2.1 - - blinker==1.9.0 - - boto3==1.35.69 - - botocore==1.35.69 + - black==24.8.0 + - blinker==1.8.2 + - boto3==1.35.54 + - botocore==1.35.54 - cachier==3.1.2 - cattrs==24.1.2 - cfgraph==0.2.1 - chardet==5.2.0 - - class-resolver==0.5.4 + - charset-normalizer==3.3.2 + - class-resolver==0.5.2 + - coverage==7.6.1 - curies==0.9.0 + - daiquiri==3.2.5.1 - defusedxml==0.7.1 - - deprecated==1.2.15 + - deprecated==1.2.14 - deprecation==2.1.0 + - dill==0.3.8 - diskcache==5.6.3 - distro==1.9.0 - docopt==0.6.2 @@ -161,7 +168,7 @@ dependencies: - eutils==0.6.0 - fastobo==0.12.3 - filelock==3.16.1 - - flask==3.1.0 + - flask==3.0.3 - flask-restx==1.3.0 - fqdn==1.5.1 - frontend==0.0.3 @@ -173,7 +180,7 @@ dependencies: - h11==0.14.0 - hbreader==0.9.1 - html5lib==1.1 - - httpcore==1.0.7 + - httpcore==1.0.6 - httpx==0.27.2 - huggingface-hub==0.26.2 - ijson==3.3.0 @@ -181,9 +188,10 @@ dependencies: - inflect==7.4.0 - inflection==0.5.1 - intervaltree==3.1.0 + - isodate==0.6.1 - isoduration==20.11.0 - itsdangerous==2.2.0 - - jiter==0.7.1 + - jiter==0.7.0 - jmespath==1.0.1 - joblib==1.4.2 - json-flattener==0.1.9 @@ -203,83 +211,105 @@ dependencies: - linkml-owl==0.3.0 - linkml-renderer==0.3.1 - linkml-runtime==1.8.3 - - litellm==1.52.16 + - litellm==1.51.2 + - markupsafe==2.1.5 - more-click==0.1.2 - more-itertools==10.5.0 - ndex2==3.9.0 - networkx==3.4.2 - nltk==3.9.1 - - numpy==2.0.2 + - numpy==2.1.1 - oaklib==0.6.18 - ols-client==0.1.4 - - ontogpt==1.0.8 + - ontogpt==1.0.6 - ontoportal-client==0.0.4 - - openai==1.55.1 + - openai==1.53.0 - openpyxl==3.1.5 - owlrl==6.0.2 + - packaging==24.1 + - pandas==2.2.2 - pansql==0.0.1 - parse==1.20.2 - ply==3.11 - - portalocker==3.0.0 + - portalocker==2.10.1 - prefixcommons==0.1.12 - prefixmaps==0.2.6 - - prettytable==3.12.0 + - prettytable==3.11.0 - pronto==2.5.8 + - pydantic==2.9.2 + - pydantic-core==2.23.4 - pyjsg==0.11.10 - - pymupdf==1.24.14 + - pylint==3.2.7 + - pymupdf==1.24.13 + - pyparsing==3.1.4 - pyshacl==0.26.0 - pyshex==0.8.1 - pyshexc==0.9.1 - pysolr==3.10.0 - - pystow==0.6.1 + - pystow==0.5.6 + - pytest-cov==5.0.0 - pytest-logging==2015.11.4 - python-dotenv==1.0.1 + - python-gitlab==4.11.1 + - python-semantic-release==9.8.8 - pytrie==0.4.0 + - pytz==2024.2 - ratelimit==2.2.1 + - rdflib==7.0.0 - rdflib-jsonld==0.6.1 - rdflib-shim==1.0.3 - referencing==0.35.1 - - 
regex==2024.11.6 + - regex==2024.9.11 - requests-cache==1.2.1 - rfc3339-validator==0.1.4 - rfc3987==1.3.8 - - rpds-py==0.21.0 + - rich==13.8.1 + - rpds-py==0.20.1 - ruamel-yaml==0.18.6 - ruamel-yaml-clib==0.2.12 - - s3transfer==0.10.4 + - s3transfer==0.10.3 - scikit-learn==1.4.2 - scipy==1.14.1 - semsql==0.3.3 + - setuptools==75.3.0 - shexjsg==0.8.2 + - smmap==5.0.1 - sortedcontainers==2.4.0 + - soso==0.2.0 - soupsieve==2.6 - sparqlslurper==0.5.1 - sparqlwrapper==2.0.0 + - sphinx==8.0.2 + - sphinx-autoapi==3.3.1 + - sphinxcontrib-serializinghtml==2.0.0 + - spinneret==0.2.0 - sqlalchemy==2.0.36 - sqlalchemy-utils==0.38.3 - - sssom==0.4.13 + - sssom==0.4.12 - sssom-schema==1.0.0 - - starlette==0.41.3 + - starlette==0.41.2 - tenacity==9.0.0 - threadpoolctl==3.5.0 - - tiktoken==0.8.0 - - tokenizers==0.20.3 - - tqdm==4.67.1 - - typeguard==4.4.1 + - tiktoken==0.7.0 + - tokenizers==0.20.1 + - tqdm==4.66.6 + - typeguard==4.4.0 - types-python-dateutil==2.9.0.20241003 + - tzdata==2024.1 - unidecode==1.3.8 - uri-template==1.3.0 - url-normalize==1.4.3 - urllib3==1.26.20 - - uvicorn==0.32.1 + - uvicorn==0.32.0 - validators==0.34.0 - watchdog==6.0.0 - wcwidth==0.2.13 - - webcolors==24.11.1 + - webcolors==24.8.0 - webencodings==0.5.1 - - werkzeug==3.1.3 + - werkzeug==3.1.1 - wikipedia==1.4.0 - wikipedia-api==0.7.1 - - wrapt==1.17.0 - - git+https://github.com/clnsmth/soso.git@main + - wrapt==1.16.0 + - yarl==1.17.1 + - zipp==3.20.2 prefix: /opt/miniconda3/envs/spinneret diff --git a/poetry.lock b/poetry.lock index 98a35ed..a2fc226 100644 --- a/poetry.lock +++ b/poetry.lock @@ -705,6 +705,79 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "contourpy" +version = "1.3.1" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.10" +files = [ + {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595"}, + {file = "contourpy-1.3.1-cp310-cp310-win32.whl", hash = "sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697"}, + {file = "contourpy-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f"}, + {file = "contourpy-1.3.1-cp311-cp311-win32.whl", hash = "sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375"}, + {file = "contourpy-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d"}, + {file = "contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e"}, + {file = "contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81"}, + {file = 
"contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1"}, + {file = "contourpy-1.3.1-cp313-cp313-win32.whl", hash = "sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82"}, + {file = "contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53"}, + {file = "contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = 
["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + [[package]] name = "coverage" version = "7.6.1" @@ -812,6 +885,21 @@ pandas = ["pandas"] rdflib = ["rdflib"] tests = ["coverage", "pytest", "requests"] +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + [[package]] name = "daiquiri" version = "3.2.5.1" @@ -1099,6 +1187,79 @@ dev = ["Faker (==2.0.0)", "black", "blinker", "invoke (==2.2.0)", "mock (==3.0.5 doc = ["Sphinx (==5.3.0)", "alabaster (==0.7.12)", "sphinx-issues (==3.0.1)"] test = ["Faker (==2.0.0)", "blinker", "invoke (==2.2.0)", "mock (==3.0.5)", "pytest (==7.0.1)", "pytest-benchmark (==3.4.1)", "pytest-cov (==4.0.0)", "pytest-flask (==1.3.0)", "pytest-mock (==3.6.1)", "pytest-profiling (==1.7.0)", "setuptools", "twine (==3.8.0)", "tzlocal"] +[[package]] +name = "fonttools" +version = "4.55.3" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1dcc07934a2165ccdc3a5a608db56fb3c24b609658a5b340aee4ecf3ba679dc0"}, + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f7d66c15ba875432a2d2fb419523f5d3d347f91f48f57b8b08a2dfc3c39b8a3f"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e4ae3592e62eba83cd2c4ccd9462dcfa603ff78e09110680a5444c6925d841"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d65a3022c35e404d19ca14f291c89cc5890032ff04f6c17af0bd1927299674"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d342e88764fb201286d185093781bf6628bbe380a913c24adf772d901baa8276"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd68c87a2bfe37c5b33bcda0fba39b65a353876d3b9006fde3adae31f97b3ef5"}, + {file = "fonttools-4.55.3-cp310-cp310-win32.whl", hash = "sha256:1bc7ad24ff98846282eef1cbeac05d013c2154f977a79886bb943015d2b1b261"}, + {file = "fonttools-4.55.3-cp310-cp310-win_amd64.whl", hash = "sha256:b54baf65c52952db65df39fcd4820668d0ef4766c0ccdf32879b77f7c804d5c5"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8c4491699bad88efe95772543cd49870cf756b019ad56294f6498982408ab03e"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5323a22eabddf4b24f66d26894f1229261021dacd9d29e89f7872dd8c63f0b8b"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5480673f599ad410695ca2ddef2dfefe9df779a9a5cda89503881e503c9c7d90"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9da6d65cd7aa6b0f806556f4985bcbf603bf0c5c590e61b43aa3e5a0f822d0"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e894b5bd60d9f473bed7a8f506515549cc194de08064d829464088d23097331b"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:aee3b57643827e237ff6ec6d28d9ff9766bd8b21e08cd13bff479e13d4b14765"}, + {file = "fonttools-4.55.3-cp311-cp311-win32.whl", hash = "sha256:eb6ca911c4c17eb51853143624d8dc87cdcdf12a711fc38bf5bd21521e79715f"}, + {file = "fonttools-4.55.3-cp311-cp311-win_amd64.whl", hash = "sha256:6314bf82c54c53c71805318fcf6786d986461622dd926d92a465199ff54b1b72"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f9e736f60f4911061235603a6119e72053073a12c6d7904011df2d8fad2c0e35"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a8aa2c5e5b8b3bcb2e4538d929f6589a5c6bdb84fd16e2ed92649fb5454f11c"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07f8288aacf0a38d174445fc78377a97fb0b83cfe352a90c9d9c1400571963c7"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8d5e8916c0970fbc0f6f1bece0063363bb5857a7f170121a4493e31c3db3314"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ae3b6600565b2d80b7c05acb8e24d2b26ac407b27a3f2e078229721ba5698427"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:54153c49913f45065c8d9e6d0c101396725c5621c8aee744719300f79771d75a"}, + {file = "fonttools-4.55.3-cp312-cp312-win32.whl", hash = "sha256:827e95fdbbd3e51f8b459af5ea10ecb4e30af50221ca103bea68218e9615de07"}, + {file = "fonttools-4.55.3-cp312-cp312-win_amd64.whl", hash = "sha256:e6e8766eeeb2de759e862004aa11a9ea3d6f6d5ec710551a88b476192b64fd54"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a430178ad3e650e695167cb53242dae3477b35c95bef6525b074d87493c4bf29"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:529cef2ce91dc44f8e407cc567fae6e49a1786f2fefefa73a294704c415322a4"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e75f12c82127486fac2d8bfbf5bf058202f54bf4f158d367e41647b972342ca"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:859c358ebf41db18fb72342d3080bce67c02b39e86b9fbcf1610cca14984841b"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:546565028e244a701f73df6d8dd6be489d01617863ec0c6a42fa25bf45d43048"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aca318b77f23523309eec4475d1fbbb00a6b133eb766a8bdc401faba91261abe"}, + {file = "fonttools-4.55.3-cp313-cp313-win32.whl", hash = "sha256:8c5ec45428edaa7022f1c949a632a6f298edc7b481312fc7dc258921e9399628"}, + {file = "fonttools-4.55.3-cp313-cp313-win_amd64.whl", hash = "sha256:11e5de1ee0d95af4ae23c1a138b184b7f06e0b6abacabf1d0db41c90b03d834b"}, + {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:caf8230f3e10f8f5d7593eb6d252a37caf58c480b19a17e250a63dad63834cf3"}, + {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b586ab5b15b6097f2fb71cafa3c98edfd0dba1ad8027229e7b1e204a58b0e09d"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8c2794ded89399cc2169c4d0bf7941247b8d5932b2659e09834adfbb01589aa"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf4fe7c124aa3f4e4c1940880156e13f2f4d98170d35c749e6b4f119a872551e"}, + {file = 
"fonttools-4.55.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:86721fbc389ef5cc1e2f477019e5069e8e4421e8d9576e9c26f840dbb04678de"}, + {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:89bdc5d88bdeec1b15af790810e267e8332d92561dce4f0748c2b95c9bdf3926"}, + {file = "fonttools-4.55.3-cp38-cp38-win32.whl", hash = "sha256:bc5dbb4685e51235ef487e4bd501ddfc49be5aede5e40f4cefcccabc6e60fb4b"}, + {file = "fonttools-4.55.3-cp38-cp38-win_amd64.whl", hash = "sha256:cd70de1a52a8ee2d1877b6293af8a2484ac82514f10b1c67c1c5762d38073e56"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bdcc9f04b36c6c20978d3f060e5323a43f6222accc4e7fcbef3f428e216d96af"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c3ca99e0d460eff46e033cd3992a969658c3169ffcd533e0a39c63a38beb6831"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22f38464daa6cdb7b6aebd14ab06609328fe1e9705bb0fcc7d1e69de7109ee02"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed63959d00b61959b035c7d47f9313c2c1ece090ff63afea702fe86de00dbed4"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5e8d657cd7326eeaba27de2740e847c6b39dde2f8d7cd7cc56f6aad404ddf0bd"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fb594b5a99943042c702c550d5494bdd7577f6ef19b0bc73877c948a63184a32"}, + {file = "fonttools-4.55.3-cp39-cp39-win32.whl", hash = "sha256:dc5294a3d5c84226e3dbba1b6f61d7ad813a8c0238fceea4e09aa04848c3d851"}, + {file = "fonttools-4.55.3-cp39-cp39-win_amd64.whl", hash = "sha256:aedbeb1db64496d098e6be92b2e63b5fac4e53b1b92032dfc6988e1ea9134a4d"}, + {file = "fonttools-4.55.3-py3-none-any.whl", hash = "sha256:f412604ccbeee81b091b420272841e5ec5ef68967a9790e80bffd0e30b8e2977"}, + {file = "fonttools-4.55.3.tar.gz", hash = "sha256:3983313c2a04d6cc1fe9251f8fc647754cf49a61dac6cb1e7249ae67afaafc45"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "fqdn" version = "1.5.1" @@ -2141,6 +2302,129 @@ lark = ">=1.1.2" linkml-runtime = ">=1.1.24" prefixmaps = ">=0.2.0,<0.3.0" +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file 
= "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = 
"sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + [[package]] name = "lark" version = "1.2.2" @@ -2555,6 +2839,63 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "matplotlib" +version = "3.10.0" +description = "Python plotting package" +optional = false +python-versions 
= ">=3.10" +files = [ + {file = "matplotlib-3.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2c5829a5a1dd5a71f0e31e6e8bb449bc0ee9dbfb05ad28fc0c6b55101b3a4be6"}, + {file = "matplotlib-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2a43cbefe22d653ab34bb55d42384ed30f611bcbdea1f8d7f431011a2e1c62e"}, + {file = "matplotlib-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:607b16c8a73943df110f99ee2e940b8a1cbf9714b65307c040d422558397dac5"}, + {file = "matplotlib-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01d2b19f13aeec2e759414d3bfe19ddfb16b13a1250add08d46d5ff6f9be83c6"}, + {file = "matplotlib-3.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e6c6461e1fc63df30bf6f80f0b93f5b6784299f721bc28530477acd51bfc3d1"}, + {file = "matplotlib-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:994c07b9d9fe8d25951e3202a68c17900679274dadfc1248738dcfa1bd40d7f3"}, + {file = "matplotlib-3.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:fd44fc75522f58612ec4a33958a7e5552562b7705b42ef1b4f8c0818e304a363"}, + {file = "matplotlib-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c58a9622d5dbeb668f407f35f4e6bfac34bb9ecdcc81680c04d0258169747997"}, + {file = "matplotlib-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:845d96568ec873be63f25fa80e9e7fae4be854a66a7e2f0c8ccc99e94a8bd4ef"}, + {file = "matplotlib-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5439f4c5a3e2e8eab18e2f8c3ef929772fd5641876db71f08127eed95ab64683"}, + {file = "matplotlib-3.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4673ff67a36152c48ddeaf1135e74ce0d4bce1bbf836ae40ed39c29edf7e2765"}, + {file = "matplotlib-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:7e8632baebb058555ac0cde75db885c61f1212e47723d63921879806b40bec6a"}, + {file = "matplotlib-3.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4659665bc7c9b58f8c00317c3c2a299f7f258eeae5a5d56b4c64226fca2f7c59"}, + {file = "matplotlib-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d44cb942af1693cced2604c33a9abcef6205601c445f6d0dc531d813af8a2f5a"}, + {file = "matplotlib-3.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a994f29e968ca002b50982b27168addfd65f0105610b6be7fa515ca4b5307c95"}, + {file = "matplotlib-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b0558bae37f154fffda54d779a592bc97ca8b4701f1c710055b609a3bac44c8"}, + {file = "matplotlib-3.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:503feb23bd8c8acc75541548a1d709c059b7184cde26314896e10a9f14df5f12"}, + {file = "matplotlib-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c40ba2eb08b3f5de88152c2333c58cee7edcead0a2a0d60fcafa116b17117adc"}, + {file = "matplotlib-3.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96f2886f5c1e466f21cc41b70c5a0cd47bfa0015eb2d5793c88ebce658600e25"}, + {file = "matplotlib-3.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:12eaf48463b472c3c0f8dbacdbf906e573013df81a0ab82f0616ea4b11281908"}, + {file = "matplotlib-3.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fbbabc82fde51391c4da5006f965e36d86d95f6ee83fb594b279564a4c5d0d2"}, + {file = "matplotlib-3.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad2e15300530c1a94c63cfa546e3b7864bd18ea2901317bae8bbf06a5ade6dcf"}, + {file = "matplotlib-3.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:3547d153d70233a8496859097ef0312212e2689cdf8d7ed764441c77604095ae"}, + {file = "matplotlib-3.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c55b20591ced744aa04e8c3e4b7543ea4d650b6c3c4b208c08a05b4010e8b442"}, + {file = "matplotlib-3.10.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ade1003376731a971e398cc4ef38bb83ee8caf0aee46ac6daa4b0506db1fd06"}, + {file = "matplotlib-3.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:95b710fea129c76d30be72c3b38f330269363fbc6e570a5dd43580487380b5ff"}, + {file = "matplotlib-3.10.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdbaf909887373c3e094b0318d7ff230b2ad9dcb64da7ade654182872ab2593"}, + {file = "matplotlib-3.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d907fddb39f923d011875452ff1eca29a9e7f21722b873e90db32e5d8ddff12e"}, + {file = "matplotlib-3.10.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3b427392354d10975c1d0f4ee18aa5844640b512d5311ef32efd4dd7db106ede"}, + {file = "matplotlib-3.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5fd41b0ec7ee45cd960a8e71aea7c946a28a0b8a4dcee47d2856b2af051f334c"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:81713dd0d103b379de4516b861d964b1d789a144103277769238c732229d7f03"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:359f87baedb1f836ce307f0e850d12bb5f1936f70d035561f90d41d305fdacea"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80dc3a4add4665cf2faa90138384a7ffe2a4e37c58d83e115b54287c4f06ef"}, + {file = "matplotlib-3.10.0.tar.gz", hash = "sha256:b886d02a581b96704c9d1ffe55709e49b4d2d52709ccebc4be42db856e511278"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1,<0.17.0)", "pybind11 (>=2.13.2,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] + [[package]] name = "mccabe" version = "0.7.0" @@ -3193,6 +3534,98 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pillow" +version = "11.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = 
"sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "platformdirs" version = "4.3.6" @@ -5807,4 +6240,4 @@ type = ["pytest-mypy"] [metadata] 
lock-version = "2.0" python-versions = "^3.11" -content-hash = "c615df7205bca842a4fd1edb0ea32df5fa2ff3d72506cc28285f4d4a300e708f" +content-hash = "7af44d4cbf6980cf49dbaa6c299f86365b442cef1246d8538c34be00f9d230b7" diff --git a/pyproject.toml b/pyproject.toml index 0185814..71497a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ lxml = "^5.3.0" soso = {git = "https://github.com/clnsmth/soso.git", rev = "main"} ontogpt = "^1.0.6" daiquiri = "^3.2.5.1" +matplotlib = "^3.10.0" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" diff --git a/requirements.txt b/requirements.txt index 55de71b..3235fea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ adeft==0.12.3 aiofiles==24.1.0 aiohappyeyeballs==2.4.3 -aiohttp==3.11.7 +aiohttp==3.10.10 aiosignal==1.3.1 airium==0.2.6 alabaster==1.0.0 @@ -11,36 +11,38 @@ antlr4-python3-runtime==4.9.3 anyio==4.6.2.post1 appdirs==1.4.4 arrow==1.3.0 -astroid==3.3.5 +astroid==3.2.4 attrs==24.2.0 babel==2.16.0 backoff==2.2.1 bcp47==0.1.0 beautifulsoup4==4.12.3 bioc==2.1 -black==24.10.0 -blinker==1.9.0 -boto3==1.35.69 -botocore==1.35.69 +black==24.8.0 +blinker==1.8.2 +boto3==1.35.54 +botocore==1.35.54 Brotli==1.1.0 cachier==3.1.2 cattrs==24.1.2 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 CFGraph==0.2.1 chardet==5.2.0 -charset-normalizer==3.4.0 -class_resolver==0.5.4 +charset-normalizer==3.3.2 +class_resolver==0.5.2 click==8.1.7 click-option-group==0.5.6 colorama==0.4.6 -coverage==7.6.8 +contourpy==1.3.1 +coverage==7.6.1 curies==0.9.0 -daiquiri==3.0.0 +cycler==0.12.1 +daiquiri==3.2.5.1 defusedxml==0.7.1 -Deprecated==1.2.15 +Deprecated==1.2.14 deprecation==2.1.0 -dill==0.3.9 +dill==0.3.8 diskcache==5.6.3 distro==1.9.0 docopt==0.6.2 @@ -52,8 +54,9 @@ eutils==0.6.0 exceptiongroup==1.2.2 fastobo==0.12.3 filelock==3.16.1 -Flask==3.1.0 +Flask==3.0.3 flask-restx==1.3.0 +fonttools==4.55.3 fqdn==1.5.1 frontend==0.0.3 frozenlist==1.5.0 @@ -70,7 +73,7 @@ h2==4.1.0 hbreader==0.9.1 hpack==4.0.0 html5lib==1.1 -httpcore==1.0.7 +httpcore==1.0.6 httpx==0.27.2 huggingface-hub==0.26.2 hyperframe==6.0.1 @@ -83,12 +86,12 @@ inflect==7.4.0 inflection==0.5.1 iniconfig==2.0.0 intervaltree==3.1.0 -isodate==0.7.2 +isodate==0.6.1 isoduration==20.11.0 isort==5.13.2 itsdangerous==2.2.0 Jinja2==3.1.4 -jiter==0.7.1 +jiter==0.7.0 jmespath==1.0.1 joblib==1.4.2 json-flattener==0.1.9 @@ -102,148 +105,155 @@ jsonschema==4.23.0 jsonschema-specifications==2024.10.1 kgcl-rdflib==0.5.0 kgcl_schema==0.6.9 +kiwisolver==1.4.7 lark==1.2.2 linkml==1.8.5 linkml-dataops==0.1.0 linkml-owl==0.3.0 linkml-renderer==0.3.1 linkml-runtime==1.8.3 -litellm==1.52.16 +litellm==1.51.2 lxml==5.3.0 markdown-it-py==3.0.0 -MarkupSafe==3.0.2 +MarkupSafe==2.1.5 +matplotlib==3.10.0 mccabe==0.7.0 mdit-py-plugins==0.4.2 mdurl==0.1.2 more-click==0.1.2 more-itertools==10.5.0 multidict==6.1.0 +munkres==1.1.4 mypy-extensions==1.0.0 myst-parser==4.0.0 ndex2==3.9.0 networkx==3.4.2 nltk==3.9.1 -numpy==2.0.2 +numpy==2.1.1 oaklib==0.6.18 ols-client==0.1.4 -ontogpt==1.0.8 +ontogpt==1.0.6 ontoportal-client==0.0.4 -openai==1.55.1 +openai==1.53.0 openpyxl==3.1.5 owlrl==6.0.2 -packaging==24.2 -pandas==2.2.3 +packaging==24.1 +pandas==2.2.2 pansql==0.0.1 parse==1.20.2 pathspec==0.12.1 +pillow==11.0.0 pip==24.3.1 platformdirs==4.3.6 pluggy==1.5.0 ply==3.11 -portalocker==3.0.0 +portalocker==2.10.1 prefixcommons==0.1.12 prefixmaps==0.2.6 -prettytable==3.12.0 +prettytable==3.11.0 pronto==2.5.8 propcache==0.2.0 pycparser==2.22 -pydantic==2.10.1 -pydantic_core==2.27.1 +pydantic==2.9.2 
+pydantic_core==2.23.4 Pygments==2.18.0 PyJSG==0.11.10 -pylint==3.3.1 -PyMuPDF==1.24.14 -pyparsing==3.2.0 +pylint==3.2.7 +PyMuPDF==1.24.13 +pyparsing==3.1.4 pyshacl==0.26.0 PyShEx==0.8.1 PyShExC==0.9.1 PySocks==1.7.1 pysolr==3.10.0 -pystow==0.6.1 +pystow==0.5.6 pytest==8.3.3 -pytest-cov==6.0.0 +pytest-cov==5.0.0 pytest-logging==2015.11.4 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-gitlab==4.13.0 +python-gitlab==4.11.1 python-json-logger==2.0.7 -python-semantic-release==9.14.0 +python-semantic-release==9.8.8 PyTrie==0.4.0 -pytz==2024.1 +pytz==2024.2 PyYAML==6.0.2 ratelimit==2.2.1 -rdflib==7.1.1 +rdflib==7.0.0 rdflib-jsonld==0.6.1 rdflib-shim==1.0.3 referencing==0.35.1 -regex==2024.11.6 +regex==2024.9.11 requests==2.32.3 requests-cache==1.2.1 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 rfc3987==1.3.8 -rich==13.9.4 -rpds-py==0.21.0 +rich==13.8.1 +rpds-py==0.20.1 ruamel.yaml==0.18.6 ruamel.yaml.clib==0.2.12 -s3transfer==0.10.4 +s3transfer==0.10.3 scikit-learn==1.4.2 scipy==1.14.1 semsql==0.3.3 -setuptools==75.6.0 +setuptools==75.3.0 shellingham==1.5.4 ShExJSG==0.8.2 six==1.16.0 -smmap==5.0.0 +smmap==5.0.1 sniffio==1.3.1 snowballstemmer==2.2.0 sortedcontainers==2.4.0 -soso @ git+https://github.com/clnsmth/soso.git@main +soso==0.2.0 soupsieve==2.6 sparqlslurper==0.5.1 SPARQLWrapper==2.0.0 -Sphinx==8.1.3 -sphinx-autoapi==3.3.3 +Sphinx==8.0.2 +sphinx-autoapi==3.3.1 sphinxcontrib-applehelp==2.0.0 sphinxcontrib-devhelp==2.0.0 sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 -sphinxcontrib-serializinghtml==1.1.10 +sphinxcontrib-serializinghtml==2.0.0 +spinneret==0.2.0 SQLAlchemy==2.0.36 SQLAlchemy-Utils==0.38.3 -sssom==0.4.13 +sssom==0.4.12 sssom-schema==1.0.0 -starlette==0.41.3 +starlette==0.41.2 stdlib-list==0.11.0 tenacity==9.0.0 threadpoolctl==3.5.0 -tiktoken==0.8.0 -tokenizers==0.20.3 +tiktoken==0.7.0 +tokenizers==0.20.1 toml==0.10.2 tomli==2.1.0 tomlkit==0.13.2 -tqdm==4.67.1 -typeguard==4.4.1 +tornado==6.4.2 +tqdm==4.66.6 +typeguard==4.4.0 types-python-dateutil==2.9.0.20241003 typing_extensions==4.12.2 -tzdata==2024.2 +tzdata==2024.1 +unicodedata2==15.1.0 Unidecode==1.3.8 uri-template==1.3.0 url-normalize==1.4.3 urllib3==1.26.20 -uvicorn==0.32.1 +uvicorn==0.32.0 validators==0.34.0 watchdog==6.0.0 wcwidth==0.2.13 -webcolors==24.11.1 +webcolors==24.8.0 webencodings==0.5.1 -Werkzeug==3.1.3 +Werkzeug==3.1.1 wheel==0.45.1 wikipedia==1.4.0 Wikipedia-API==0.7.1 -wrapt==1.17.0 -yarl==1.18.0 -zipp==3.21.0 +wrapt==1.16.0 +yarl==1.17.1 +zipp==3.20.2 zstandard==0.23.0 diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 2cd8b69..c6e8c15 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -10,8 +10,9 @@ from contextlib import contextmanager from daiquiri import getLogger import pandas as pd +import matplotlib.pyplot as plt from spinneret.utilities import load_workbook, compress_uri -from spinneret.workbook import delete_duplicate_annotations +from spinneret.workbook import delete_duplicate_annotations, delete_unannotated_rows logger = getLogger(__name__) @@ -374,3 +375,116 @@ def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: return None return db + + +def plot_grounding_rates( + grounding_rates: dict, configuration: str, output_file: str +) -> None: + """ + Plot the grounding rates of the test data. + + :param grounding_rates: The return value from the `get_grounding_rates` + function. 
+    :param configuration: The configuration of OntoGPT that was used to
+        generate the test data. This is typically the directory name of the
+        test data.
+    :param output_file: The path to save the plot to, as a PNG file.
+    :return: None
+    """
+
+    # Reformatting the grounding rates dictionary into a DataFrame for plotting
+    df = pd.DataFrame(grounding_rates).T
+
+    # Calculate percentages
+    df_percent = df.div(df.sum(axis=1), axis=0) * 100
+
+    # Build the stacked bars and add data labels to the segments
+    plt.figure(figsize=(10, 6))
+    bottom = [0] * len(df)
+    for col in df_percent.columns:
+        bars = plt.bar(df_percent.index, df_percent[col], bottom=bottom, label=col)
+        for item in bars:
+            height = item.get_height()
+            if height > 5:  # Only add labels if the segment is large enough
+                plt.text(
+                    item.get_x() + item.get_width() / 2,
+                    item.get_y() + height / 2,
+                    f"{height:.1f}%",
+                    ha="center",
+                    va="center",
+                    color="white",
+                    fontsize=9,
+                )
+        bottom = [bottom[i] + df_percent[col][i] for i in range(len(bottom))]
+
+    plt.ylabel("Percentage")
+    title = f"OntoGPT Grounding Rates for Configuration '{configuration}'"
+    plt.title(title)
+    plt.xticks(rotation=-20)
+    plt.legend(title="State")
+    plt.tight_layout()
+    plt.savefig(output_file, dpi=300)
+    plt.show()
+
+
+def get_grounding_rates(test_dir: str) -> dict:
+    """
+    Get the OntoGPT grounding rates of the test data, by predicate.
+
+    Predicates may have different grounding rates, due to differences in LLM
+    prompting and the nature of the vocabularies/ontologies being grounded to.
+
+    :param test_dir: Path to a directory containing the test annotated
+        workbook files.
+    :return: A nested set of dictionaries containing the grounding rates of the
+        test data. The first level of dictionary keys are the predicates, and
+        the values are a second dictionary with keys "grounded" and
+        "ungrounded". The values of these keys are the number of grounded and
+        ungrounded terms, respectively.
+    """
+    res = {
+        "env_broad_scale": {"grounded": 0, "ungrounded": 0},
+        "env_local_scale": {"grounded": 0, "ungrounded": 0},
+        "contains process": {"grounded": 0, "ungrounded": 0},
+        "environmental material": {"grounded": 0, "ungrounded": 0},
+        "contains measurements of type": {"grounded": 0, "ungrounded": 0},
+        "uses standard": {"grounded": 0, "ungrounded": 0},
+        "usesMethod": {"grounded": 0, "ungrounded": 0},
+        "research topic": {"grounded": 0, "ungrounded": 0},
+    }
+
+    files = [f for f in os.listdir(test_dir) if f.endswith(".tsv")]
+    for file in files:
+        path = os.path.join(test_dir, file)
+        logger.info(f"Getting grounding rates for {path}")
+        wb = load_workbook(path)
+        wb = delete_unannotated_rows(wb)  # OntoGPT skipped these, don't count
+
+        # Group object_ids by predicate and element_xpath. These represent
+        # unique annotation opportunities for OntoGPT to ground to an ontology.
+        object_id_groups = group_object_ids(wb)
+
+        # For each group determine if the object_ids are grounded or ungrounded
+        for key, data in object_id_groups.items():
+            predicate = key[0]
+            if is_grounded(data):
+                res[predicate]["grounded"] += 1
+            else:
+                res[predicate]["ungrounded"] += 1
+    return res
+
+
+def is_grounded(data: list) -> bool:
+    """
+    Determine if the list contains a grounded object_id.
+
+    :param data: List of object_ids.
+    :return: True if the list contains a grounded object_id, False otherwise.
+        A grounded term is defined as a term that contains "http".
+        Ungrounded terms are those that begin with "AUTO:" or are None.
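+
+    Illustrative examples (example values only, not drawn from real
+    workbooks):
+        is_grounded(["AUTO:ungrounded label", None]) -> False
+        is_grounded(["http://purl.obolibrary.org/obo/ENVO_01000252"]) -> True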
+ """ + # Remove None and NaN values from list to avoid errors on string matching + data = [d for d in data if d is not None] + data = [d for d in data if not pd.isna(d)] + + return any("http" in s for s in data) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 8b10a28..34482c0 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -16,6 +16,8 @@ parse_similarity_scores, delete_terms_from_unsupported_ontologies, get_shared_ontology, + get_grounding_rates, + is_grounded, ) from spinneret.utilities import is_url @@ -252,3 +254,36 @@ def test_get_shared_ontology(): set2 = [] db = get_shared_ontology(set1, set2) assert db is None + + +def test_get_grounding_rates(): + """Test the get_grounding_rates function""" + grounding_rates = get_grounding_rates("tests/data/benchmark/test_a") + + # The result is a dictionary with expected keys and value types + assert isinstance(grounding_rates, dict) + assert set(grounding_rates.keys()) == { + "contains measurements of type", + "contains process", + "environmental material", + "uses standard", + "env_local_scale", + "research topic", + "env_broad_scale", + "usesMethod", + } + for _, v in grounding_rates.items(): + for k2, v2 in v.items(): + assert k2 in ["grounded", "ungrounded"] + assert isinstance(v2, int) + + +def test_is_grounded(): + """Test the is_grounded function""" + + # Lists with None or NaN values are not grounded + assert not is_grounded([None]) + assert not is_grounded([pd.NA]) + + # But lists with strings starting with "http" are grounded + assert is_grounded(["http://example.com"]) From 13b2eb64bcd5f3b01c9e41767f445897a803970e Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 11:45:31 -0500 Subject: [PATCH 16/24] feat: add logging to `benchmark_against_standard` for better insights Add logging capabilities to the `benchmark_against_standard` function to provide insights into the ongoing execution process, especially helpful for this time-consuming operation. --- src/spinneret/benchmark.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index c6e8c15..7930579 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -94,9 +94,11 @@ def benchmark_against_standard(standard_dir: str, test_dirs: list) -> pd.DataFra if not standard_file.endswith(".tsv"): # we are expecting tsv files continue standard_path = os.path.join(standard_dir, standard_file) + logger.info(f"Benchmarking against standard file: {standard_path}") for test_dir in test_dirs: test_path = os.path.join(test_dir, standard_file) + logger.info(f"Comparing to test file: {test_path}") if not os.path.exists(test_path): # we need a matching test file continue From 513e5e5789e84fe10d215c844c1d5098be6fb2f5 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 12:44:44 -0500 Subject: [PATCH 17/24] test: create test data for term-set similarity score analysis Create a set of test data containing term-set similarity scores for various configurations, enabling unit testing of downstream functions that analyze and interpret these scores. 
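As a rough, illustrative sketch (not part of this patch), downstream analyses
are expected to consume the fixture roughly along these lines; the column
names come from the TSV header, while the grouping shown is only an assumed
example:

    import pandas as pd

    # Each row compares one predicate/element_xpath pair from a test
    # configuration against the benchmark standard.
    scores = pd.read_csv(
        "tests/data/benchmark/termset_similarity_scores.tsv", sep="\t"
    )

    # e.g., mean average_score per test configuration (test_dir)
    print(scores.groupby("test_dir")["average_score"].mean())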
--- tests/conftest.py | 11 +++++++++++ tests/data/benchmark/termset_similarity_scores.tsv | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/data/benchmark/termset_similarity_scores.tsv diff --git a/tests/conftest.py b/tests/conftest.py index 012eee8..f6f8ba0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ from json import load import pytest +import pandas as pd from spinneret.utilities import load_workbook @@ -55,3 +56,13 @@ def termset_similarity_score_fields(): "average_test_information_content", "best_test_information_content", ] + + +@pytest.fixture(name="termset_similarity_score_dataframe") +def termset_similarity_score_dataframe(): + """Return a fixture for a dataframe of termset similarity scores returned + by the benchmark_against_standard function""" + scores = pd.read_csv( + "tests/data/benchmark/termset_similarity_scores.tsv", sep="\t", encoding="utf-8" + ) + return scores diff --git a/tests/data/benchmark/termset_similarity_scores.tsv b/tests/data/benchmark/termset_similarity_scores.tsv new file mode 100644 index 0000000..eb2fee7 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_scores.tsv @@ -0,0 +1,11 @@ +standard_dir test_dir standard_file predicate_value element_xpath_value standard_set test_set average_score best_score average_jaccard_similarity best_jaccard_similarity average_phenodigm_score best_phenodigm_score average_standard_information_content best_standard_information_content average_test_information_content best_test_information_content +tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01000317', 'ENVO:01001209', 'ENVO:01001209'] 4.399317289600849 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 7.598198606401752 8.321928094887362 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[31] ['ECSO:00002844'] ['ECSO:00002359', 'ECSO:00001534'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[41] ['ECSO:00001727'] ['ECSO:00000329'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[5] ['ECSO:00000515'] ['ECSO:00001250'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv usesMethod /eml:eml/dataset ['ENVTHES:21335', 'ENVTHES:20223', 'ENVTHES:21337', 'ENVTHES:20243', 'ENVTHES:20285', 'ENVTHES:21339', 'ENVTHES:20304', 'https://www.wikidata.org/wiki/Q591867', 
'https://www.wikidata.org/wiki/Q5149058'] ['ENVTHES:20803', 'ENVTHES:10375', 'ENVTHES:20104', 'ENVTHES:22297', 'ENVTHES:10328'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:00000035', 'ENVO:01001209', 'ENVO:01001209'] 4.521487919995395 4.616452786848972 0.2689232631619699 0.2840909090909091 1.0989954987335404 1.1452040294162371 12.185656141890044 12.78135971352466 9.035433165359823 11.196397212803504 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[14] ['ECSO:00001799'] ['ECSO:00001120'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[26] ['ECSO:00001720'] ['ECSO:00001534'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141 From 1bd118495fb9a46c98336bdac6aded865b5de07f Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 16:17:01 -0500 Subject: [PATCH 18/24] feat: visualize similarity metrics by predicate Implement a visualization to assess the accuracy of different OntoGPT configurations relative to a baseline standard for each predicate represented by OntoGPT templates. Use a simple box plot to effectively display and compare similarity metrics across predicate values. --- src/spinneret/benchmark.py | 74 ++++++++++++++++++++++++++++++++++++++ tests/test_benchmark.py | 11 ++++++ 2 files changed, 85 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 7930579..d4f1e90 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -11,6 +11,7 @@ from daiquiri import getLogger import pandas as pd import matplotlib.pyplot as plt +import numpy as np from spinneret.utilities import load_workbook, compress_uri from spinneret.workbook import delete_duplicate_annotations, delete_unannotated_rows @@ -490,3 +491,76 @@ def is_grounded(data: list) -> bool: data = [d for d in data if not pd.isna(d)] return any("http" in s for s in data) + + +def plot_similarity_scores_by_predicate( + benchmark_results: pd.DataFrame, + test_dir_path: str, + metric: str, + output_file: str = None, +) -> None: + """ + To see predicate level performance for an OntoGPT test configuration + + :param benchmark_results: The return value from the + `benchmark_against_standard` function. + :param test_dir_path: Path to the test directory containing the test + annotated workbook files for the desired configuration. This should be + a value from the `test_dir` column of the benchmark_results DataFrame, + which indicates the configuration comparison to plot. + :param metric: The metric to plot. This should be a column name from the + benchmark_results DataFrame, e.g. "average_score", "best_score", etc. 
+ :param output_file: The path to save the plot to, as a PNG file. + :return: None + """ + # Subset the benchmark results dataframe to only include the desired + # columns: test_dir, metric + df = benchmark_results[benchmark_results["test_dir"] == test_dir_path][ + ["predicate_value", metric] + ] + + # Remove empty rows where the metric is 0 or NaN to avoid plotting them + df = df.dropna(subset=[metric]) + df = df[df[metric] != 0] + + # Order the "predicate_value" column to ensure the plot's x-axis is ordered + # correctly + df["predicate_value"] = pd.Categorical( + df["predicate_value"], + [ + "env_broad_scale", + "env_local_scale", + "contains process", + "environmental material", + "contains measurements of type", + "uses standard", + "usesMethod", + "research topic", + ], + ) + + plt.figure(figsize=(10, 6)) + grouped_data_long = df.groupby("predicate_value")[metric].apply(list) + plt.boxplot( + grouped_data_long.values, labels=grouped_data_long.index, showmeans=True + ) + + # Add individual data points (jittered) + for i, group_data in enumerate(grouped_data_long): + x = np.random.normal(i + 1, 0.08, size=len(group_data)) # Jitter x-values + plt.plot(x, group_data, "o", alpha=0.25, color="grey") + + configuration = os.path.basename(test_dir_path) + + plt.xlabel("Predicate") + plt.ylabel("Score") + title = ( + f"Similarity Score '{metric}' Against Benchmark Standard for " + f"Configuration '{configuration}'" + ) + plt.title(title) + plt.xticks(rotation=-20) + plt.tight_layout() + if output_file: + plt.savefig(output_file, dpi=300) + plt.show() diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 34482c0..eb432f2 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -18,6 +18,7 @@ get_shared_ontology, get_grounding_rates, is_grounded, + plot_similarity_scores_by_predicate, ) from spinneret.utilities import is_url @@ -287,3 +288,13 @@ def test_is_grounded(): # But lists with strings starting with "http" are grounded assert is_grounded(["http://example.com"]) + + +@pytest.mark.skip(reason="Manual inspection required") +def test_plot_similarity_scores_by_predicate(termset_similarity_score_dataframe): + """Test the plot_similarity_scores_by_predicate function""" + plot_similarity_scores_by_predicate( + benchmark_results=termset_similarity_score_dataframe, + test_dir_path="tests/data/benchmark/test_a", + metric="average_score", + ) From b39262910b05e2b0ea7de92e3759683c654c8920 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 16:58:52 -0500 Subject: [PATCH 19/24] feat: visualize similarity metrics by configuration Implement a visualization to assess the accuracy of different OntoGPT configurations relative to a baseline. Use a simple box plot to display and compare configurations. --- src/spinneret/benchmark.py | 45 ++++++++++++++++++++++++++++++++++++++ tests/test_benchmark.py | 10 +++++++++ 2 files changed, 55 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index d4f1e90..63f6236 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -564,3 +564,48 @@ def plot_similarity_scores_by_predicate( if output_file: plt.savefig(output_file, dpi=300) plt.show() + + +def plot_similarity_scores_by_configuration( + benchmark_results: pd.DataFrame, + metric: str, + output_file: str = None, +) -> None: + """ + To see configuration level performance for an OntoGPT predicate + + :param benchmark_results: The return value from the + `benchmark_against_standard` function. 
+ :param metric: The metric to plot. This should be a column name from the + benchmark_results DataFrame, e.g. "average_score", "best_score", etc. + :param output_file: The path to save the plot to, as a PNG file. + :return: None + """ + # Subset the benchmark results dataframe to only include the desired + # columns: test_dir, metric + df = benchmark_results[["test_dir", metric]] + + # Remove empty rows where the metric is 0 or NaN to avoid plotting them + df = df.dropna(subset=[metric]) + df = df[df[metric] != 0] + + plt.figure(figsize=(10, 6)) + grouped_data_long = df.groupby("test_dir")[metric].apply(list) + plt.boxplot( + grouped_data_long.values, labels=grouped_data_long.index, showmeans=True + ) + + # Add individual data points (jittered) + for i, group_data in enumerate(grouped_data_long): + x = np.random.normal(i + 1, 0.08, size=len(group_data)) # Jitter x-values + plt.plot(x, group_data, "o", alpha=0.25, color="grey") + + plt.xlabel("Configuration") + plt.ylabel("Score") + title = f"Similarity Score '{metric}' Across Configurations" + plt.title(title) + plt.xticks(rotation=-20) + plt.tight_layout() + if output_file: + plt.savefig(output_file, dpi=300) + plt.show() diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index eb432f2..5890997 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -19,6 +19,7 @@ get_grounding_rates, is_grounded, plot_similarity_scores_by_predicate, + plot_similarity_scores_by_configuration, ) from spinneret.utilities import is_url @@ -298,3 +299,12 @@ def test_plot_similarity_scores_by_predicate(termset_similarity_score_dataframe) test_dir_path="tests/data/benchmark/test_a", metric="average_score", ) + + +@pytest.mark.skip(reason="Manual inspection required") +def test_plot_similarity_scores_by_configuration(termset_similarity_score_dataframe): + """Test the plot_similarity_scores_by_configuration function""" + plot_similarity_scores_by_configuration( + benchmark_results=termset_similarity_score_dataframe, + metric="average_score", + ) From 18d01bbf495bee37977bccf3b46f2114dce40a78 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 17:08:36 -0500 Subject: [PATCH 20/24] feat: make plot writing to file optional in `plot_grounding_rates` Make writing plots to file optional in the `plot_grounding_rates` function by introducing a new parameter to control this behavior. This allows for flexible usage, including previewing plots without generating files. --- src/spinneret/benchmark.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 63f6236..d69af9b 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -381,7 +381,7 @@ def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: def plot_grounding_rates( - grounding_rates: dict, configuration: str, output_file: str + grounding_rates: dict, configuration: str, output_file: str = None ) -> None: """ Plot the grounding rates of the test data. 
@@ -426,7 +426,8 @@ def plot_grounding_rates( plt.xticks(rotation=-20) plt.legend(title="State") plt.tight_layout() - plt.savefig(output_file, dpi=300) + if output_file: + plt.savefig(output_file, dpi=300) plt.show() From 25f0a8bc89c406c70fc1259e68abc8c243c94384 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 23 Dec 2024 11:56:08 -0500 Subject: [PATCH 21/24] refactor: remove outdated `add_dataset_annotations_to_workbook` function Remove the outdated `add_dataset_annotations_to_workbook` function, as it lacks the necessary granularity for predicate-level categorization of semantic annotations, a crucial aspect of our current annotation model. While alternative approaches exist (e.g., annotating with terms from multiple vocabularies and then categorizing based on branch), the ongoing development and active community support for OntoGPT suggest a more promising long-term solution. --- src/spinneret/annotator.py | 265 ++++++++++--------------------------- src/spinneret/main.py | 6 - tests/test_annotator.py | 212 ++--------------------------- 3 files changed, 81 insertions(+), 402 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index ea6127e..a1637f8 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -138,7 +138,6 @@ def annotate_workbook( workbook_path: str, eml_path: str, output_path: str, - annotator: str, local_model: str = None, temperature: Union[float, None] = None, return_ungrounded: bool = False, @@ -150,10 +149,6 @@ def annotate_workbook( corresponding to the EML file. :param eml_path: The path to the EML file corresponding to the workbook. :param output_path: The path to write the annotated workbook. - :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param local_model: See `get_ontogpt_annotation` documentation for details. :param temperature: The temperature parameter for the model. If None, the OntoGPT default will be used. @@ -167,7 +162,6 @@ def annotate_workbook( path as the original workbook. 
""" logger.info(f"Annotating workbook {workbook_path}") - logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] @@ -180,70 +174,63 @@ def annotate_workbook( wb = load_workbook(workbook_path) eml = load_eml(eml_path) - # Run workbook annotators, results of one are used as input for the next - if annotator == "bioportal": - wb = add_dataset_annotations_to_workbook(wb, eml, sample_size=sample_size) - wb = add_measurement_type_annotations_to_workbook( - wb, eml, annotator=annotator, sample_size=sample_size - ) - elif annotator == "ontogpt": - wb = add_env_broad_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_local_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_process_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_methods_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_research_topic_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_measurement_type_annotations_to_workbook( - wb, - eml, - annotator="ontogpt", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_medium_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) + # Run workbook annotator, results of one are used as input for the next + wb = add_env_broad_scale_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_env_local_scale_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_process_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_methods_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_research_topic_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_measurement_type_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_env_medium_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator write_workbook(wb, output_path) @@ -485,98 +472,11 @@ def add_qudt_annotations_to_workbook( return wb -def 
add_dataset_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `dataset` annotations in the - workbook, so a fresh set may be created. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with dataset annotations. - """ - logger.info("Annotating dataset") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_bioportal_annotation" - - # Remove existing dataset annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "author": author, - }, - ) - - # Get the dataset annotations - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - annotations = [] - for _ in range(sample_size): - res = get_bioportal_annotation( # expecting a list of annotations - text=element_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ENVO", # ENVO provides environmental terms - exclude_synonyms="true", - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add dataset annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = "is about" - row["predicate_id"] = "http://purl.obolibrary.org/obo/IAO_0000136" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - # pylint: disable=too-many-branches # pylint: disable=too-many-statements def add_measurement_type_annotations_to_workbook( workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], - annotator: str, output_path: str = None, overwrite: bool = False, local_model: str = None, @@ -589,10 +489,6 @@ def add_measurement_type_annotations_to_workbook( workbook itself as a pandas DataFrame. :param eml: Either the path to the EML file corresponding to the workbook, or the EML file itself as an lxml etree. 
- :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param output_path: The path to write the annotated workbook. :param overwrite: If True, overwrite existing `measurement type` annotations in the workbook, so a fresh set may be created. @@ -652,35 +548,19 @@ def add_measurement_type_annotations_to_workbook( ) if annotations is None: - # Select an annotator, and get the measurement type annotations - if annotator.lower() == "ontogpt": - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - else: - annotations = [] - for _ in range(sample_size): - res = get_bioportal_annotation( - # expecting a list of annotations - text=attribute_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ECSO", # ECSO provides measurment terms - exclude_synonyms="true", - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="contains_measurement_of_type", + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add the measurement type annotations to the workbook if annotations is not None: @@ -704,10 +584,7 @@ def add_measurement_type_annotations_to_workbook( ) row["object"] = annotation["label"] row["object_id"] = annotation["uri"] - if annotator.lower() == "ontogpt": - row["author"] = "spinneret.annotator.get_ontogpt_annotation" - elif annotator.lower() == "bioportal": - row["author"] = "spinneret.annotator.get_bioportal_annotation" + row["author"] = "spinneret.annotator.get_ontogpt_annotation" row["date"] = pd.Timestamp.now() row = pd.DataFrame([row], dtype=str) wb = pd.concat([wb, row], ignore_index=True) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index d8d4fd3..4e2d382 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -53,7 +53,6 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: def annotate_workbooks( workbook_dir: str, eml_dir: str, - annotator: str, output_dir: str, config_path: str, local_model: str = None, @@ -65,10 +64,6 @@ def annotate_workbooks( :param workbook_dir: Directory of unannotated workbooks :param eml_dir: Directory of EML files corresponding to workbooks - :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file :param local_model: See `get_ontogpt_annotation` documentation for details. 
@@ -113,7 +108,6 @@ def annotate_workbooks( annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, eml_path=eml_dir + "/" + eml_file, - annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, temperature=temperature, diff --git a/tests/test_annotator.py b/tests/test_annotator.py index afc0ef0..0fcbf1d 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -13,7 +13,6 @@ annotate_eml, create_annotation_element, add_qudt_annotations_to_workbook, - add_dataset_annotations_to_workbook, add_measurement_type_annotations_to_workbook, add_process_annotations_to_workbook, add_env_broad_scale_annotations_to_workbook, @@ -81,73 +80,6 @@ def test_get_bioportal_annotation(mocker, use_mock, get_annotation_fixture): assert item["uri"] != "" -# pylint: disable=duplicate-code -@pytest.mark.parametrize("use_mock", [True]) # False tests with real HTTP requests -def test_annotate_workbook_with_bioportal( - tmp_path, mocker, use_mock, get_annotation_fixture -): - """Test annotate_workbook using the BioPortal annotator""" - - # Configure the mock responses - if use_mock: - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=get_annotation_fixture, - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - mocker.patch( - "spinneret.annotator.get_qudt_annotation", - return_value=get_annotation_fixture, - ) - else: - if not os.path.exists("config.json"): - pytest.skip( - "Skipping test due to missing config.json file in package root." - ) - load_configuration("config.json") - - # Copy the workbook to tmp_path for editing - wb_path = "tests/edi.3.9_annotation_workbook.tsv" - wb_path_copy = str(tmp_path) + "/edi.3.9_annotation_workbook.tsv" - copyfile(wb_path, wb_path_copy) - wb_path_annotated = str(tmp_path) + "/edi.3.9_annotation_workbook_annotated.tsv" - - # Check features of the unannotated workbook - assert os.path.exists(wb_path_copy) - wb = load_workbook(wb_path_copy) - # The columns to be annotated should be empty - cols_to_annotate = [ - "predicate", - "predicate_id", - "object", - "object_id", - "author", - "date", - ] - for col in cols_to_annotate: - assert wb[col].isnull().all() - - # Annotate the workbook copy - annotate_workbook( - workbook_path=wb_path_copy, - eml_path=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=wb_path_annotated, - annotator="bioportal", - ) - - # Check the workbook was annotated - assert os.path.exists(wb_path_annotated) - wb = load_workbook(wb_path_annotated) - # The columns to be annotated should be full - for col in cols_to_annotate: - assert not wb[col].isnull().all() - # The authors are the annotator functions called under this configuration - authors = wb["author"].unique() - authors = [x for x in authors if pd.notna(x)] - assert "spinneret.annotator.get_bioportal_annotation" in authors - assert "spinneret.annotator.get_qudt_annotation" in authors - - # pylint: disable=duplicate-code @pytest.mark.parametrize("use_mock", [True]) # False tests with real LLM queries def test_annotate_workbook_with_ontogpt( @@ -192,7 +124,6 @@ def test_annotate_workbook_with_ontogpt( workbook_path=wb_path_copy, eml_path=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=wb_path_annotated, - annotator="ontogpt", local_model="llama3.2", return_ungrounded=True, # ensures we get at least one annotation back ) @@ -409,120 +340,24 @@ def test_has_annotations(): assert has_annotations(wb) is True -@pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests -def 
test_add_dataset_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_dataset_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "freshwater lake biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_01000252", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_dataset_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. - if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "A different biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_XXXXXXXX", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_dataset_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - overwrite=True, - ) - assert wb["object"].str.contains("A different biome").any() - assert ( - wb["object_id"] - .str.contains("http://purl.obolibrary.org/obo/ENVO_XXXXXXXX") - .any() - ) - - # Original annotations are gone - assert not wb["object"].str.contains("freshwater lake biome").any() - assert ( - not wb["object_id"] - .str.contains("http://purl.obolibrary.org/obo/ENVO_01000252") - .any() - ) - - -def test_add_dataset_annotations_to_workbook_io_options(tmp_path, mocker): - """Test add_dataset_annotations_to_workbook with different input and output - options""" - - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "freshwater lake biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_01000252", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - - # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv" - wb = add_dataset_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - wb = load_workbook(output_path) - assert has_annotations(wb) - - # Accepts dataframes and etree objects as input - wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") - eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_dataset_annotations_to_workbook(workbook=wb, eml=eml) - assert has_annotations(wb) - - @pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker): """Test add_measurement_type_annotations_to_workbook""" # Parameterize the test workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" + output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" # The workbook shouldn't have any annotations yet wb = load_workbook(workbook_path) assert not has_annotations(wb) - # The workbook has 
annotations after calling the function + # The workbook "should" have annotations after calling the function. We + # say "should" because OntoGPT is non-deterministic, and we can't always + # expect the same results, or any results at all. if use_mock: mocker.patch( - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "depth", @@ -530,11 +365,9 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" wb = add_measurement_type_annotations_to_workbook( workbook=workbook_path, eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, ) assert has_annotations(wb) @@ -543,7 +376,7 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker # requests because we'll expect the same results as the first call. if use_mock: mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "A different measurement type", @@ -551,11 +384,9 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" wb = add_measurement_type_annotations_to_workbook( workbook=output_path, # the output from the first call eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, overwrite=True, ) @@ -578,7 +409,7 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke and output options""" mocker.patch( - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "depth", @@ -586,14 +417,12 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv" + output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" wb = add_measurement_type_annotations_to_workbook( workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, ) wb = load_workbook(output_path) @@ -602,15 +431,12 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke # Accepts dataframes and etree objects as input wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_measurement_type_annotations_to_workbook( - workbook=wb, eml=eml, annotator="bioportal" - ) + wb = add_measurement_type_annotations_to_workbook(workbook=wb, eml=eml) assert has_annotations(wb) def test_annotators_are_listed_as_authors(tmp_path, mocker): - """Test that the annotators are listed as authors in the workbook. 
Test - this for each workbook annotator with an annotator parameter.""" + """Test that the annotators are listed as authors in the workbook.""" # Test for the `add_measurement_type_annotations_to_workbook` function # using the OntoGPT annotator @@ -622,30 +448,12 @@ def test_annotators_are_listed_as_authors(tmp_path, mocker): workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", - annotator="ontogpt", local_model="llama3.2", ) authors = wb["author"].unique() authors = [x for x in authors if pd.notna(x)] assert "spinneret.annotator.get_ontogpt_annotation" == authors[0] - # Test for the `add_measurement_type_annotations_to_workbook` function - # using the Bioportal annotator - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_measurement_type_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", - annotator="bioportal", - ) - authors = wb["author"].unique() - authors = [x for x in authors if pd.notna(x)] - assert "spinneret.annotator.get_bioportal_annotation" == authors[0] - @pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries def test_get_ontogpt_annotation(mocker, use_mock): From ed668b1ecf46121045733d37f0dfdbc357d043ca Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 27 Dec 2024 21:10:08 -0500 Subject: [PATCH 22/24] refactor: consolidate OntoGPT workbook annotators into a single function Consolidate multiple OntoGPT workbook annotator functions into a single, unified function to improve code maintainability, reduce redundancy, and enhance overall code clarity. 
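For illustration, a call that previously went through one of the per-predicate
helpers (e.g. add_process_annotations_to_workbook) now goes through the single
entry point introduced below. This is a minimal usage sketch based on the new
signature; the workbook/EML paths, model name, and sample size are
illustrative values only:

    from spinneret.annotator import add_predicate_annotations_to_workbook

    # Ground the "contains process" predicate with a local model; the
    # replicate OntoGPT runs reduce run-to-run variability (values are
    # illustrative).
    wb = add_predicate_annotations_to_workbook(
        predicate="contains process",
        workbook="edi.3.9_annotation_workbook.tsv",
        eml="edi.3.9.xml",
        local_model="llama3.2",
        sample_size=3,
    )

The same call, varying only `predicate`, replaces the seven near-identical
add_*_annotations_to_workbook functions removed by this patch; the
predicate-to-XPath, predicate-to-template, and predicate-to-ID mappings move
into utilities.py.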
--- src/spinneret/annotator.py | 922 ++++--------------------------------- src/spinneret/utilities.py | 73 +++ tests/test_annotator.py | 374 +-------------- tests/test_utilities.py | 43 ++ 4 files changed, 210 insertions(+), 1202 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index a1637f8..7bee5cd 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -24,6 +24,9 @@ write_workbook, write_eml, expand_curie, + get_elements_for_predicate, + get_template_for_predicate, + get_predicate_id_for_predicate, ) logger = getLogger(__name__) @@ -175,63 +178,26 @@ def annotate_workbook( eml = load_eml(eml_path) # Run workbook annotator, results of one are used as input for the next - wb = add_env_broad_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_local_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_process_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_methods_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_research_topic_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_measurement_type_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_medium_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator + predicates = [ + "contains measurements of type", + "contains process", + "env_broad_scale", + "env_local_scale", + "environmental material", + "research topic", + "usesMethod", + ] + for p in predicates: + wb = add_predicate_annotations_to_workbook( + predicate=p, + workbook=wb, + eml=eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_qudt_annotations_to_workbook(wb, eml) write_workbook(wb, output_path) return None @@ -472,129 +438,6 @@ def add_qudt_annotations_to_workbook( return wb -# pylint: disable=too-many-branches -# pylint: disable=too-many-statements -def add_measurement_type_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. 
- :param overwrite: If True, overwrite existing `measurement type` - annotations in the workbook, so a fresh set may be created. - :param local_model: Required if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: An option if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with measurement type annotations. - """ - logger.info("Annotating measurement type") - - # Parameters for the function - predicate = "contains measurements of type" - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Remove existing measurement type annotations if overwrite is True, using - # a set of criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "attribute", - "element_xpath": "/attribute", - "author": "spinneret.annotator", # any spinneret annotator - }, - ) - - # Iterate over EML attributes and add measurement type annotations to the - # workbook - attributes = eml.xpath("//attribute") - for attribute in attributes: - attribute_element = attribute - attribute_xpath = eml.getpath(attribute_element) - attribute_description = get_description(attribute_element) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, attribute_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
- annotations = get_annotation_from_workbook( - workbook=wb, - element=attribute_element.tag, - description=attribute_description, - predicate=predicate, - ) - - if annotations is None: - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add the measurement type annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = attribute_element.tag - if "id" in attribute_element.attrib: - row["element_id"] = attribute_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = attribute_xpath - row["context"] = get_subject_and_context(attribute_element)["context"] - row["description"] = get_description(attribute_element) - row["subject"] = get_subject_and_context(attribute_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#" - "containsMeasurementsOfType" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = "spinneret.annotator.get_ontogpt_annotation" - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - def get_ontogpt_annotation( text: str, template: str, @@ -678,248 +521,8 @@ def get_ontogpt_annotation( return annotations -def add_process_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `process` annotations in the - workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with process annotations. - :notes: This function retrieves process annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. 
- """ - logger.info("Annotating process") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "contains process" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing process annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the process annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="contains_process", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add process annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = "http://purl.obolibrary.org/obo/BFO_0000067" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_broad_scale_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. 
- :param overwrite: If True, overwrite existing `broad scale environmental - context` annotations in the workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with broad scale environmental context annotations. - :notes: This function retrieves broad scale environmental context - annotations using OntoGPT, which requires setup and configuration - described in the `get_ontogpt_annotation` function. - """ - logger.info("Annotating broad scale environmental context") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - author = "spinneret.annotator.get_onto_gpt_annotation" - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "env_broad_scale" - - # Remove existing broad scale environmental context annotations if - # overwrite is True, using a set of criteria that accurately define the - # annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
- annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the broad scale environmental context annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add broad scale environmental context annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "https://genomicsstandardsconsortium.github.io/mixs/0000012/" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_local_scale_annotations_to_workbook( +def add_predicate_annotations_to_workbook( + predicate: str, workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], output_path: str = None, @@ -930,13 +533,19 @@ def add_env_local_scale_annotations_to_workbook( sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ + :param predicate: The predicate label for the annotation. This guides the + annotation process with which OntoGPT template to use. The options are: + `contains measurements of type`, `contains process`, `env_broad_scale`, + `env_local_scale`, `environmental material`, `research topic`, + `usesMethod`, `uses standard`. :param workbook: Either the path to the workbook to be annotated, or the workbook itself as a pandas DataFrame. :param eml: Either the path to the EML file corresponding to the workbook, or the EML file itself as an lxml etree. :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `local scale environmental - context` annotations in the workbook, so a fresh set may be created. + :param overwrite: If True, overwrite existing annotations in the workbook, + so a fresh set may be created. Only annotations with the same predicate + as the `predicate` input will be removed. :param local_model: See `get_ontogpt_annotation` documentation for details. :param temperature: The temperature parameter for the model. If `None`, the OntoGPT default will be used. @@ -944,185 +553,66 @@ def add_env_local_scale_annotations_to_workbook( details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with local scale environmental context annotations. 
- :notes: This function retrieves local scale environmental context - annotations using OntoGPT, which requires setup and configuration - described in the `get_ontogpt_annotation` function. + :returns: Workbook with predicate annotations. + :notes: This function retrieves annotations using OntoGPT, except for the + `uses standard` which uses a deterministic method. OntoGPT requires + setup and configuration described in the `get_ontogpt_annotation` + function. """ - logger.info("Annotating local scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) eml = load_eml(eml) - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "env_local_scale" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing local scale environmental context annotations if - # overwrite is True, using a set of criteria that accurately define the - # annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the local scale environmental context annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add local scale environmental context annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "https://genomicsstandardsconsortium.github.io/mixs/0000013/" + # Annotate for each element in the set that matches the predicate + elements = get_elements_for_predicate(eml, predicate) + for element in elements: + logger.info(f"Annotating {predicate}") + + # Parameters for use below + element_tag = element.tag + element_description = get_description(element) + element_xpath = eml.getpath(element) + template = get_template_for_predicate(predicate) + predicate_id = get_predicate_id_for_predicate(predicate) + author = 
"spinneret.annotator.get_ontogpt_annotation" + + # Remove existing annotations if instructed to do so + if overwrite: + wb = delete_annotations( + workbook=wb, + criteria={ + "element": element_tag, + "element_xpath": element_xpath, + "predicate": predicate, + "author": author, + }, ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_medium_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `environmental medium` - annotations in the workbook, so a fresh set may be created. - :param local_model: Required if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: An option if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with environmental medium annotations. - """ - logger.info("Annotating environmental medium") - - # Parameters for the function - predicate = "environmental material" - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Remove existing environmental medium annotations if overwrite is True, - # using a set of criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "attribute", - "element_xpath": "/attribute", - "predicate": "environmental material", - "author": "spinneret.annotator.get_ontogpt_annotation", - }, - ) - - # Iterate over EML attributes and add environmental medium annotations to - # the workbook - attributes = eml.xpath("//attribute") - for attribute in attributes: - attribute_element = attribute - attribute_xpath = eml.getpath(attribute_element) - attribute_description = get_description(attribute_element) - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, attribute_xpath, predicate): + # Skip if this element already has an annotation in the workbook, to: + # prevent duplicate annotations, and to allow for resuming annotation + # of a partially annotated workbook. + if has_annotation(wb, element_xpath, predicate): return wb # Reuse existing annotations for elements with identical tag names, # descriptions, and predicate labels, to reduce redundant processing. # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
+ # tags and descriptions, which is generally true. annotations = get_annotation_from_workbook( workbook=wb, - element=attribute_element.tag, - description=attribute_description, + element=element_tag, + description=element_description, predicate=predicate, ) if annotations is None: - # Get the environmental medium annotations from the annotator + # Get the annotations annotations = [] for _ in range(sample_size): res = get_ontogpt_annotation( - text=attribute_description, - template="env_medium", + text=element_description, + template=template, local_model=local_model, temperature=temperature, return_ungrounded=return_ungrounded, @@ -1132,281 +622,35 @@ def add_env_medium_annotations_to_workbook( if len(annotations) == 0: annotations = None - # And add the environmental medium annotations to the workbook + # Add annotations to the workbook if annotations is not None: for annotation in annotations: row = initialize_workbook_row() row["package_id"] = get_package_id(eml) row["url"] = get_package_url(eml) - row["element"] = attribute_element.tag - if "id" in attribute_element.attrib: - row["element_id"] = attribute_element.attrib["id"] + row["element"] = element_tag + if "id" in element.attrib: + row["element_id"] = element.attrib["id"] else: row["element_id"] = pd.NA - row["element_xpath"] = attribute_xpath - row["context"] = get_subject_and_context(attribute_element)["context"] - row["description"] = attribute_description - row["subject"] = get_subject_and_context(attribute_element)["subject"] + row["element_xpath"] = eml.getpath(element) + row["context"] = get_subject_and_context(element)["context"] + row["description"] = element_description + row["subject"] = get_subject_and_context(element)["subject"] row["predicate"] = predicate - row["predicate_id"] = "http://purl.obolibrary.org/obo/ENVO_00010483" + row["predicate_id"] = predicate_id row["object"] = annotation["label"] row["object_id"] = annotation["uri"] - row["author"] = "spinneret.annotator.get_ontogpt_annotation" + row["author"] = author row["date"] = pd.Timestamp.now() row = pd.DataFrame([row], dtype=str) wb = pd.concat([wb, row], ignore_index=True) wb = delete_duplicate_annotations(wb) - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_research_topic_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `research topic` annotations - in the workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with research topic annotations. 
- :notes: This function retrieves research topic annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. - """ - logger.info("Annotating research topic") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "research topic" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing research topic annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": "research topic", - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): + if output_path: + write_workbook(wb, output_path) return wb - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the research topic annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="research_topic", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add research topic annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = "http://vocabs.lter-europe.net/EnvThes/21604" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_methods_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas 
DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `methods` annotations in the - workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with methods annotations. - :notes: This function retrieves methods annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. - """ - logger.info("Annotating methods") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - # Get the methods annotations, if the methods element exists in the EML - dataset_element = eml.xpath("//dataset")[0] - methods_element = eml.xpath("//dataset/methods") - if not methods_element: - return wb - element_description = get_description(methods_element[0]) - element_xpath = eml.getpath(dataset_element) - predicate = "usesMethod" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing methods annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="uses_method", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add methods annotations to the workbook. Note, methods annotations are - # at the dataset level. 
- if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description[0:500] # don't need all of it - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - def get_annotation_from_workbook( workbook: Union[str, pd.core.frame.DataFrame], diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 80ad65c..3551276 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -157,3 +157,76 @@ def load_prefixmaps() -> dict: file = str(importlib.resources.files("spinneret.data")) + "/prefixmaps.csv" prefixmaps = pd.read_csv(file) return prefixmaps + + +def get_elements_for_predicate(eml: etree._ElementTree, predicate: str) -> list: + """ + Get the EML elements that correspond to a predicate. Elements contain + the information from which annotations are derived. + + :param eml: An EML document. + :param predicate: The predicate to be used to find the element(s). + :returns: The element(s) corresponding to the predicate, each as an + etree._Element. If the predicate is not found, returns an empty list. + """ + predicate_and_xpath = { + "contains measurements of type": "//attribute", + "contains process": "//dataset", + "env_broad_scale": "//dataset", + "env_local_scale": "//dataset", + "environmental material": "//attribute", + "research topic": "//dataset", + "usesMethod": "//dataset/methods", + } + xpath = predicate_and_xpath.get(predicate) + if xpath: + return eml.xpath(xpath) + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return [] + + +def get_template_for_predicate(predicate: str) -> Union[str, None]: + """ + :param predicate: The predicate to be used to find the template. + :returns: The OntoGPT template for the predicate. Returns None if the + predicate is not found. + """ + predicate_and_template = { + "contains measurements of type": "contains_measurement_of_type", + "contains process": "contains_process", + "env_broad_scale": "env_broad_scale", + "env_local_scale": "env_local_scale", + "environmental material": "env_medium", + "research topic": "research_topic", + "usesMethod": "uses_method", + } + template = predicate_and_template.get(predicate) + if not template: + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return template + + +def get_predicate_id_for_predicate(predicate: str) -> Union[str, None]: + """ + :param predicate: The predicate to be used to find the predicate ID. + :returns: The predicate ID for the predicate. Returns None if the + predicate is not found. 
+ """ + predicate_and_id = { + "contains measurements of type": "http://ecoinformatics.org/oboe/" + "oboe.1.2/oboe-core.owl#" + "containsMeasurementsOfType", + "contains process": "http://purl.obolibrary.org/obo/BFO_0000067", + "env_broad_scale": "https://genomicsstandardsconsortium.github.io/mixs" + "/0000012/", + "env_local_scale": "https://genomicsstandardsconsortium.github.io/mixs" + "/0000013/", + "environmental material": "http://purl.obolibrary.org/obo/" "ENVO_00010483", + "research topic": "http://vocabs.lter-europe.net/EnvThes/21604", + "usesMethod": "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#" + "usesMethod", + } + predicate_id = predicate_and_id.get(predicate) + if not predicate_id: + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return predicate_id diff --git a/tests/test_annotator.py b/tests/test_annotator.py index 0fcbf1d..1e17498 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -13,15 +13,9 @@ annotate_eml, create_annotation_element, add_qudt_annotations_to_workbook, - add_measurement_type_annotations_to_workbook, - add_process_annotations_to_workbook, - add_env_broad_scale_annotations_to_workbook, - add_env_local_scale_annotations_to_workbook, - add_env_medium_annotations_to_workbook, - add_research_topic_annotations_to_workbook, - add_methods_annotations_to_workbook, get_annotation_from_workbook, has_annotation, + add_predicate_annotations_to_workbook, ) from spinneret.utilities import ( load_configuration, @@ -82,10 +76,8 @@ def test_get_bioportal_annotation(mocker, use_mock, get_annotation_fixture): # pylint: disable=duplicate-code @pytest.mark.parametrize("use_mock", [True]) # False tests with real LLM queries -def test_annotate_workbook_with_ontogpt( - tmp_path, mocker, use_mock, get_annotation_fixture -): - """Test annotate_workbook using the OntoGPT annotator""" +def test_annotate_workbook(tmp_path, mocker, use_mock, get_annotation_fixture): + """Test annotate_workbook""" # Configure the mock responses if use_mock: @@ -340,111 +332,15 @@ def test_has_annotations(): assert has_annotations(wb) is True -@pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests -def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_measurement_type_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook "should" have annotations after calling the function. We - # say "should" because OntoGPT is non-deterministic, and we can't always - # expect the same results, or any results at all. - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "depth", - "uri": "http://purl.dataone.org/odo/ECSO_00000515", - } - ], - ) - wb = add_measurement_type_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "A different measurement type", - "uri": "http://purl.dataone.org/odo/ECSO_XXXXXXXX", - } - ], - ) - wb = add_measurement_type_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - overwrite=True, - ) - assert wb["object"].str.contains("A different measurement type").any() - assert ( - wb["object_id"].str.contains("http://purl.dataone.org/odo/ECSO_XXXXXXXX").any() - ) - - # Original annotations are gone - assert not wb["object"].str.contains("depth").any() - assert ( - not wb["object_id"] - .str.contains("http://purl.dataone.org/odo/ECSO_00000515") - .any() - ) - - -def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocker): - """Test add_measurement_type_annotations_to_workbook with different input - and output options""" - - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "depth", - "uri": "http://purl.dataone.org/odo/ECSO_00000515", - } - ], - ) - - # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" - wb = add_measurement_type_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - wb = load_workbook(output_path) - assert has_annotations(wb) - - # Accepts dataframes and etree objects as input - wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") - eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_measurement_type_annotations_to_workbook(workbook=wb, eml=eml) - assert has_annotations(wb) - - def test_annotators_are_listed_as_authors(tmp_path, mocker): """Test that the annotators are listed as authors in the workbook.""" - # Test for the `add_measurement_type_annotations_to_workbook` function - # using the OntoGPT annotator mocker.patch( "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a label", "uri": "a uri"}], ) - wb = add_measurement_type_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="contains measurements of type", workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", @@ -496,258 +392,8 @@ def test_get_ontogpt_annotation(mocker, use_mock): @pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_process_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_process_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_process_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting 
changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. - if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_process_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_broad_scale_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_broad_scale_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_broad_scale_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_broad_scale_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_local_scale_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_local_scale_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_local_scale_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_local_scale_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_medium_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_medium_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_medium_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_medium_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_research_topic_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_research_topic_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_research_topic_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_research_topic_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_methods_annotations_to_workbook""" +def test_add_predicate_annotations_to_workbook(tmp_path, use_mock, mocker): + """Test add_predicate_annotations_to_workbook""" # Parameterize the test workbook_path = "tests/edi.3.9_annotation_workbook.tsv" @@ -763,7 +409,8 @@ def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a label", "uri": "a uri"}], ) - wb = add_methods_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="env_broad_scale", workbook=workbook_path, eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=output_path, @@ -779,7 +426,8 @@ def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a different label", "uri": "a different uri"}], ) - wb = add_methods_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="env_broad_scale", workbook=output_path, # the output from the first call eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=output_path, diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 3baa6f4..231d1b5 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -14,6 +14,9 @@ expand_curie, compress_uri, load_prefixmaps, + get_elements_for_predicate, + get_template_for_predicate, + get_predicate_id_for_predicate, ) from spinneret.datasets import get_example_eml_dir @@ -138,3 +141,43 @@ def test_load_prefixmaps(): """Test that the prefixmaps are loaded""" prefixmaps = load_prefixmaps() assert isinstance(prefixmaps, pd.DataFrame) + + +def test_get_elements_for_predicate(): + """Test that elements are retrieved for a given predicate""" + eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") + + # Elements are retrieved for a given predicate + elements = get_elements_for_predicate(eml, "contains measurements of type") + for element in elements: + assert element.tag == "attribute" + assert isinstance(element, etree._Element) + + # Elements are not retrieved for a non-existent predicate + elements = get_elements_for_predicate(eml, "non-existent predicate") + assert elements == [] + + +def test_get_template_for_predicate(): + """Test that a template is retrieved for a given predicate""" + # The template is retrieved for a given predicate + template = get_template_for_predicate("contains measurements of type") + assert template == "contains_measurement_of_type" + + # None is returned for a non-existent predicate + template = 
get_template_for_predicate("non-existent predicate")
+    assert template is None
+
+
+def test_get_predicate_id_for_predicate():
+    """Test that a predicate ID is retrieved for a given predicate"""
+    # The predicate ID is retrieved for a given predicate
+    predicate_id = get_predicate_id_for_predicate("contains measurements of type")
+    assert predicate_id == (
+        "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#"
+        "containsMeasurementsOfType"
+    )
+
+    # None is returned for a non-existent predicate
+    predicate_id = get_predicate_id_for_predicate("non-existent predicate")
+    assert predicate_id is None

From 5a495842e17f8dad5db95453ad2a540382b52ce3 Mon Sep 17 00:00:00 2001
From: Colin Smith
Date: Sat, 28 Dec 2024 10:04:26 -0500
Subject: [PATCH 23/24] fix: correct return logic in
 `add_predicate_annotations_to_workbook`

Resolve an issue in the `add_predicate_annotations_to_workbook` function
that prevented it from returning the expected results.
---
 src/spinneret/annotator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py
index 7bee5cd..ac5b0bc 100644
--- a/src/spinneret/annotator.py
+++ b/src/spinneret/annotator.py
@@ -647,9 +647,9 @@ def add_predicate_annotations_to_workbook(
             wb = pd.concat([wb, row], ignore_index=True)
 
     wb = delete_duplicate_annotations(wb)
-    if output_path:
-        write_workbook(wb, output_path)
-    return wb
+    if output_path:
+        write_workbook(wb, output_path)
+    return wb
 
 
 def get_annotation_from_workbook(

From ee474938e6c15baec225f1a303c842dfb731f78b Mon Sep 17 00:00:00 2001
From: Colin Smith
Date: Fri, 17 Jan 2025 12:43:38 -0800
Subject: [PATCH 24/24] build: configure Read the Docs for explicit path to
 conf.py

Update `.readthedocs.yml` to explicitly specify the path to Sphinx's
`conf.py`. This ensures proper documentation builds and avoids potential
issues with an upcoming deprecation of inferred configuration.
---
 .readthedocs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 092d342..ebd1092 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -7,6 +7,7 @@ build:
 
 sphinx:
   fail_on_warning: false
+  configuration: docs/source/conf.py
 
 python:
   # Install our python package before building the docs
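
The tests in this series exercise a single predicate-driven entry point,
`add_predicate_annotations_to_workbook`, together with per-predicate helper
lookups. A minimal usage sketch follows. The file paths are placeholders, the
`spinneret.utilities` module path for the helpers is an assumption inferred
from `tests/test_utilities.py`, and only the function names, parameters, and
example return values come from the diffs above.

    from spinneret.annotator import add_predicate_annotations_to_workbook
    from spinneret.utilities import (  # assumed module path for these helpers
        get_predicate_id_for_predicate,
        get_template_for_predicate,
    )

    # Resolve the template name and OBOE predicate URI for a predicate label
    template = get_template_for_predicate("contains measurements of type")
    # -> "contains_measurement_of_type"
    predicate_id = get_predicate_id_for_predicate("contains measurements of type")
    # -> "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType"

    # Annotate a workbook for one predicate; repeat per predicate of interest
    wb = add_predicate_annotations_to_workbook(
        predicate="env_broad_scale",
        workbook="edi.3.9_annotation_workbook.tsv",           # placeholder path
        eml="edi.3.9.xml",                                     # placeholder path
        output_path="edi.3.9_annotation_workbook_new.tsv",     # placeholder path
        local_model="llama3.2",   # optional: route OntoGPT through a local model
        return_ungrounded=True,   # also return ungrounded concepts
        overwrite=True,           # replace existing annotations for this predicate
    )

Because each predicate is handled by one generic call, the earlier
per-predicate test functions could be collapsed into the single
`test_add_predicate_annotations_to_workbook` shown above.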