From 9e8570a52f3448712af71738c74bb18d93f99a55 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 18 Nov 2024 07:50:24 -0800 Subject: [PATCH 01/24] fix: add missing parameters to `annotate_workbooks` Add missing parameters to the `annotate_workbooks` function to ensure correct argument propagation to its subfunctions. --- src/spinneret/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 930f224..24c6d07 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -46,7 +46,13 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: def annotate_workbooks( - workbook_dir: str, eml_dir: str, annotator: str, output_dir: str, config_path: str + workbook_dir: str, + eml_dir: str, + annotator: str, + output_dir: str, + config_path: str, + local_model: str = None, + return_ungrounded: bool = False, ) -> None: """Create workbooks for each EML file in a directory @@ -58,6 +64,9 @@ def annotate_workbooks( an API key and is described in the `get_bioportal_annotation` function. :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file + :param local_model: See `get_ontogpt_annotation` documentation for details. + :param return_ungrounded: See `get_ontogpt_annotation` documentation for + details. :return: None :notes: Annotated workbooks will not be created if they already exist. """ @@ -95,6 +104,8 @@ def annotate_workbooks( eml_path=eml_dir + "/" + eml_file, annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, + local_model=local_model, + return_ungrounded=return_ungrounded, ) From d3427737114469e2be59cdf7289a724c08a8601b Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Tue, 19 Nov 2024 12:46:10 -0800 Subject: [PATCH 02/24] fix: prevent OntoGPT cache-related errors by clearing cache Implement a cache-clearing mechanism before each OntoGPT call to mitigate issues where cached results, particularly those without grounded concepts, could lead to processing errors. This ensures that each call to OntoGPT is fresh and produces reliable results. --- src/spinneret/annotator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 503ccfe..88ba0b8 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -680,6 +680,9 @@ def get_ontogpt_annotation( if local_model is not None: cmd += f" -m ollama/{local_model}" try: + # Clear the cache so that the model can derive new annotations + cache_path = os.getcwd() + "/.litellm_cache" + os.system(f"rm -rf {cache_path}") os.system(cmd) except Exception as e: # pylint: disable=broad-exception-caught print(f"Error calling OntoGPT: {e}") From 57e6df729b005224621d594a9ef169a7bc128f40 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 20 Nov 2024 08:48:29 -0800 Subject: [PATCH 03/24] perf: enhance OntoGPT grounding with sample size Implement a strategy to combine multiple OntoGPT runs for each input to improve the consistency and completeness of concept grounding. This approach addresses the variability inherent in the OntoGPT process, resulting in more reliable and accurate annotations. 
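A rough sketch of how a caller might request replicate sampling once this patch is applied. The directory paths, config file, model name, and sample size below are illustrative placeholders, not values used by the project:

    from spinneret.main import annotate_workbooks

    annotate_workbooks(
        workbook_dir="workbooks",            # placeholder directories
        eml_dir="eml",
        annotator="ontogpt",
        output_dir="workbooks_annotated",
        config_path="config.json",           # holds e.g. the BioPortal API key
        local_model="llama3.2",              # placeholder local ollama model name
        return_ungrounded=False,
        sample_size=3,                       # pool three OntoGPT runs per input
    )

Each annotator pools the non-None results of sample_size calls and falls back to None when every replicate returns nothing, so sample_size=1 preserves the previous single-run behaviour.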
--- src/spinneret/annotator.py | 226 +++++++++++++++++++++++++++---------- src/spinneret/main.py | 4 + 2 files changed, 171 insertions(+), 59 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 88ba0b8..3fed6aa 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -136,6 +136,7 @@ def annotate_workbook( annotator: str, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> None: """Annotate a workbook with automated annotation @@ -150,6 +151,8 @@ def annotate_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: None :notes: The workbook is annotated by annotators best suited for the XPaths in the EML file. The annotated workbook is written back to the same @@ -170,23 +173,45 @@ def annotate_workbook( # Run workbook annotators, results of one are used as input for the next if annotator == "bioportal": - wb = add_dataset_annotations_to_workbook(wb, eml) - wb = add_measurement_type_annotations_to_workbook(wb, eml, annotator=annotator) + wb = add_dataset_annotations_to_workbook(wb, eml, sample_size=sample_size) + wb = add_measurement_type_annotations_to_workbook( + wb, eml, annotator=annotator, sample_size=sample_size + ) elif annotator == "ontogpt": wb = add_env_broad_scale_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_env_local_scale_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_process_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_methods_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_research_topic_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_measurement_type_annotations_to_workbook( wb, @@ -194,9 +219,14 @@ def annotate_workbook( annotator="ontogpt", local_model=local_model, return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_env_medium_annotations_to_workbook( - wb, eml, local_model=local_model, return_ungrounded=return_ungrounded + wb, + eml, + local_model=local_model, + return_ungrounded=return_ungrounded, + sample_size=sample_size, ) wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator @@ -442,6 +472,7 @@ def add_dataset_annotations_to_workbook( eml: Union[str, etree._ElementTree], output_path: str = None, overwrite: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -451,6 +482,8 @@ def add_dataset_annotations_to_workbook( :param output_path: The path to write the 
annotated workbook. :param overwrite: If True, overwrite existing `dataset` annotations in the workbook, so a fresh set may be created. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with dataset annotations.""" # Load the workbook and EML for processing @@ -475,12 +508,18 @@ def add_dataset_annotations_to_workbook( # Get the dataset annotations dataset_element = eml.xpath("//dataset")[0] element_description = get_description(dataset_element) - annotations = get_bioportal_annotation( # expecting a list of annotations - text=element_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ENVO", # ENVO provides environmental terms - exclude_synonyms="true", - ) + annotations = [] + for _ in range(sample_size): + res = get_bioportal_annotation( # expecting a list of annotations + text=element_description, + api_key=os.environ["BIOPORTAL_API_KEY"], + ontologies="ENVO", # ENVO provides environmental terms + exclude_synonyms="true", + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add dataset annotations to the workbook if annotations is not None: @@ -513,6 +552,7 @@ def add_dataset_annotations_to_workbook( # pylint: disable=too-many-branches +# pylint: disable=too-many-statements def add_measurement_type_annotations_to_workbook( workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], @@ -521,6 +561,7 @@ def add_measurement_type_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -538,6 +579,8 @@ def add_measurement_type_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. 
:returns: Workbook with measurement type annotations.""" # Parameters for the function @@ -586,21 +629,32 @@ def add_measurement_type_annotations_to_workbook( if annotations is None: # Select an annotator, and get the measurement type annotations if annotator.lower() == "ontogpt": - annotations = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="contains_measurement_of_type", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None else: - annotations = ( - get_bioportal_annotation( # expecting a list of annotations + annotations = [] + for _ in range(sample_size): + res = get_bioportal_annotation( + # expecting a list of annotations text=attribute_description, api_key=os.environ["BIOPORTAL_API_KEY"], ontologies="ECSO", # ECSO provides measurment terms exclude_synonyms="true", ) - ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add the measurement type annotations to the workbook if annotations is not None: @@ -718,6 +772,7 @@ def add_process_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -730,6 +785,8 @@ def add_process_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with process annotations. :notes: This function retrieves process annotations using OntoGPT, which requires setup and configuration described in the @@ -780,12 +837,18 @@ def add_process_annotations_to_workbook( if annotations is None: # Get the process annotations - annotations = get_ontogpt_annotation( - text=element_description, - template="contains_process", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="contains_process", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add process annotations to the workbook if annotations is not None: @@ -824,6 +887,7 @@ def add_env_broad_scale_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -836,6 +900,8 @@ def add_env_broad_scale_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with broad scale environmental context annotations. 
:notes: This function retrieves broad scale environmental context annotations using OntoGPT, which requires setup and configuration @@ -885,12 +951,18 @@ def add_env_broad_scale_annotations_to_workbook( if annotations is None: # Get the broad scale environmental context annotations - annotations = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template=predicate, + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add broad scale environmental context annotations to the workbook if annotations is not None: @@ -931,6 +1003,7 @@ def add_env_local_scale_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -943,6 +1016,8 @@ def add_env_local_scale_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with local scale environmental context annotations. :notes: This function retrieves local scale environmental context annotations using OntoGPT, which requires setup and configuration @@ -994,12 +1069,18 @@ def add_env_local_scale_annotations_to_workbook( if annotations is None: # Get the local scale environmental context annotations - annotations = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template=predicate, + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add local scale environmental context annotations to the workbook if annotations is not None: @@ -1040,6 +1121,7 @@ def add_env_medium_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1053,6 +1135,8 @@ def add_env_medium_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. 
:returns: Workbook with environmental medium annotations.""" # Parameters for the function @@ -1101,12 +1185,18 @@ def add_env_medium_annotations_to_workbook( if annotations is None: # Get the environmental medium annotations from the annotator - annotations = get_ontogpt_annotation( - text=attribute_description, - template="env_medium", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="env_medium", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # And add the environmental medium annotations to the workbook if annotations is not None: @@ -1145,6 +1235,7 @@ def add_research_topic_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1157,6 +1248,8 @@ def add_research_topic_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with research topic annotations. :notes: This function retrieves research topic annotations using OntoGPT, which requires setup and configuration described in the @@ -1207,12 +1300,18 @@ def add_research_topic_annotations_to_workbook( if annotations is None: # Get the research topic annotations - annotations = get_ontogpt_annotation( - text=element_description, - template="research_topic", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="research_topic", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add research topic annotations to the workbook if annotations is not None: @@ -1251,6 +1350,7 @@ def add_methods_annotations_to_workbook( overwrite: bool = False, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ :param workbook: Either the path to the workbook to be annotated, or the @@ -1263,6 +1363,8 @@ def add_methods_annotations_to_workbook( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :returns: Workbook with methods annotations. 
:notes: This function retrieves methods annotations using OntoGPT, which requires setup and configuration described in the @@ -1316,12 +1418,18 @@ def add_methods_annotations_to_workbook( ) if annotations is None: - annotations = get_ontogpt_annotation( - text=element_description, - template="uses_method", - local_model=local_model, - return_ungrounded=return_ungrounded, - ) + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=element_description, + template="uses_method", + local_model=local_model, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add methods annotations to the workbook. Note, methods annotations are # at the dataset level. diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 24c6d07..80409dc 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -53,6 +53,7 @@ def annotate_workbooks( config_path: str, local_model: str = None, return_ungrounded: bool = False, + sample_size: int = 1, ) -> None: """Create workbooks for each EML file in a directory @@ -67,6 +68,8 @@ def annotate_workbooks( :param local_model: See `get_ontogpt_annotation` documentation for details. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. + :param sample_size: Executes multiple replicates of the annotation request + to reduce variability of outputs. Variability is inherent in OntoGPT. :return: None :notes: Annotated workbooks will not be created if they already exist. """ @@ -106,6 +109,7 @@ def annotate_workbooks( output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, return_ungrounded=return_ungrounded, + sample_size=sample_size, ) From 66843ba5fe8d2c41ce78875a214a43522c51b321 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 25 Nov 2024 16:32:12 -0800 Subject: [PATCH 04/24] feat: initialize benchmark testing module Create a new module to facilitate benchmark testing, allowing for performance evaluation and optimization. --- docs/source/user/api.rst | 6 ++++++ src/spinneret/benchmark.py | 1 + 2 files changed, 7 insertions(+) create mode 100644 src/spinneret/benchmark.py diff --git a/docs/source/user/api.rst b/docs/source/user/api.rst index 85f787f..b360ae3 100644 --- a/docs/source/user/api.rst +++ b/docs/source/user/api.rst @@ -9,6 +9,12 @@ Annotator Module .. automodule:: spinneret.annotator :members: +Benchmark Module +---------------- + +.. automodule:: spinneret.benchmark + :members: + Datasets Module --------------- diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py new file mode 100644 index 0000000..ddcc047 --- /dev/null +++ b/src/spinneret/benchmark.py @@ -0,0 +1 @@ +"""The benchmark module""" From 864889eac7791a2cf91c03a176975d973f2e6caf Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Tue, 26 Nov 2024 07:32:19 -0800 Subject: [PATCH 05/24] feat: implement logging for debugging Add logging capabilities to enhance debugging and runtime monitoring. 
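This patch only adds the daiquiri dependency; the module-level loggers introduced in the following patches assume the application configures it along these lines (a minimal sketch with a placeholder log file name, mirroring the setup used in the test added by PATCH 06):

    import logging
    import daiquiri

    daiquiri.setup(
        level=logging.INFO,
        outputs=(
            daiquiri.output.File("spinneret.log"),  # placeholder file name
            "stdout",
        ),
    )
    logger = daiquiri.getLogger(__name__)
    logger.info("logging configured")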
--- environment-min.yml | 4 + environment.yml | 176 ++++++++++++++++++++++---------------------- poetry.lock | 31 +++++++- pyproject.toml | 1 + requirements.txt | 130 ++++++++++++++++---------------- 5 files changed, 192 insertions(+), 150 deletions(-) diff --git a/environment-min.yml b/environment-min.yml index f8892ff..3983356 100644 --- a/environment-min.yml +++ b/environment-min.yml @@ -16,4 +16,8 @@ dependencies: - rdflib - sphinx - sphinx-autoapi + - daiquiri + - pip + - pip: + - git+https://github.com/clnsmth/soso.git@main prefix: /opt/miniconda3/envs/spinneret diff --git a/environment.yml b/environment.yml index c919f4d..e417283 100644 --- a/environment.yml +++ b/environment.yml @@ -5,127 +5,153 @@ channels: dependencies: - alabaster=1.0.0 - annotated-types=0.7.0 - - anyascii=0.3.2 - - astroid=3.2.4 - - black=24.8.0 + - anyio=4.6.2.post1 + - astroid=3.3.5 + - babel=2.16.0 + - backoff=2.2.1 + - black=24.10.0 - brotli-python=1.1.0 - bzip2=1.0.8 - ca-certificates=2024.8.30 + - certifi=2024.8.30 - cffi=1.17.1 - - charset-normalizer=3.3.2 + - charset-normalizer=3.4.0 - click=8.1.7 - click-option-group=0.5.6 - colorama=0.4.6 - - coverage=7.6.1 - - dill=0.3.8 + - coverage=7.6.8 + - daiquiri=3.0.0 + - dill=0.3.9 - docutils=0.21.2 - dotty-dict=1.3.1 - exceptiongroup=1.2.2 - gitdb=4.0.11 - gitpython=3.1.43 + - gql=3.5.0 + - graphql-core=3.2.5 - h2=4.1.0 - hpack=4.0.0 - hyperframe=6.0.1 - - icu=75.1 + - idna=3.10 - imagesize=1.4.1 - - importlib-metadata=8.5.0 + - importlib-resources=6.4.5 + - importlib_resources=6.4.5 - iniconfig=2.0.0 - - isodate=0.6.1 + - isodate=0.7.2 - isort=5.13.2 - jinja2=3.1.4 - libblas=3.9.0 - libcblas=3.9.0 - - libcxx=18.1.8 - - libexpat=2.6.3 + - libcxx=19.1.4 + - libexpat=2.6.4 - libffi=3.4.2 - libgfortran=5.0.0 - libgfortran5=13.2.0 - libiconv=1.17 - liblapack=3.9.0 - - libopenblas=0.3.27 - - libsqlite=3.46.1 - - libxml2=2.12.7 + - libopenblas=0.3.28 + - libsqlite=3.47.0 + - libxml2=2.13.5 - libxslt=1.1.39 - libzlib=1.3.1 - - llvm-openmp=18.1.8 + - llvm-openmp=19.1.4 - lxml=5.3.0 - markdown-it-py=3.0.0 - - markupsafe=2.1.5 + - markupsafe=3.0.2 - mccabe=0.7.0 + - mdit-py-plugins=0.4.2 - mdurl=0.1.2 + - multidict=6.1.0 - mypy_extensions=1.0.0 - myst-parser=4.0.0 - ncurses=6.5 - - openssl=3.3.2 - - packaging=24.1 - - pandas=2.2.2 + - openssl=3.4.0 + - packaging=24.2 + - pandas=2.2.3 - pathspec=0.12.1 - - pip=24.2 + - pip=24.3.1 + - platformdirs=4.3.6 - pluggy=1.5.0 + - propcache=0.2.0 - pycparser=2.22 + - pydantic=2.10.1 + - pydantic-core=2.27.1 - pygments=2.18.0 + - pylint=3.3.1 + - pyparsing=3.2.0 - pysocks=1.7.1 - - pytest-cov=5.0.0 + - pytest=8.3.3 + - pytest-cov=6.0.0 - pytest-mock=3.14.0 - python=3.11.10 - - python-tzdata=2024.1 + - python-dateutil=2.9.0.post0 + - python-gitlab=4.13.0 + - python-json-logger=2.0.7 + - python-semantic-release=9.14.0 + - python-tzdata=2024.2 - python_abi=3.11 + - pytz=2024.1 - pyyaml=6.0.2 - - rdflib=7.0.0 + - rdflib=7.1.1 - readline=8.2 - requests=2.32.3 - requests-toolbelt=1.0.0 + - rich=13.9.4 + - setuptools=75.6.0 - shellingham=1.5.4 - six=1.16.0 + - smmap=5.0.0 + - sniffio=1.3.1 - snowballstemmer=2.2.0 - - sphinx=8.0.2 + - sphinx=8.1.3 + - sphinx-autoapi=3.3.3 - sphinxcontrib-applehelp=2.0.0 - sphinxcontrib-devhelp=2.0.0 - sphinxcontrib-htmlhelp=2.1.0 - sphinxcontrib-jsmath=1.0.1 - sphinxcontrib-qthelp=2.0.0 + - sphinxcontrib-serializinghtml=1.1.10 + - stdlib-list=0.11.0 - tk=8.6.13 - toml=0.10.2 - - tomli=2.0.1 + - tomli=2.1.0 - tomlkit=0.13.2 - typing-extensions=4.12.2 - typing_extensions=4.12.2 - - tzdata=2024a - - 
wheel=0.44.0 + - tzdata=2024b + - wheel=0.45.1 - xz=5.2.6 - yaml=0.2.5 - - zipp=3.20.2 + - yarl=1.18.0 + - zipp=3.21.0 - zstandard=0.23.0 - zstd=1.5.6 - pip: - adeft==0.12.3 - aiofiles==24.1.0 - aiohappyeyeballs==2.4.3 - - aiohttp==3.10.10 + - aiohttp==3.11.7 - aiosignal==1.3.1 - airium==0.2.6 - aniso8601==9.0.1 - antlr4-python3-runtime==4.9.3 - - anyio==4.6.2.post1 - appdirs==1.4.4 - arrow==1.3.0 - attrs==24.2.0 - - babel==2.16.0 - bcp47==0.1.0 - beautifulsoup4==4.12.3 - bioc==2.1 - - blinker==1.8.2 - - boto3==1.35.54 - - botocore==1.35.54 + - blinker==1.9.0 + - boto3==1.35.69 + - botocore==1.35.69 - cachier==3.1.2 - cattrs==24.1.2 - - certifi==2024.8.30 - cfgraph==0.2.1 - chardet==5.2.0 - - class-resolver==0.5.2 + - class-resolver==0.5.4 - curies==0.9.0 - defusedxml==0.7.1 - - deprecated==1.2.14 + - deprecated==1.2.15 - deprecation==2.1.0 - diskcache==5.6.3 - distro==1.9.0 @@ -135,7 +161,7 @@ dependencies: - eutils==0.6.0 - fastobo==0.12.3 - filelock==3.16.1 - - flask==3.0.3 + - flask==3.1.0 - flask-restx==1.3.0 - fqdn==1.5.1 - frontend==0.0.3 @@ -143,21 +169,21 @@ dependencies: - fsspec==2024.10.0 - funowl==0.2.3 - gilda==1.4.0 + - graphviz==0.20.3 - h11==0.14.0 - hbreader==0.9.1 - html5lib==1.1 - - httpcore==1.0.6 + - httpcore==1.0.7 - httpx==0.27.2 - huggingface-hub==0.26.2 - - idna==3.10 - ijson==3.3.0 - - importlib-resources==6.4.5 + - importlib-metadata==8.5.0 - inflect==7.4.0 - inflection==0.5.1 - intervaltree==3.1.0 - isoduration==20.11.0 - itsdangerous==2.2.0 - - jiter==0.7.0 + - jiter==0.7.1 - jmespath==1.0.1 - joblib==1.4.2 - json-flattener==0.1.9 @@ -177,105 +203,83 @@ dependencies: - linkml-owl==0.3.0 - linkml-renderer==0.3.1 - linkml-runtime==1.8.3 - - litellm==1.51.2 - - mdit-py-plugins==0.4.2 + - litellm==1.52.16 - more-click==0.1.2 - more-itertools==10.5.0 - - multidict==6.1.0 - ndex2==3.9.0 - networkx==3.4.2 - nltk==3.9.1 - - numpy==2.1.1 + - numpy==2.0.2 - oaklib==0.6.18 - ols-client==0.1.4 - - ontogpt==1.0.6 + - ontogpt==1.0.8 - ontoportal-client==0.0.4 - - openai==1.53.0 + - openai==1.55.1 - openpyxl==3.1.5 - owlrl==6.0.2 - pansql==0.0.1 - parse==1.20.2 - - platformdirs==4.3.6 - ply==3.11 - - portalocker==2.10.1 + - portalocker==3.0.0 - prefixcommons==0.1.12 - prefixmaps==0.2.6 - - prettytable==3.11.0 + - prettytable==3.12.0 - pronto==2.5.8 - - propcache==0.2.0 - - pydantic==2.9.2 - - pydantic-core==2.23.4 - pyjsg==0.11.10 - - pylint==3.2.7 - - pymupdf==1.24.13 - - pyparsing==3.1.4 + - pymupdf==1.24.14 - pyshacl==0.26.0 - pyshex==0.8.1 - pyshexc==0.9.1 - pysolr==3.10.0 - - pystow==0.5.6 - - pytest==8.3.3 + - pystow==0.6.1 - pytest-logging==2015.11.4 - - python-dateutil==2.9.0.post0 - python-dotenv==1.0.1 - - python-gitlab==4.11.1 - - python-graphviz==0.20.3 - - python-semantic-release==9.8.8 - pytrie==0.4.0 - - pytz==2024.2 - ratelimit==2.2.1 - rdflib-jsonld==0.6.1 - rdflib-shim==1.0.3 - referencing==0.35.1 - - regex==2024.9.11 + - regex==2024.11.6 - requests-cache==1.2.1 - rfc3339-validator==0.1.4 - rfc3987==1.3.8 - - rich==13.8.1 - - rpds-py==0.20.1 + - rpds-py==0.21.0 - ruamel-yaml==0.18.6 - ruamel-yaml-clib==0.2.12 - - s3transfer==0.10.3 + - s3transfer==0.10.4 - scikit-learn==1.4.2 - scipy==1.14.1 - semsql==0.3.3 - - setuptools==75.3.0 - shexjsg==0.8.2 - - smmap==5.0.1 - - sniffio==1.3.1 - sortedcontainers==2.4.0 - - soso==0.2.0 - soupsieve==2.6 - sparqlslurper==0.5.1 - sparqlwrapper==2.0.0 - - sphinx-autoapi==3.3.1 - - sphinxcontrib-serializinghtml==2.0.0 - - spinneret==0.1.0 - sqlalchemy==2.0.36 - sqlalchemy-utils==0.38.3 - - sssom==0.4.12 + - sssom==0.4.13 - 
sssom-schema==1.0.0 - - starlette==0.41.2 + - starlette==0.41.3 - tenacity==9.0.0 - threadpoolctl==3.5.0 - - tiktoken==0.7.0 - - tokenizers==0.20.1 - - tqdm==4.66.6 - - typeguard==4.4.0 + - tiktoken==0.8.0 + - tokenizers==0.20.3 + - tqdm==4.67.1 + - typeguard==4.4.1 - types-python-dateutil==2.9.0.20241003 - unidecode==1.3.8 - uri-template==1.3.0 - url-normalize==1.4.3 - urllib3==1.26.20 - - uvicorn==0.32.0 + - uvicorn==0.32.1 - validators==0.34.0 - watchdog==6.0.0 - wcwidth==0.2.13 - - webcolors==24.8.0 + - webcolors==24.11.1 - webencodings==0.5.1 - - werkzeug==3.1.1 + - werkzeug==3.1.3 - wikipedia==1.4.0 - wikipedia-api==0.7.1 - - wrapt==1.16.0 - - yarl==1.17.1 + - wrapt==1.17.0 + - git+https://github.com/clnsmth/soso.git@main prefix: /opt/miniconda3/envs/spinneret diff --git a/poetry.lock b/poetry.lock index e4ae891..98a35ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -812,6 +812,24 @@ pandas = ["pandas"] rdflib = ["rdflib"] tests = ["coverage", "pytest", "requests"] +[[package]] +name = "daiquiri" +version = "3.2.5.1" +description = "Library to configure Python logging easily" +optional = false +python-versions = ">=3.8" +files = [ + {file = "daiquiri-3.2.5.1-py3-none-any.whl", hash = "sha256:4a3457f54fc077e12796b258dfdc7f16572177e41c95d84c54bf010a9fd371d3"}, + {file = "daiquiri-3.2.5.1.tar.gz", hash = "sha256:5f2e86d6fca8bc38d1e9adfa605184df6fdea3702e07ca02d16aa3d0043b2eec"}, +] + +[package.dependencies] +python-json-logger = "*" + +[package.extras] +systemd = ["systemd-python (>=234)"] +test = ["pytest"] + [[package]] name = "defusedxml" version = "0.7.1" @@ -3846,6 +3864,17 @@ autocompletion = ["argcomplete (>=1.10.0,<3)"] graphql = ["gql[httpx] (>=3.5.0,<4)"] yaml = ["PyYaml (>=6.0.1)"] +[[package]] +name = "python-json-logger" +version = "2.0.7" +description = "A python library adding a json log formatter" +optional = false +python-versions = ">=3.6" +files = [ + {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, + {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, +] + [[package]] name = "python-semantic-release" version = "9.8.8" @@ -5778,4 +5807,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "504cee6f72fc1b325a1c41af49eabb86803369006287185a9f1db856e18e7dc8" +content-hash = "c615df7205bca842a4fd1edb0ea32df5fa2ff3d72506cc28285f4d4a300e708f" diff --git a/pyproject.toml b/pyproject.toml index 5dbc5e6..0185814 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ rdflib = "^7.0.0" lxml = "^5.3.0" soso = {git = "https://github.com/clnsmth/soso.git", rev = "main"} ontogpt = "^1.0.6" +daiquiri = "^3.2.5.1" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" diff --git a/requirements.txt b/requirements.txt index ea61ee6..55de71b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,27 +1,27 @@ adeft==0.12.3 aiofiles==24.1.0 aiohappyeyeballs==2.4.3 -aiohttp==3.10.10 +aiohttp==3.11.7 aiosignal==1.3.1 airium==0.2.6 alabaster==1.0.0 aniso8601==9.0.1 annotated-types==0.7.0 antlr4-python3-runtime==4.9.3 -anyascii==0.3.2 anyio==4.6.2.post1 appdirs==1.4.4 arrow==1.3.0 -astroid==3.2.4 +astroid==3.3.5 attrs==24.2.0 babel==2.16.0 +backoff==2.2.1 bcp47==0.1.0 beautifulsoup4==4.12.3 bioc==2.1 -black==24.8.0 -blinker==1.8.2 -boto3==1.35.54 -botocore==1.35.54 +black==24.10.0 +blinker==1.9.0 +boto3==1.35.69 +botocore==1.35.69 Brotli==1.1.0 cachier==3.1.2 
cattrs==24.1.2 @@ -29,17 +29,18 @@ certifi==2024.8.30 cffi==1.17.1 CFGraph==0.2.1 chardet==5.2.0 -charset-normalizer==3.3.2 -class_resolver==0.5.2 +charset-normalizer==3.4.0 +class_resolver==0.5.4 click==8.1.7 click-option-group==0.5.6 colorama==0.4.6 -coverage==7.6.1 +coverage==7.6.8 curies==0.9.0 +daiquiri==3.0.0 defusedxml==0.7.1 -Deprecated==1.2.14 +Deprecated==1.2.15 deprecation==2.1.0 -dill==0.3.8 +dill==0.3.9 diskcache==5.6.3 distro==1.9.0 docopt==0.6.2 @@ -51,7 +52,7 @@ eutils==0.6.0 exceptiongroup==1.2.2 fastobo==0.12.3 filelock==3.16.1 -Flask==3.0.3 +Flask==3.1.0 flask-restx==1.3.0 fqdn==1.5.1 frontend==0.0.3 @@ -61,13 +62,15 @@ funowl==0.2.3 gilda==1.4.0 gitdb==4.0.11 GitPython==3.1.43 +gql==3.5.0 +graphql-core==3.2.5 graphviz==0.20.3 h11==0.14.0 h2==4.1.0 hbreader==0.9.1 hpack==4.0.0 html5lib==1.1 -httpcore==1.0.6 +httpcore==1.0.7 httpx==0.27.2 huggingface-hub==0.26.2 hyperframe==6.0.1 @@ -80,12 +83,12 @@ inflect==7.4.0 inflection==0.5.1 iniconfig==2.0.0 intervaltree==3.1.0 -isodate==0.6.1 +isodate==0.7.2 isoduration==20.11.0 isort==5.13.2 itsdangerous==2.2.0 Jinja2==3.1.4 -jiter==0.7.0 +jiter==0.7.1 jmespath==1.0.1 joblib==1.4.2 json-flattener==0.1.9 @@ -105,10 +108,10 @@ linkml-dataops==0.1.0 linkml-owl==0.3.0 linkml-renderer==0.3.1 linkml-runtime==1.8.3 -litellm==1.51.2 +litellm==1.52.16 lxml==5.3.0 markdown-it-py==3.0.0 -MarkupSafe==2.1.5 +MarkupSafe==3.0.2 mccabe==0.7.0 mdit-py-plugins==0.4.2 mdurl==0.1.2 @@ -120,126 +123,127 @@ myst-parser==4.0.0 ndex2==3.9.0 networkx==3.4.2 nltk==3.9.1 -numpy==2.1.1 +numpy==2.0.2 oaklib==0.6.18 ols-client==0.1.4 -ontogpt==1.0.6 +ontogpt==1.0.8 ontoportal-client==0.0.4 -openai==1.53.0 +openai==1.55.1 openpyxl==3.1.5 owlrl==6.0.2 -packaging==24.1 -pandas==2.2.2 +packaging==24.2 +pandas==2.2.3 pansql==0.0.1 parse==1.20.2 pathspec==0.12.1 -pip==24.2 +pip==24.3.1 platformdirs==4.3.6 pluggy==1.5.0 ply==3.11 -portalocker==2.10.1 +portalocker==3.0.0 prefixcommons==0.1.12 prefixmaps==0.2.6 -prettytable==3.11.0 +prettytable==3.12.0 pronto==2.5.8 propcache==0.2.0 pycparser==2.22 -pydantic==2.9.2 -pydantic_core==2.23.4 +pydantic==2.10.1 +pydantic_core==2.27.1 Pygments==2.18.0 PyJSG==0.11.10 -pylint==3.2.7 -PyMuPDF==1.24.13 -pyparsing==3.1.4 +pylint==3.3.1 +PyMuPDF==1.24.14 +pyparsing==3.2.0 pyshacl==0.26.0 PyShEx==0.8.1 PyShExC==0.9.1 PySocks==1.7.1 pysolr==3.10.0 -pystow==0.5.6 +pystow==0.6.1 pytest==8.3.3 -pytest-cov==5.0.0 +pytest-cov==6.0.0 pytest-logging==2015.11.4 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-gitlab==4.11.1 -python-semantic-release==9.8.8 +python-gitlab==4.13.0 +python-json-logger==2.0.7 +python-semantic-release==9.14.0 PyTrie==0.4.0 -pytz==2024.2 +pytz==2024.1 PyYAML==6.0.2 ratelimit==2.2.1 -rdflib==7.0.0 +rdflib==7.1.1 rdflib-jsonld==0.6.1 rdflib-shim==1.0.3 referencing==0.35.1 -regex==2024.9.11 +regex==2024.11.6 requests==2.32.3 requests-cache==1.2.1 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 rfc3987==1.3.8 -rich==13.8.1 -rpds-py==0.20.1 +rich==13.9.4 +rpds-py==0.21.0 ruamel.yaml==0.18.6 ruamel.yaml.clib==0.2.12 -s3transfer==0.10.3 +s3transfer==0.10.4 scikit-learn==1.4.2 scipy==1.14.1 semsql==0.3.3 -setuptools==75.3.0 +setuptools==75.6.0 shellingham==1.5.4 ShExJSG==0.8.2 six==1.16.0 -smmap==5.0.1 +smmap==5.0.0 sniffio==1.3.1 snowballstemmer==2.2.0 sortedcontainers==2.4.0 -soso==0.2.0 +soso @ git+https://github.com/clnsmth/soso.git@main soupsieve==2.6 sparqlslurper==0.5.1 SPARQLWrapper==2.0.0 -Sphinx==8.0.2 -sphinx-autoapi==3.3.1 +Sphinx==8.1.3 +sphinx-autoapi==3.3.3 
sphinxcontrib-applehelp==2.0.0 sphinxcontrib-devhelp==2.0.0 sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 -sphinxcontrib-serializinghtml==2.0.0 -spinneret==0.1.0 +sphinxcontrib-serializinghtml==1.1.10 SQLAlchemy==2.0.36 SQLAlchemy-Utils==0.38.3 -sssom==0.4.12 +sssom==0.4.13 sssom-schema==1.0.0 -starlette==0.41.2 +starlette==0.41.3 +stdlib-list==0.11.0 tenacity==9.0.0 threadpoolctl==3.5.0 -tiktoken==0.7.0 -tokenizers==0.20.1 +tiktoken==0.8.0 +tokenizers==0.20.3 toml==0.10.2 -tomli==2.0.1 +tomli==2.1.0 tomlkit==0.13.2 -tqdm==4.66.6 -typeguard==4.4.0 +tqdm==4.67.1 +typeguard==4.4.1 types-python-dateutil==2.9.0.20241003 typing_extensions==4.12.2 -tzdata==2024.1 +tzdata==2024.2 Unidecode==1.3.8 uri-template==1.3.0 url-normalize==1.4.3 urllib3==1.26.20 -uvicorn==0.32.0 +uvicorn==0.32.1 validators==0.34.0 watchdog==6.0.0 wcwidth==0.2.13 -webcolors==24.8.0 +webcolors==24.11.1 webencodings==0.5.1 -Werkzeug==3.1.1 -wheel==0.44.0 +Werkzeug==3.1.3 +wheel==0.45.1 wikipedia==1.4.0 Wikipedia-API==0.7.1 -wrapt==1.16.0 -yarl==1.17.1 -zipp==3.20.2 +wrapt==1.17.0 +yarl==1.18.0 +zipp==3.21.0 zstandard==0.23.0 From d667b3161d4cd533cb3ea8764dba26b56a85cd50 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 27 Nov 2024 06:48:04 -0800 Subject: [PATCH 06/24] feat: implement performance metric logging Add logging for performance metrics to enable in-depth analysis and optimization. - Create a context manager to log metrics of interest (runtime and memory usage). - Estimate tokens per LLM call using word count. --- src/spinneret/annotator.py | 29 +++++++++++++++++++++++++---- src/spinneret/benchmark.py | 35 +++++++++++++++++++++++++++++++++++ src/spinneret/main.py | 5 +++++ tests/test_benchmark.py | 30 ++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 tests/test_benchmark.py diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 3fed6aa..1dd91e2 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -8,6 +8,7 @@ from requests import get, exceptions import pandas as pd from lxml import etree +from daiquiri import getLogger from spinneret.workbook import ( delete_annotations, initialize_workbook_row, @@ -25,6 +26,8 @@ expand_curie, ) +logger = getLogger(__name__) + # pylint: disable=too-many-lines @@ -85,6 +88,8 @@ def get_bioportal_annotation( key can be loaded as an environment variable from the configuration file (see `utilities.load_configuration`). """ + logger.info(f"Text contains {len(text.split())} words") + # Construct the query url = "https://data.bioontology.org/annotator" payload = { @@ -159,6 +164,7 @@ def annotate_workbook( path as the original workbook. """ print(f"Annotating workbook {workbook_path}") + logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] @@ -388,7 +394,9 @@ def add_qudt_annotations_to_workbook( :param output_path: The path to write the annotated workbook. :param overwrite: If True, overwrite existing `QUDT` annotations in the `workbook, so a fresh set may be created. - :returns: Workbook with QUDT annotations.""" + :returns: Workbook with QUDT annotations. + """ + logger.info("Annotating units") # Parameters for the function predicate = "uses standard" @@ -484,7 +492,9 @@ def add_dataset_annotations_to_workbook( workbook, so a fresh set may be created. 
:param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with dataset annotations.""" + :returns: Workbook with dataset annotations. + """ + logger.info("Annotating dataset") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -581,7 +591,9 @@ def add_measurement_type_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with measurement type annotations.""" + :returns: Workbook with measurement type annotations. + """ + logger.info("Annotating measurement type") # Parameters for the function predicate = "contains measurements of type" @@ -714,6 +726,8 @@ def get_ontogpt_annotation( is required to use this function. For more information, see: https://monarch-initiative.github.io/ontogpt/. """ + logger.info(f"Text contains {len(text.split())} words") + # OntoGPT transacts in files, so we write the input text to a temporary # file and receive the results as a JSON file. Once the results are parsed # we can discard the files. @@ -792,6 +806,7 @@ def add_process_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating process") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -907,6 +922,7 @@ def add_env_broad_scale_annotations_to_workbook( annotations using OntoGPT, which requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating broad scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1023,6 +1039,7 @@ def add_env_local_scale_annotations_to_workbook( annotations using OntoGPT, which requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating local scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1137,7 +1154,9 @@ def add_env_medium_annotations_to_workbook( `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with environmental medium annotations.""" + :returns: Workbook with environmental medium annotations. + """ + logger.info("Annotating environmental medium") # Parameters for the function predicate = "environmental material" @@ -1255,6 +1274,7 @@ def add_research_topic_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. """ + logger.info("Annotating research topic") # Load the workbook and EML for processing wb = load_workbook(workbook) @@ -1370,6 +1390,7 @@ def add_methods_annotations_to_workbook( requires setup and configuration described in the `get_ontogpt_annotation` function. 
""" + logger.info("Annotating methods") # Load the workbook and EML for processing wb = load_workbook(workbook) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index ddcc047..548aa26 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -1 +1,36 @@ """The benchmark module""" + +import time +import tracemalloc +from contextlib import contextmanager +from daiquiri import getLogger + + +logger = getLogger(__name__) + + +@contextmanager +def monitor(name: str) -> None: + """ + Context manager to monitor the duration and memory usage of a function + using the `daiquiri` package logger. + + :param name: The name of the function being monitored. + :return: None + """ + start_time = time.time() + tracemalloc.start() + logger.info(f"Starting function '{name}'") + try: + yield # The code inside the `with` block runs here + except Exception as e: + logger.error(f"Function '{name}' raised an exception: {e}") + raise + finally: + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + duration = time.time() - start_time + logger.info(f"Function '{name}' completed in {duration:.4f} seconds") + logger.info( + f"Memory usage: Current={current / 1024:.2f} KB; Peak={peak / 1024:.2f} KB" + ) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 80409dc..7579921 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -4,6 +4,7 @@ from pathlib import Path from requests import get, codes from rdflib import Graph +import daiquiri from soso.main import convert from soso.strategies.eml import EML, get_encoding_format from soso.utilities import delete_null_values, generate_citation_from_doi @@ -14,6 +15,9 @@ from spinneret.shadow import create_shadow_eml +logger = daiquiri.getLogger(__name__) + + def create_workbooks(eml_dir: str, workbook_dir: str) -> None: """Create workbooks for each EML file in a directory :param eml_dir: Directory of EML files @@ -101,6 +105,7 @@ def annotate_workbooks( continue # Create annotated workbook + logger.info(f"Creating annotated workbook for {workbook_file}") print(f"Creating annotated workbook for {workbook_file}") annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000..119bf2c --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,30 @@ +"""Test benchmark code""" + +import logging +import daiquiri +from spinneret.benchmark import monitor + + +def test_monitor(tmp_path): + """Test the monitor context manager""" + + def example_function(): # to call with monitor + return 1 + 1 + + log_file = tmp_path / "test.log" # set up daiquiri logger + daiquiri.setup( + level=logging.INFO, + outputs=( + daiquiri.output.File(log_file), + "stdout", + ), + ) + + with monitor("example_function"): # test with monitor context manager + example_function() + + with open(log_file, "r", encoding="utf-8") as file: + log = file.read() + assert "Starting function 'example_function'" in log + assert "Function 'example_function' completed in" in log + assert "Memory usage: Current=" in log From 23907c66d9af541ac233782e0b24d97f5e96499e Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 27 Nov 2024 07:19:42 -0800 Subject: [PATCH 07/24] refactor: replace print statements with logging Replace print statements with logging statements to enable more structured and persistent output. This change provides flexibility for capturing and analyzing runtime information. 
--- src/spinneret/annotator.py | 16 ++++++++-------- src/spinneret/main.py | 11 +++++------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 1dd91e2..78216a5 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -115,7 +115,7 @@ def get_bioportal_annotation( r = get(url, params=payload, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling https://data.bioontology.org/annotator: {e}") + logger.error(f"Error calling https://data.bioontology.org/annotator: {e}") return None # Parse the results @@ -126,7 +126,7 @@ def get_bioportal_annotation( r = get(self_link, params={"apikey": api_key}, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling {self_link}: {e}") + logger.error(f"Error calling {self_link}: {e}") return None uri = r.json().get("@id", None) label = r.json().get("prefLabel", None) @@ -163,14 +163,14 @@ def annotate_workbook( in the EML file. The annotated workbook is written back to the same path as the original workbook. """ - print(f"Annotating workbook {workbook_path}") + logger.info(f"Annotating workbook {workbook_path}") logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] eml_file = pid + ".xml" if eml_file not in eml_path: - print(f"EML file {eml_file} does not match workbook {workbook_path}") + logger.warning(f"EML file {eml_file} does not match workbook {workbook_path}") return None # Load the workbook and EML for processing @@ -366,14 +366,14 @@ def get_qudt_annotation(text: str) -> Union[list, None]: r = get(url, timeout=10) r.raise_for_status() except exceptions.RequestException as e: - print(f"Error calling {url}: {e}") + logger.error(f"Error calling {url}: {e}") return None if r.text == "No_Match": return None try: # the service has a few JSON encoding bugs json = loads(r.text) except decoder.JSONDecodeError as e: - print(f"Error decoding JSON from {url}: {e}") + logger.error(f"Error decoding JSON from {url}: {e}") return None label = json["qudtLabel"] uri = json["qudtURI"] @@ -753,7 +753,7 @@ def get_ontogpt_annotation( os.system(f"rm -rf {cache_path}") os.system(cmd) except Exception as e: # pylint: disable=broad-exception-caught - print(f"Error calling OntoGPT: {e}") + logger.error(f"Error calling OntoGPT: {e}") return None # Parse the results @@ -761,7 +761,7 @@ def get_ontogpt_annotation( with open(output_file, "r", encoding="utf-8") as f: r = load(f) except FileNotFoundError as e: - print(f"Error reading OntoGPT output file: {e}") + logger.error(f"Error reading OntoGPT output file: {e}") return None named_entities = r.get("named_entities") if named_entities is None: # OntoGPT couldn't find any annotations diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 7579921..7a812ee 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -41,7 +41,7 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: continue # Create workbook - print(f"Creating workbook for {eml_file}") + logger.info(f"Creating workbook for {eml_file}") wb = workbook.create( eml_file=eml_dir + "/" + eml_file, elements=["dataset", "attribute"], @@ -101,12 +101,11 @@ def annotate_workbooks( eml_pid = workbook_file.split("_")[0] eml_file = eml_pid + ".xml" if not os.path.exists(eml_dir + "/" + eml_file): - print(f"Could not find EML file for {workbook_file}") + logger.warning(f"Could not 
find EML file for {workbook_file}") continue # Create annotated workbook logger.info(f"Creating annotated workbook for {workbook_file}") - print(f"Creating annotated workbook for {workbook_file}") annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, eml_path=eml_dir + "/" + eml_file, @@ -145,7 +144,7 @@ def annotate_eml_files(workbook_dir: str, eml_dir: str, output_dir: str) -> None continue # Create annotated EML file - print(f"Creating annotated EML file for {eml_path}") + logger.info(f"Creating annotated EML file for {eml_path}") annotate_eml( eml=eml_path, workbook=workbook_dir + "/" + workbook_file, @@ -176,7 +175,7 @@ def create_soso_files(eml_dir: str, output_dir: str) -> None: soso_file = eml_pid + ".json" if soso_file in soso_files: continue - print(f"Creating SOSO file for {eml_file}") + logger.info(f"Creating SOSO file for {eml_file}") # Add properties that can't be derived from the EML record scope, identifier, revision = eml_pid.split(".") @@ -292,7 +291,7 @@ def create_shadow_eml_files(eml_dir: str, output_dir: str) -> None: continue # Create shadow EML file - print(f"Creating shadow EML file for {eml_file}") + logger.info(f"Creating shadow EML file for {eml_file}") create_shadow_eml( eml_path=eml_dir + "/" + eml_file, output_path=output_dir + "/" + shadow_file, From f003b96237e7e97ea83b21b51612d2959f3f93fb Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 4 Dec 2024 15:20:46 -0800 Subject: [PATCH 08/24] feat: implement benchmark data collection and testing Add functionality to collect and analyze benchmark data, including a dedicated test suite to evaluate this routine. We have opted for a baseline comparison method to evaluate the performance of our algorithm across different parameterizations. This approach offers several advantages, including efficiency and interpretability. By directly comparing each parameterization to a fixed baseline, we can quickly assess its relative performance and identify the optimal configuration. While this method may not uncover subtle differences between parameterizations that are both better or worse than the baseline, it provides a practical and timely solution for our specific goals. 
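An illustrative invocation of the new routine, with directory names patterned after the test fixtures added in this patch (the CSV output path is a placeholder):

    from spinneret.benchmark import benchmark_against_standard

    results = benchmark_against_standard(
        standard_dir="tests/data/benchmark/standard",
        test_dirs=[
            "tests/data/benchmark/test_a",
            "tests/data/benchmark/test_b",
        ],
    )
    results.to_csv("benchmark_results.csv", index=False)  # placeholder path

Each row of the returned DataFrame compares the standard and test object_id sets for one predicate/element_xpath combination, so a single run evaluates every test condition against the same baseline.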
--- src/spinneret/benchmark.py | 340 ++++++++ src/spinneret/data/prefixmaps.csv | 753 ++++++++++++++++++ src/spinneret/utilities.py | 33 + tests/conftest.py | 37 + ...ntl.1.59_annotation_workbook_annotated.tsv | 78 ++ ...ntl.2.37_annotation_workbook_annotated.tsv | 45 ++ tests/data/benchmark/standard/notes.txt | 1 + .../termset_similarity_score_processed.json | 12 + .../termset_similarity_score_raw.json | 59 ++ ...ntl.1.59_annotation_workbook_annotated.tsv | 71 ++ ...ntl.2.37_annotation_workbook_annotated.tsv | 42 + tests/data/benchmark/test_a/notes.txt | 1 + ...ntl.1.59_annotation_workbook_annotated.tsv | 124 +++ ...ntl.2.37_annotation_workbook_annotated.tsv | 66 ++ tests/data/benchmark/test_b/notes.txt | 1 + tests/test_benchmark.py | 226 +++++- tests/test_utilities.py | 20 + 17 files changed, 1908 insertions(+), 1 deletion(-) create mode 100644 src/spinneret/data/prefixmaps.csv create mode 100644 tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/standard/notes.txt create mode 100644 tests/data/benchmark/termset_similarity_score_processed.json create mode 100644 tests/data/benchmark/termset_similarity_score_raw.json create mode 100644 tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_a/notes.txt create mode 100644 tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv create mode 100644 tests/data/benchmark/test_b/notes.txt diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 548aa26..2cd8b69 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -1,9 +1,17 @@ """The benchmark module""" +import os +from typing import Union import time +from collections import OrderedDict +import tempfile import tracemalloc +from json import load from contextlib import contextmanager from daiquiri import getLogger +import pandas as pd +from spinneret.utilities import load_workbook, compress_uri +from spinneret.workbook import delete_duplicate_annotations logger = getLogger(__name__) @@ -34,3 +42,335 @@ def monitor(name: str) -> None: logger.info( f"Memory usage: Current={current / 1024:.2f} KB; Peak={peak / 1024:.2f} KB" ) + + +def benchmark_against_standard(standard_dir: str, test_dirs: list) -> pd.DataFrame: + """ + Benchmarks the performance of test data against a standard. Currently + supports select ontologies from the OBO Foundry. + + :param standard_dir: Directory containing the standard annotated workbook + files. + :param test_dirs: List of directories containing the test annotated + workbook files. Each directory represents a different test condition. + :return: A pandas DataFrame containing the benchmark results. Comparisons + are made between the standard and test data for each predicate and + element_xpath combination. The DataFrame contains the following + columns: + + - standard_dir: The directory containing the standard annotated + workbook files. + - test_dir: The directory containing the test annotated workbook files. + - standard_file: The name of the standard annotated workbook file. + - predicate_value: The value of the predicate column. 
+ - element_xpath_value: The value of the element_xpath column. + - standard_set: The set of object_ids from the standard data. + - test_set: The set of object_ids from the test data. + - average_score: The average termset similarity score between the + standard and test sets. + - best_score: The best termset similarity score between the standard + and test sets. + - average_jaccard_similarity: The average Jaccard similarity score + between the standard and test sets. + - best_jaccard_similarity: The best Jaccard similarity score between + the standard and test sets. + - average_phenodigm_score: The average Phenodigm score between the + standard and test sets. + - best_phenodigm_score: The best Phenodigm score between the standard + and test sets. + - average_standard_information_content: The average information content + score of the standard set. + - best_standard_information_content: The best information content + score of the standard set. + - average_test_information_content: The average information content + score of the test set. + - best_test_information_content: The best information content score of + the test set. + """ + res = [] + + for standard_file in os.listdir(standard_dir): + if not standard_file.endswith(".tsv"): # we are expecting tsv files + continue + standard_path = os.path.join(standard_dir, standard_file) + + for test_dir in test_dirs: + test_path = os.path.join(test_dir, standard_file) + if not os.path.exists(test_path): # we need a matching test file + continue + + standard = load_workbook(standard_path) + test = load_workbook(test_path) + + # Prepare the data for comparison + standard = clean_workbook(standard) + test = clean_workbook(test) + standard = group_object_ids(standard) + test = group_object_ids(test) + standard = compress_object_ids(standard) + test = compress_object_ids(test) + + for key, standard_set in standard.items(): + if key not in test: + continue + test_set = test[key] + + scores = get_termset_similarity(standard_set, test_set) + if scores is None: + continue + + # Parse the scores and add them to the results + r = OrderedDict() + r["standard_dir"] = standard_dir + r["test_dir"] = test_dir + r["standard_file"] = standard_file + r["predicate_value"] = key[0] + r["element_xpath_value"] = key[1] + r["standard_set"] = standard_set + r["test_set"] = test_set + r.update(scores) + res.append(r) + + return pd.DataFrame(res) + + +def get_termset_similarity(set1: list, set2: list) -> dict: + """ + Calculate the similarity between two sets of terms. + + :param set1: List of CURIEs for the first set of terms. + :param set2: List of CURIEs for the second set of terms. + :return: A dictionary containing termset similarity and information content + scores. Default values, defined in + `benchmark.default_similarity_scores` are returned if the similarity + scores cannot be calculated or if an error occurs. For more information + on scoring, see the `oaklib` documentation: + https://incatools.github.io/ontology-access-kit/guide/similarity.html. 
+ """ + res = default_similarity_scores() # a default ensures consistent returns + + # Clean the input sets in preparation for similarity scoring + set1 = [term for term in set1 if term is not None] # can't compare None + set2 = [term for term in set2 if term is not None] + set1 = delete_terms_from_unsupported_ontologies(set1) + set2 = delete_terms_from_unsupported_ontologies(set2) + + if not set1 or not set2: # can't calculate similarity of empty sets + logger.info("Cannot calculate similarity for empty sets") + return res + + db = get_shared_ontology(set1, set2) + if db is None: # can't compare terms from different ontologies + return res + + # Write output file to a temporary location to be read back in later. We + # do this because the output cannot be returned as an object. + with tempfile.TemporaryDirectory() as temp_dir: + output_file = os.path.join(temp_dir, "output.json") + + # Construct and run the termset-similarity command + cmd = ( + f"runoak -i {db} termset-similarity -o {output_file} -O json " + f"{' '.join(set1)} @ {' '.join(set2)}" + ) + try: + os.system(cmd) + except Exception as e: # pylint: disable=broad-exception-caught + logger.error(f"Error running termset-similarity command: {e}") + return res + + # Read and parse the similarity scores + try: + with open(output_file, "r", encoding="utf-8") as f: + scores = load(f) + except FileNotFoundError as e: + logger.error(f"Error reading termset-similarity output file: {e}") + return res + res = parse_similarity_scores(scores) + return res + + +def default_similarity_scores() -> dict: + """ + :return: A dictionary containing default similarity scores. Values are set + following `oaklib` conventions. + """ + res = OrderedDict() + res["average_score"] = 0.0 + res["best_score"] = 0.0 + res["average_jaccard_similarity"] = pd.NA + res["best_jaccard_similarity"] = pd.NA + res["average_phenodigm_score"] = pd.NA + res["best_phenodigm_score"] = pd.NA + res["average_standard_information_content"] = pd.NA + res["best_standard_information_content"] = pd.NA + res["average_test_information_content"] = pd.NA + res["best_test_information_content"] = pd.NA + return res + + +def clean_workbook(workbook: pd.DataFrame) -> pd.DataFrame: + """ + Clean a workbook for benchmarking. + + :param workbook: The workbook to clean. + :return: The cleaned workbook. + """ + # Remove rows where the "object_id" is NaN. This is necessary because + # the termset similarity function cannot handle NaN values. + workbook = workbook.dropna(subset=["object_id"]) + + # Remove rows where the "object_id" starts with "AUTO:", these terms are + # not grounded to any ontology and therefore cannot be compared. + workbook = workbook[~workbook["object_id"].str.startswith("AUTO:")] + + # Remove duplicate annotations, so we don't inflate the similarity scores + # by comparing the same object multiple times. + workbook = delete_duplicate_annotations(workbook) + + return workbook + + +def group_object_ids(workbook: pd.DataFrame) -> dict: + """ + Group object_id values by predicate and element_xpath, i.e. the context + of the object_id values that we are comparing. + + :param workbook: The workbook to apply the grouping to. + :return: The grouped workbook as a dictionary, where the keys are tuples + of the workbook predicate and element_xpath values, and the dictionary + values are lists of object_id values. 
+ """ + # list_object_id_for_predicate_and_element_xpath + # Group data by predicate and element_xpath columns + series = workbook.groupby(["predicate", "element_xpath"]).apply( + lambda x: x.to_dict("records"), include_groups=False + ) + + # Only include the "object_id" values, these are what we want to compare + res = {key: [d["object_id"] for d in data] for key, data in series.items()} + return res + + +def compress_object_ids(object_id_groups: dict) -> dict: + """ + Convert object_ids to CURIEs for comparison. + + :param object_id_groups: The return value from `group_object_ids`. + :return: The object_id_groups dictionary with object_ids converted to + CURIEs. + """ + for key, data in object_id_groups.items(): + object_id_groups[key] = [compress_uri(d) if d else None for d in data] + return object_id_groups + + +def parse_similarity_scores(scores: list) -> dict: + """ + Parse similarity scores from the output of the `oaklib` termset-similarity + command into the format expected by the benchmarking function. + + :param scores: The output of the `oaklib` termset-similarity command. + :return: A dictionary containing the parsed similarity scores. + """ + res = default_similarity_scores() + + # Get the "termset similarity" scores + res["average_score"] = scores[0].get("average_score") + res["best_score"] = scores[0].get("best_score") + + # Get other similarity scores (i.e. information content, jaccard + # similarity, phenodigm score) + for key in scores[0].keys(): + + # Information content scores + if key == "subject_best_matches": # for the subject (i.e. "standard") + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["subject_information_content"] + r.append(s) + res["average_standard_information_content"] = sum(r) / len(r) + res["best_standard_information_content"] = max(r) + if key == "object_best_matches": # for the object (i.e. the "test") + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["object_information_content"] + r.append(s) + res["average_test_information_content"] = sum(r) / len(r) + res["best_test_information_content"] = max(r) + + # Jaccard similarity scores. Note, we can get this information from + # either the subject_best_matches or object_best_matches keys. Doing + # both is redundant. + if key == "subject_best_matches": + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["jaccard_similarity"] + r.append(s) + res["average_jaccard_similarity"] = sum(r) / len(r) + res["best_jaccard_similarity"] = max(r) + + # Phenodigm scores. Note, we can get this information from either the + # subject_best_matches or object_best_matches keys. Doing both is + # redundant. + if key == "subject_best_matches": + r = [] + for item in scores[0][key]: + s = scores[0][key][item]["similarity"]["phenodigm_score"] + r.append(s) + res["average_phenodigm_score"] = sum(r) / len(r) + res["best_phenodigm_score"] = max(r) + + return res + + +def delete_terms_from_unsupported_ontologies(curies: list) -> list: + """ + Similarity scoring works for some ontologies and not others, so remove + terms that are not from supported ontologies. Supported ontologies are + hard-coded in this function. + + :param curies: List of CURIEs. + :return: List of CURIEs from supported ontologies. 
+ """ + supported_ontologies = ["ENVO", "ECSO", "ENVTHES"] + res = [ + term + for term in curies + if any(term.startswith(ontology + ":") for ontology in supported_ontologies) + ] + return res + + +def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: + """ + Get the most shared ontology of two sets based on the most frequently + occurring CURIE prefix. + + :param set1: List of CURIEs for the first set of terms. + :param set2: List of CURIEs for the second set of terms. + :return: The shared ontology. This value is returned as a string conforming + to the `oaklib` conventions for specifying the ontology database input + to the termset-similarity function. If no shared ontology is found, + None is returned. + """ + + prefixes1 = [term.split(":")[0] for term in set1] + prefixes2 = [term.split(":")[0] for term in set2] + + # Get the most common prefix in the intersection of the two sets + intersection = set(prefixes1) & set(prefixes2) + counts = {prefix: prefixes1.count(prefix) for prefix in intersection} + if len(intersection) == 0: + logger.info("Cannot find a common ontology for similarity scoring") + return None + prefix = max(counts, key=counts.get) + + # Map the prefix to the ontology database + if prefix == "ENVO": + db = "sqlite:obo:envo" + else: + logger.info(f"Ontology not supported: {prefix}") + return None + + return db diff --git a/src/spinneret/data/prefixmaps.csv b/src/spinneret/data/prefixmaps.csv new file mode 100644 index 0000000..2b9bfe9 --- /dev/null +++ b/src/spinneret/data/prefixmaps.csv @@ -0,0 +1,753 @@ +context,prefix,namespace,status +obo,AAO,http://purl.obolibrary.org/obo/AAO_,canonical +obo,ADO,http://purl.obolibrary.org/obo/ADO_,canonical +obo,ADW,http://purl.obolibrary.org/obo/ADW_,canonical +obo,AEO,http://purl.obolibrary.org/obo/AEO_,canonical +obo,AERO,http://purl.obolibrary.org/obo/AERO_,canonical +obo,AfPO,http://purl.obolibrary.org/obo/AfPO_,canonical +obo,AGRO,http://purl.obolibrary.org/obo/AGRO_,canonical +obo,AISM,http://purl.obolibrary.org/obo/AISM_,canonical +obo,AMPHX,http://purl.obolibrary.org/obo/AMPHX_,canonical +obo,APO,http://purl.obolibrary.org/obo/APO_,canonical +obo,APOLLO_SV,http://purl.obolibrary.org/obo/APOLLO_SV_,canonical +obo,ARO,http://purl.obolibrary.org/obo/ARO_,canonical +obo,ATO,http://purl.obolibrary.org/obo/ATO_,canonical +obo,BCGO,http://purl.obolibrary.org/obo/BCGO_,canonical +obo,BCO,http://purl.obolibrary.org/obo/BCO_,canonical +obo,BFO,http://purl.obolibrary.org/obo/BFO_,canonical +obo,BILA,http://purl.obolibrary.org/obo/BILA_,canonical +obo,BOOTSTREP,http://purl.obolibrary.org/obo/BOOTSTREP_,canonical +obo,BSPO,http://purl.obolibrary.org/obo/BSPO_,canonical +obo,BTO,http://purl.obolibrary.org/obo/BTO_,canonical +obo,CARO,http://purl.obolibrary.org/obo/CARO_,canonical +obo,CDAO,http://purl.obolibrary.org/obo/CDAO_,canonical +obo,CDNO,http://purl.obolibrary.org/obo/CDNO_,canonical +obo,CEPH,http://purl.obolibrary.org/obo/CEPH_,canonical +obo,CHEBI,http://purl.obolibrary.org/obo/CHEBI_,canonical +obo,CHEMINF,http://purl.obolibrary.org/obo/CHEMINF_,canonical +obo,CHIRO,http://purl.obolibrary.org/obo/CHIRO_,canonical +obo,CHMO,http://purl.obolibrary.org/obo/CHMO_,canonical +obo,CIDO,http://purl.obolibrary.org/obo/CIDO_,canonical +obo,CIO,http://purl.obolibrary.org/obo/CIO_,canonical +obo,CL,http://purl.obolibrary.org/obo/CL_,canonical +obo,CLAO,http://purl.obolibrary.org/obo/CLAO_,canonical +obo,CLO,http://purl.obolibrary.org/obo/CLO_,canonical +obo,CLYH,http://purl.obolibrary.org/obo/CLYH_,canonical 
+obo,CMF,http://purl.obolibrary.org/obo/CMF_,canonical +obo,CMO,http://purl.obolibrary.org/obo/CMO_,canonical +obo,COB,http://purl.obolibrary.org/obo/COB_,canonical +obo,COLAO,http://purl.obolibrary.org/obo/COLAO_,canonical +obo,CRO,http://purl.obolibrary.org/obo/CRO_,canonical +obo,CTENO,http://purl.obolibrary.org/obo/CTENO_,canonical +obo,CTO,http://purl.obolibrary.org/obo/CTO_,canonical +obo,CVDO,http://purl.obolibrary.org/obo/CVDO_,canonical +obo,DC_CL,http://purl.obolibrary.org/obo/DC_CL_,canonical +obo,DDANAT,http://purl.obolibrary.org/obo/DDANAT_,canonical +obo,DDPHENO,http://purl.obolibrary.org/obo/DDPHENO_,canonical +obo,DIDEO,http://purl.obolibrary.org/obo/DIDEO_,canonical +obo,DINTO,http://purl.obolibrary.org/obo/DINTO_,canonical +obo,DISDRIV,http://purl.obolibrary.org/obo/DISDRIV_,canonical +obo,DOID,http://purl.obolibrary.org/obo/DOID_,canonical +obo,DRON,http://purl.obolibrary.org/obo/DRON_,canonical +obo,DUO,http://purl.obolibrary.org/obo/DUO_,canonical +obo,ECAO,http://purl.obolibrary.org/obo/ECAO_,canonical +obo,ECO,http://purl.obolibrary.org/obo/ECO_,canonical +obo,ECOCORE,http://purl.obolibrary.org/obo/ECOCORE_,canonical +obo,ECTO,http://purl.obolibrary.org/obo/ECTO_,canonical +obo,EHDA,http://purl.obolibrary.org/obo/EHDA_,canonical +obo,EHDAA,http://purl.obolibrary.org/obo/EHDAA_,canonical +obo,EHDAA2,http://purl.obolibrary.org/obo/EHDAA2_,canonical +obo,EMAP,http://purl.obolibrary.org/obo/EMAP_,canonical +obo,EMAPA,http://purl.obolibrary.org/obo/EMAPA_,canonical +obo,ENVO,http://purl.obolibrary.org/obo/ENVO_,canonical +obo,EO,http://purl.obolibrary.org/obo/EO_,canonical +obo,EPIO,http://purl.obolibrary.org/obo/EPIO_,canonical +obo,EPO,http://purl.obolibrary.org/obo/EPO_,canonical +obo,ERO,http://purl.obolibrary.org/obo/ERO_,canonical +obo,EUPATH,http://purl.obolibrary.org/obo/EUPATH_,canonical +obo,EV,http://purl.obolibrary.org/obo/EV_,canonical +obo,ExO,http://purl.obolibrary.org/obo/ExO_,canonical +obo,FAO,http://purl.obolibrary.org/obo/FAO_,canonical +obo,FBbi,http://purl.obolibrary.org/obo/FBbi_,canonical +obo,FBbt,http://purl.obolibrary.org/obo/FBbt_,canonical +obo,FBcv,http://purl.obolibrary.org/obo/FBcv_,canonical +obo,FBdv,http://purl.obolibrary.org/obo/FBdv_,canonical +obo,FBSP,http://purl.obolibrary.org/obo/FBSP_,canonical +obo,FIDEO,http://purl.obolibrary.org/obo/FIDEO_,canonical +obo,FIX,http://purl.obolibrary.org/obo/FIX_,canonical +obo,FLOPO,http://purl.obolibrary.org/obo/FLOPO_,canonical +obo,FLU,http://purl.obolibrary.org/obo/FLU_,canonical +obo,FMA,http://purl.obolibrary.org/obo/FMA_,canonical +obo,FOBI,http://purl.obolibrary.org/obo/FOBI_,canonical +obo,FOODON,http://purl.obolibrary.org/obo/FOODON_,canonical +obo,FOVT,http://purl.obolibrary.org/obo/FOVT_,canonical +obo,FYPO,http://purl.obolibrary.org/obo/FYPO_,canonical +obo,GALLONT,http://purl.obolibrary.org/obo/GALLONT_,canonical +obo,GAZ,http://purl.obolibrary.org/obo/GAZ_,canonical +obo,GECKO,http://purl.obolibrary.org/obo/GECKO_,canonical +obo,GENEPIO,http://purl.obolibrary.org/obo/GENEPIO_,canonical +obo,GENO,http://purl.obolibrary.org/obo/GENO_,canonical +obo,GEO,http://purl.obolibrary.org/obo/GEO_,canonical +obo,GNO,http://purl.obolibrary.org/obo/GNO_,canonical +obo,GO,http://purl.obolibrary.org/obo/GO_,canonical +obo,GRO,http://purl.obolibrary.org/obo/GRO_,canonical +obo,GSSO,http://purl.obolibrary.org/obo/GSSO_,canonical +obo,HABRONATTUS,http://purl.obolibrary.org/obo/HABRONATTUS_,canonical +obo,HANCESTRO,http://purl.obolibrary.org/obo/HANCESTRO_,canonical 
+obo,HAO,http://purl.obolibrary.org/obo/HAO_,canonical +obo,HOM,http://purl.obolibrary.org/obo/HOM_,canonical +obo,HP,http://purl.obolibrary.org/obo/HP_,canonical +obo,HsapDv,http://purl.obolibrary.org/obo/HsapDv_,canonical +obo,HSO,http://purl.obolibrary.org/obo/HSO_,canonical +obo,HTN,http://purl.obolibrary.org/obo/HTN_,canonical +obo,IAO,http://purl.obolibrary.org/obo/IAO_,canonical +obo,ICEO,http://purl.obolibrary.org/obo/ICEO_,canonical +obo,ICO,http://purl.obolibrary.org/obo/ICO_,canonical +obo,IDO,http://purl.obolibrary.org/obo/IDO_,canonical +obo,IDOMAL,http://purl.obolibrary.org/obo/IDOMAL_,canonical +obo,IEV,http://purl.obolibrary.org/obo/IEV_,canonical +obo,IMR,http://purl.obolibrary.org/obo/IMR_,canonical +obo,INO,http://purl.obolibrary.org/obo/INO_,canonical +obo,IPR,http://purl.obolibrary.org/obo/IPR_,canonical +obo,KISAO,http://purl.obolibrary.org/obo/KISAO_,canonical +obo,LABO,http://purl.obolibrary.org/obo/LABO_,canonical +obo,LEPAO,http://purl.obolibrary.org/obo/LEPAO_,canonical +obo,LIPRO,http://purl.obolibrary.org/obo/LIPRO_,canonical +obo,LOGGERHEAD,http://purl.obolibrary.org/obo/LOGGERHEAD_,canonical +obo,MA,http://purl.obolibrary.org/obo/MA_,canonical +obo,MAMO,http://purl.obolibrary.org/obo/MAMO_,canonical +obo,MAO,http://purl.obolibrary.org/obo/MAO_,canonical +obo,MAT,http://purl.obolibrary.org/obo/MAT_,canonical +obo,MAXO,http://purl.obolibrary.org/obo/MAXO_,canonical +obo,MCO,http://purl.obolibrary.org/obo/MCO_,canonical +obo,MCRO,http://purl.obolibrary.org/obo/MCRO_,canonical +obo,MF,http://purl.obolibrary.org/obo/MF_,canonical +obo,MFMO,http://purl.obolibrary.org/obo/MFMO_,canonical +obo,MFO,http://purl.obolibrary.org/obo/MFO_,canonical +obo,MFOEM,http://purl.obolibrary.org/obo/MFOEM_,canonical +obo,MFOMD,http://purl.obolibrary.org/obo/MFOMD_,canonical +obo,MI,http://purl.obolibrary.org/obo/MI_,canonical +obo,MIAPA,http://purl.obolibrary.org/obo/MIAPA_,canonical +obo,MICRO,http://purl.obolibrary.org/obo/MICRO_,canonical +obo,MIRNAO,http://purl.obolibrary.org/obo/MIRNAO_,canonical +obo,MIRO,http://purl.obolibrary.org/obo/MIRO_,canonical +obo,MMO,http://purl.obolibrary.org/obo/MMO_,canonical +obo,MmusDv,http://purl.obolibrary.org/obo/MmusDv_,canonical +obo,MO,http://purl.obolibrary.org/obo/MO_,canonical +obo,MOD,http://purl.obolibrary.org/obo/MOD_,canonical +obo,MONDO,http://purl.obolibrary.org/obo/MONDO_,canonical +obo,MOP,http://purl.obolibrary.org/obo/MOP_,canonical +obo,MP,http://purl.obolibrary.org/obo/MP_,canonical +obo,MPATH,http://purl.obolibrary.org/obo/MPATH_,canonical +obo,MPIO,http://purl.obolibrary.org/obo/MPIO_,canonical +obo,MRO,http://purl.obolibrary.org/obo/MRO_,canonical +obo,MS,http://purl.obolibrary.org/obo/MS_,canonical +obo,NBO,http://purl.obolibrary.org/obo/NBO_,canonical +obo,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_,canonical +obo,NCIT,http://purl.obolibrary.org/obo/NCIT_,canonical +obo,NCRO,http://purl.obolibrary.org/obo/NCRO_,canonical +obo,NGBO,http://purl.obolibrary.org/obo/NGBO_,canonical +obo,NIF_CELL,http://purl.obolibrary.org/obo/NIF_CELL_,canonical +obo,NIF_DYSFUNCTION,http://purl.obolibrary.org/obo/NIF_DYSFUNCTION_,canonical +obo,NIF_GROSSANATOMY,http://purl.obolibrary.org/obo/NIF_GROSSANATOMY_,canonical +obo,NMR,http://purl.obolibrary.org/obo/NMR_,canonical +obo,NOMEN,http://purl.obolibrary.org/obo/NOMEN_,canonical +obo,OAE,http://purl.obolibrary.org/obo/OAE_,canonical +obo,OARCS,http://purl.obolibrary.org/obo/OARCS_,canonical +obo,OBA,http://purl.obolibrary.org/obo/OBA_,canonical 
+obo,OBCS,http://purl.obolibrary.org/obo/OBCS_,canonical +obo,OBI,http://purl.obolibrary.org/obo/OBI_,canonical +obo,OBIB,http://purl.obolibrary.org/obo/OBIB_,canonical +obo,OBO_REL,http://purl.obolibrary.org/obo/OBO_REL_,canonical +obo,OCCO,http://purl.obolibrary.org/obo/OCCO_,canonical +obo,OGG,http://purl.obolibrary.org/obo/OGG_,canonical +obo,OGI,http://purl.obolibrary.org/obo/OGI_,canonical +obo,OGMS,http://purl.obolibrary.org/obo/OGMS_,canonical +obo,OGSF,http://purl.obolibrary.org/obo/OGSF_,canonical +obo,OHD,http://purl.obolibrary.org/obo/OHD_,canonical +obo,OHMI,http://purl.obolibrary.org/obo/OHMI_,canonical +obo,OHPI,http://purl.obolibrary.org/obo/OHPI_,canonical +obo,OlatDv,http://purl.obolibrary.org/obo/OlatDv_,canonical +obo,OMIABIS,http://purl.obolibrary.org/obo/OMIABIS_,canonical +obo,OMIT,http://purl.obolibrary.org/obo/OMIT_,canonical +obo,OMO,http://purl.obolibrary.org/obo/OMO_,canonical +obo,OMP,http://purl.obolibrary.org/obo/OMP_,canonical +obo,OMRSE,http://purl.obolibrary.org/obo/OMRSE_,canonical +obo,ONE,http://purl.obolibrary.org/obo/ONE_,canonical +obo,ONS,http://purl.obolibrary.org/obo/ONS_,canonical +obo,ONTOAVIDA,http://purl.obolibrary.org/obo/ONTOAVIDA_,canonical +obo,ONTONEO,http://purl.obolibrary.org/obo/ONTONEO_,canonical +obo,OOSTT,http://purl.obolibrary.org/obo/OOSTT_,canonical +obo,OPL,http://purl.obolibrary.org/obo/OPL_,canonical +obo,OPMI,http://purl.obolibrary.org/obo/OPMI_,canonical +obo,ORNASEQ,http://purl.obolibrary.org/obo/ORNASEQ_,canonical +obo,OVAE,http://purl.obolibrary.org/obo/OVAE_,canonical +obo,PAO,http://purl.obolibrary.org/obo/PAO_,canonical +obo,PATO,http://purl.obolibrary.org/obo/PATO_,canonical +obo,PCL,http://purl.obolibrary.org/obo/PCL_,canonical +obo,PCO,http://purl.obolibrary.org/obo/PCO_,canonical +obo,PD_ST,http://purl.obolibrary.org/obo/PD_ST_,canonical +obo,PDRO,http://purl.obolibrary.org/obo/PDRO_,canonical +obo,PdumDv,http://purl.obolibrary.org/obo/PdumDv_,canonical +obo,PECO,http://purl.obolibrary.org/obo/PECO_,canonical +obo,PGDSO,http://purl.obolibrary.org/obo/PGDSO_,canonical +obo,PHIPO,http://purl.obolibrary.org/obo/PHIPO_,canonical +obo,PLANA,http://purl.obolibrary.org/obo/PLANA_,canonical +obo,PLANP,http://purl.obolibrary.org/obo/PLANP_,canonical +obo,PLO,http://purl.obolibrary.org/obo/PLO_,canonical +obo,PO,http://purl.obolibrary.org/obo/PO_,canonical +obo,PORO,http://purl.obolibrary.org/obo/PORO_,canonical +obo,PPO,http://purl.obolibrary.org/obo/PPO_,canonical +obo,PR,http://purl.obolibrary.org/obo/PR_,canonical +obo,PROCO,http://purl.obolibrary.org/obo/PROCO_,canonical +obo,PROPREO,http://purl.obolibrary.org/obo/PROPREO_,canonical +obo,PSDO,http://purl.obolibrary.org/obo/PSDO_,canonical +obo,PSO,http://purl.obolibrary.org/obo/PSO_,canonical +obo,PW,http://purl.obolibrary.org/obo/PW_,canonical +obo,RBO,http://purl.obolibrary.org/obo/RBO_,canonical +obo,RESID,http://purl.obolibrary.org/obo/RESID_,canonical +obo,REX,http://purl.obolibrary.org/obo/REX_,canonical +obo,RNAO,http://purl.obolibrary.org/obo/RNAO_,canonical +obo,RO,http://purl.obolibrary.org/obo/RO_,canonical +obo,RS,http://purl.obolibrary.org/obo/RS_,canonical +obo,RXNO,http://purl.obolibrary.org/obo/RXNO_,canonical +obo,SAO,http://purl.obolibrary.org/obo/SAO_,canonical +obo,SBO,http://purl.obolibrary.org/obo/SBO_,canonical +obo,SCDO,http://purl.obolibrary.org/obo/SCDO_,canonical +obo,SEP,http://purl.obolibrary.org/obo/SEP_,canonical +obo,SEPIO,http://purl.obolibrary.org/obo/SEPIO_,canonical +obo,SIBO,http://purl.obolibrary.org/obo/SIBO_,canonical 
+obo,SLSO,http://purl.obolibrary.org/obo/SLSO_,canonical +obo,SO,http://purl.obolibrary.org/obo/SO_,canonical +obo,SOPHARM,http://purl.obolibrary.org/obo/SOPHARM_,canonical +obo,SPD,http://purl.obolibrary.org/obo/SPD_,canonical +obo,STATO,http://purl.obolibrary.org/obo/STATO_,canonical +obo,SWO,http://purl.obolibrary.org/obo/SWO_,canonical +obo,SYMP,http://purl.obolibrary.org/obo/SYMP_,canonical +obo,T4FS,http://purl.obolibrary.org/obo/T4FS_,canonical +obo,TADS,http://purl.obolibrary.org/obo/TADS_,canonical +obo,TAHE,http://purl.obolibrary.org/obo/TAHE_,canonical +obo,TAHH,http://purl.obolibrary.org/obo/TAHH_,canonical +obo,TAO,http://purl.obolibrary.org/obo/TAO_,canonical +obo,TAXRANK,http://purl.obolibrary.org/obo/TAXRANK_,canonical +obo,TGMA,http://purl.obolibrary.org/obo/TGMA_,canonical +obo,TO,http://purl.obolibrary.org/obo/TO_,canonical +obo,TRANS,http://purl.obolibrary.org/obo/TRANS_,canonical +obo,TTO,http://purl.obolibrary.org/obo/TTO_,canonical +obo,TXPO,http://purl.obolibrary.org/obo/TXPO_,canonical +obo,UBERON,http://purl.obolibrary.org/obo/UBERON_,canonical +obo,UO,http://purl.obolibrary.org/obo/UO_,canonical +obo,UPA,http://purl.obolibrary.org/obo/UPA_,canonical +obo,UPHENO,http://purl.obolibrary.org/obo/UPHENO_,canonical +obo,VariO,http://purl.obolibrary.org/obo/VariO_,canonical +obo,VBO,http://purl.obolibrary.org/obo/VBO_,canonical +obo,VHOG,http://purl.obolibrary.org/obo/VHOG_,canonical +obo,VO,http://purl.obolibrary.org/obo/VO_,canonical +obo,VSAO,http://purl.obolibrary.org/obo/VSAO_,canonical +obo,VT,http://purl.obolibrary.org/obo/VT_,canonical +obo,VTO,http://purl.obolibrary.org/obo/VTO_,canonical +obo,WBbt,http://purl.obolibrary.org/obo/WBbt_,canonical +obo,WBls,http://purl.obolibrary.org/obo/WBls_,canonical +obo,WBPhenotype,http://purl.obolibrary.org/obo/WBPhenotype_,canonical +obo,XAO,http://purl.obolibrary.org/obo/XAO_,canonical +obo,XCO,http://purl.obolibrary.org/obo/XCO_,canonical +obo,XLMOD,http://purl.obolibrary.org/obo/XLMOD_,canonical +obo,XPO,http://purl.obolibrary.org/obo/XPO_,canonical +obo,YPO,http://purl.obolibrary.org/obo/YPO_,canonical +obo,ZEA,http://purl.obolibrary.org/obo/ZEA_,canonical +obo,ZECO,http://purl.obolibrary.org/obo/ZECO_,canonical +obo,ZFA,http://purl.obolibrary.org/obo/ZFA_,canonical +obo,ZFS,http://purl.obolibrary.org/obo/ZFS_,canonical +obo,ZP,http://purl.obolibrary.org/obo/ZP_,canonical +bioportal,ABD,http://brd.bsvgateway.org/api/,canonical +bioportal,ACESO,http://www.semanticweb.org/cbmi/ontologies/2018/10/aceso#,canonical +bioportal,ACGT-MO,http://www.ifomis.org/acgt/1.0#,canonical +bioportal,AD-DROP,http://www.semanticweb.org/AD-DROP#,canonical +bioportal,ADALAB-META,http://rdf.adalab-project.org/ontology/adalab-meta/,canonical +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-core.owl#,canonical +bioportal,ADAR,http://purl.org/autism-ontology/1.0/CA_ADOS1-2001.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/Interests_and_behaviors_phenotype#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/assessment-result.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-merged.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/autism-rules.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_adi-2003.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_ados4_2001.owl#,prefix_alias +bioportal,ADAR,http://purl.org/autism-ontology/1.0/ca_vinelandsurvey-2005.owl#,prefix_alias 
+bioportal,ADCAD,https://purl.dataone.org/odo/ADCAD_,canonical +bioportal,ADHER_INTCARE_EN,http://www.semanticweb.org/parracarlos/ontologies/2019/3/untitled-ontology-31#,canonical +bioportal,ADMO,http://www.semanticweb.org/ADMO#,canonical +bioportal,ADO,http://scai.fraunhofer.de/AlzheimerOntology#,canonical +bioportal,AGROCYMAC,http://www.semanticweb.org/yali/ontologies/2019/0/cultivos#,canonical +bioportal,AGROMOP,http://www.semanticweb.org/vera/ontologies/2020/1/untitled-ontology-5#,canonical +bioportal,AHOL,http://opendata.inra.fr/AHOL/AHOL_,canonical +bioportal,AHSO,https://w3id.org/ahso#,canonical +bioportal,AIRBUS,http://protege.stanford.edu/thesauri/aero/,canonical +bioportal,AISM,OBO:AISM_,canonical +bioportal,AMINO-ACID,http://www.co-ode.org/ontologies/amino-acid/2006/05/18/amino-acid.owl#,canonical +bioportal,AO,http://childhealthservicemodels.eu/asthma#,canonical +bioportal,APAOCUEMPLOY,http://www.semanticweb.org/ontologies/2015/0/ocupationalemploymentcluster.owl#,canonical +bioportal,APATREATMENT,http://www.semanticweb.org/ontologies/2015/0/treatmentcluster.owl#,canonical +bioportal,araport,OBO:Araport_,canonical +bioportal,ARCRC,http://purl.dataone.org/odo/ARCRC_,canonical +bioportal,ASDPTO,http://cbmi.med.harvard.edu/asdphenotype#,canonical +bioportal,ASPECT,http://purl.org/aspect/,canonical +bioportal,ATC,http://purl.bioontology.org/ontology/ATC/,canonical +bioportal,ATOL,http://opendata.inra.fr/ATOL/ATOL_,canonical +bioportal,AURA,http://www.projecthalo.com/aura#,canonical +bioportal,BAO,http://www.bioassayontology.org/bao#BAO_,canonical +bioportal,BCI-O,https://w3id.org/BCI-ontology#,canonical +bioportal,BCTT,http://purl.bioontology.org/ontology/BCTT#,canonical +bioportal,BFO,http://www.ifomis.org/bfo/1.1/snap#,canonical +bioportal,BHN,http://chu-rouen.fr/cismef/BHN#,canonical +bioportal,BIBFRAME,http://id.loc.gov/ontologies/bibframe/,canonical +bioportal,BIBLIOTEK-O,http://bibliotek-o.org/1.1/ontology/,canonical +bioportal,BIBLIOTEK-O,http://bibliotek-o.org/ontology/,prefix_alias +bioportal,BIM,http://cbakerlab.unbsj.ca/unbvps/BIM#,canonical +bioportal,BIM,http://cbakerlab.unbsj.ca:8080/ontologies/BIM.owl#,prefix_alias +bioportal,BIM,http://cbakerlab.unbsj.ca:8080/sebi/BIM.owl#,prefix_alias +bioportal,BIN,http://purl.bioontology.org/ontology/BIN/,canonical +bioportal,BIRNLEX,http://bioontology.org/projects/ontologies/birnlex#,canonical +bioportal,BNO,http://www.owl-ontologies.com/Ontology1361987617.owl#,canonical +bioportal,BOF,http://www.owl-ontologies.com/BiodiversityOntologyFull.owl#,canonical +bioportal,BP,http://www.biopax.org/release/biopax-level3.owl#,canonical +bioportal,BRCT,http://www.semanticweb.org/latitude_user/ontologies/2014/8/untitled-ontology-7#,canonical +bioportal,BRCT,http://www.semanticweb.org/ontologies/2009/9/Ontology1255357986125.owl#,prefix_alias +bioportal,BRIDG,http://www.bridgmodel.org/owl#,canonical +bioportal,BRO,http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#,canonical +bioportal,BRSO,http://purl.jp/bio/10/brso/,canonical +bioportal,BRSO,http://purl.org/brso/BiologicalResourceStatus#,prefix_alias +bioportal,BRSO,http://purl.org/brso/BiologicalResourceType#,prefix_alias +bioportal,BSAO,OBO:BSA_,canonical +bioportal,BT,http://purl.org/biotop/biotop.owl#,canonical +bioportal,CABRO,http://www.semanticweb.org/dimitrios/ontologies/2013/2/untitled-ontology-2#,canonical +bioportal,CARELEX,http://www.CareLex.org/2012/carelex.owl#,canonical +bioportal,CARRE,file:/Users/allanthird/Work/CARRE/CARREOntology/carre-ontology.xml#,canonical 
+bioportal,CARRE,file:/Users/allanthird/Work/CARRE/CARREOntology/carre-sensors.xml#,prefix_alias +bioportal,CASE-BASE-ONTO,http://www.semanticweb.org/hsh/ontologies/2019/7/CBRDystempOnto#,canonical +bioportal,CCTOO,OBO:CCTO_,canonical +bioportal,CDPEO,http://www.semanticweb.org/ontologies/chronic-diease-patient-education-ontology#,canonical +bioportal,CEDARVS,http://www.semanticweb.org/jgraybeal/ontologies/2015/7/cedarvaluesets#,canonical +bioportal,cgnc,OBO:CGNC_,canonical +bioportal,CHD,http://homes.esat.kuleuven.be/~bioiuser/chdwiki/index.php/CHD:CaseReport?id=,canonical +bioportal,CHEMINF,http://semanticscience.org/resource/CHEMINF_,canonical +bioportal,CIDOC-CRM,http://www.cidoc-crm.org/cidoc-crm/,canonical +bioportal,CKDO,http://clininf.eu/ckdo#,canonical +bioportal,CMDO,http://purl.bioontology.org/ontology/CMDO/,canonical +bioportal,CMR-QA,http://www.semanticweb.org/ukbiobank/ocmr_isg/CMR-QA#,canonical +bioportal,CN,http://mmisw.org/ont/Technology/ComputerNetworks/,canonical +bioportal,CN,http://mmisw.org/ont/Technology/ComputerNetworks#,prefix_alias +bioportal,CO-WHEAT,OBO:CO_321_,canonical +bioportal,CODO,http://www.isibang.ac.in/ns/codo#,canonical +bioportal,COGAT,file:/srv/ncbo/repository/COGAT/8/,canonical +bioportal,COGPO,http://www.cogpo.org/ontologies/COGPO_,canonical +bioportal,COGPO,http://www.cogpo.org/ontologies/CogPOver1.owl#,prefix_alias +bioportal,COGPO,http://www.cogpo.org/ontologies/CogPOver2010.owl#COGPO_,prefix_alias +bioportal,COID,https://github.com/sap218/coid/blob/master/coid.owl#,canonical +bioportal,COKPME,http://www.iiitdwd.ac.in/ACB/COKPME#,canonical +bioportal,COMODI,http://purl.uni-rostock.de/comodi/comodi#,canonical +bioportal,COSTART,http://purl.bioontology.org/ontology/CST/,canonical +bioportal,CPRO,http://purl.org/cpr/,canonical +bioportal,CRISP,http://purl.bioontology.org/ontology/CSP/,canonical +bioportal,CRYOEM,http://scipion.i2pc.es/ontology/CRYOEM_,canonical +bioportal,CSSO,http://purl.jp/bio/11/csso/CSSO_,canonical +bioportal,CU-VO,http://www.semanticweb.org/jdr2160/ontologies/2015/5/venom_ontology#,canonical +bioportal,CVAO,http://www.semanticweb.org/ontologies/2015/11/CVAO#,canonical +bioportal,CWD,http://www.semanticweb.org/jbagwell/ontologies/2017/9/untitled-ontology-6#,canonical +bioportal,CYTO,http://www.semanticweb.org/demetrios/ontologies/2014/5/,canonical +bioportal,DATACITE,http://purl.org/spar/datacite/,canonical +bioportal,DCM,http://dicom.nema.org/resources/ontology/DCM/,canonical +bioportal,DEB,http://www.semanticweb.org/osnathakimi/ontologies/deb#,canonical +bioportal,DERMO,OBO:DERMO_,canonical +bioportal,DFO,https://w3id.org/dfo/,canonical +bioportal,DIAB,http://purl.bioontology.org/ontology/DIAB/,canonical +bioportal,DIKB,http://purl.org/net/drug-interaction-knowledge-base/DIKB_evidence_ontology.owl#,canonical +bioportal,DISDRIV,OBO:DISDRIV_,canonical +bioportal,DLO,https://w3id.org/dlo/,canonical +bioportal,DLORO,http://www.semanticweb.org/alan/ontologies/2013/8/untitled-ontology-9#,canonical +bioportal,DOCCC,http://www.semanticweb.org/hll/ontologies/2013/8/untitled-ontology-2#,canonical +bioportal,DOID,http://purl.org/obo/owl/DOID#,canonical +bioportal,DOREMUS-KEYS,http://data.doremus.org/vocabulary/key/,canonical +bioportal,DRANPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2019/8/nonpharmacological-intervention-for-agitation-in-dementia-ontology/,canonical +bioportal,DREAMDNPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2021/DREAMDNPTO#,canonical 
+bioportal,DRPSNPTO,http://www.semanticweb.org/zhenyuzhang/ontologies/2020/DRPSNPTO/,canonical +bioportal,DSEO,http://bigdatau.org/dseo#,canonical +bioportal,DTO,http://www.drugtargetontology.org/dto/DTO_,canonical +bioportal,EBP,http://www.semanticweb.org/tswheeler/ontologies/2016/3/EmpowerBP#,canonical +bioportal,ECAO,OBO:ECAO_,canonical +bioportal,ECG,http://www.cvrgrid.org/files/ECGOntologyv1.owl#ECG_,canonical +bioportal,ECP,http://iris.med.duth.gr/research/ecp/ontology/eCP.owl#,canonical +bioportal,ECSO,http://purl.dataone.org/odo/ECSO_,canonical +bioportal,EDAM,http://edamontology.org/,canonical +bioportal,EFO,http://www.ebi.ac.uk/efo/EFO_,canonical +bioportal,ELECTRICA,http://purl.org/ELECTRICA/,canonical +bioportal,ELIG,http://www.semanticweb.org/ontologies/2012/8/Ontology1348158066194.owl#,canonical +bioportal,ELTER_CL,http://vocabs.lter-europe.net/eLTER_CL/,canonical +bioportal,EMO,http://www.semanticweb.org/ontologies/2011/1/14/EMO.owl/,canonical +bioportal,ensembl.bacteria,OBO:EnsemblBacteria#_,canonical +bioportal,ENVS_VARIABLES,http://purl.org/m4m-dk-2/variables/,canonical +bioportal,ENVTHES,http://vocabs.lter-europe.net/EnvThes/,canonical +bioportal,EO,http://www.semanticweb.org/ethnicityOntology#,canonical +bioportal,EOL,http://purl.org/obo/owlEOL_,canonical +bioportal,EP,http://www.cvrgrid.org/ontologies/Electrophysiology#,canonical +bioportal,EPIE,https://pat.nichd.nih.gov/patepigeneticentity/,canonical +bioportal,EPILONT,http://www.semanticweb.org/ontologies/2009/3/EpilepsyOntology.owl#,canonical +bioportal,EPIO,https://bio.scai.fraunhofer.de/ontology/epilepsy#,canonical +bioportal,EPIP,https://pat.nichd.nih.gov/patepigeneticprocess/,canonical +bioportal,EPISEM,http://www.semanticweb.org/danielhier/ontologies/2019/3/untitled-ontology-57/,canonical +bioportal,EPO,http://www.semanticweb.org/ontologies/epo.owl#,canonical +bioportal,EPSO,http://www.case.edu/EpilepsyOntology.owl#,canonical +bioportal,ESSO,http://www.semanticweb.org/rjyy/ontologies/2015/5/ESSO#,canonical +bioportal,ETHANC,https://github.com/VODANA/Controlled-vocabulary/ethanc/,canonical +bioportal,EXACT,http://www.owl-ontologies.com/Ontology1184060740.owl#,canonical +bioportal,EXTRACT,http://purl.org/extract/,canonical +bioportal,FBbi,http://purl.org/obo/owl/FBbi#FBbi_,canonical +bioportal,FCC1,http://www.semanticweb.org/diwaleva/ontologies/2019/9/fcc-ontology#,canonical +bioportal,FDC-GDMT,http://vocab.fairdatacollective.org/gdmt/,canonical +bioportal,FG,https://w3id.org/fair-genomes/ontology/,canonical +bioportal,FHHO,http://www.owl-ontologies.com/Ontology1172270693.owl#,canonical +bioportal,FIRE,http://cerrado.linkeddata.es/ecology/fire#,canonical +bioportal,FISH-AST,http://purl.org/heritagedata/schemes/560/concepts/,canonical +bioportal,FISHO,http://bioportal.bioontology.org/ontologies/FISHO#,canonical +bioportal,FISHO,http://mybiodiversityontologies.um.edu.my/FO.owl#,prefix_alias +bioportal,FLYGLYCODB,http://www.flyglycodb.org/ontologies/2015/,canonical +bioportal,FMA,http://purl.org/sig/ont/fma/,canonical +bioportal,FMA,http://purl.org/obo/owlapi/fma#,prefix_alias +bioportal,FMA,http://sig.uw.edu/fma#,prefix_alias +bioportal,GALEN,http://www.co-ode.org/ontologies/galen#,canonical +bioportal,GAMUTS,http://www.gamuts.net/entity#,canonical +bioportal,GBOL,http://gbol.life/0.1/,canonical +bioportal,GCO,http://rdf.biosemantics.org/ontologies/genomecomponents#,canonical +bioportal,GECKO,OBO:GECKO_,canonical +bioportal,GENE-CDS,http://www.genomic-cds.org/ont/genomic-cds.owl#,canonical 
+bioportal,GEOSPARQL,http://www.opengis.net/ont/geosparql#,canonical +bioportal,GEOSPECIES,http://rdf.geospecies.org/ont/geospecies#,canonical +bioportal,GEOSPECIES,http://rdf.geospecies.org/ont/geospecies.owl#,prefix_alias +bioportal,GFFO,https://raw.githubusercontent.com/mpievolbio-scicomp/GenomeFeatureFormatOntology/main/gffo#,canonical +bioportal,GFO,http://www.onto-med.de/ontologies/gfo.owl#,canonical +bioportal,GFO,http://www.onto-med.de/ontologies/gfo-basic.owl#,prefix_alias +bioportal,GFO-BIO,http://onto.eva.mpg.de/ontologies/gfo-bio.owl#,canonical +bioportal,GFVO,https://www.codamono.com/biointerchange/gfvo#,canonical +bioportal,GLYCO,http://glycomics.ccrc.uga.edu/ontologies/GlycO#,canonical +bioportal,GLYCOCOO,http://purl.jp/bio/12/glyco/,canonical +bioportal,GML,http://www.opengis.net/ont/gml#,canonical +bioportal,GML,http://loki.cae.drexel.edu/~wbs/ontology/2004/09/ogc-gml#,prefix_alias +bioportal,GMO,http://purl.jp/bio/10/gmo/GMO_,canonical +bioportal,GNO,OBO:GNO_,canonical +bioportal,GO,http://purl.org/obo/owl/GO#GO_,canonical +bioportal,GRO,http://www.bootstrep.eu/ontology/GRO#,canonical +bioportal,GSSO,OBO:GSSO_,canonical +bioportal,GVO,http://genome-variation.org/resource/gvo#,canonical +bioportal,HASCO,http://hadatac.org/ont/hasco/,canonical +bioportal,HCDR,http://www.semanticweb.org/m14067/ontologies/2020/0/untitled-ontology-5#,canonical +bioportal,HCPCS,http://purl.bioontology.org/ontology/HCPCS/,canonical +bioportal,HEIO,http://whistl.uwaterloo.ca/heio.owl#,canonical +bioportal,HFO,http://bmi.utah.edu/ontologies/hfontology/,canonical +bioportal,HGNC,http://ncicb.nci.nih.gov/xml/owl/EVS/Hugo.owl#HGNC_,canonical +bioportal,HGNC,OBO:HGNC_,prefix_alias +bioportal,HIVO0004,http://bioportal/bioontology.org/ontologies/HIVO0004#,canonical +bioportal,HL7,http://purl.bioontology.org/ontology/HL7/,canonical +bioportal,HLA,http://purl.org/stemnet/HLA#,canonical +bioportal,HMIS033B,http://vocab.vodana.org/hmis033b/,canonical +bioportal,HNS,http://www.humannervousystem.org/KAnOE/2014/dave86#,canonical +bioportal,HRDO,http://www.limics.org/hrdo/HRDO.owl#,canonical +bioportal,HSO,https://w3id.org/hso#,canonical +bioportal,HUPSON,http://scai.fraunhofer.de/HuPSON#,canonical +bioportal,I-ADOPT,https://w3id.org/iadopt/ont/,canonical +bioportal,I2SV,https://i2insights.org/index/integration-and-implementation-sciences-vocabulary#,canonical +bioportal,IAML-MOP,http://data.doremus.org/vocabulary/iaml/mop/,canonical +bioportal,IBO,http://www.semanticweb.org/eamdouni/ontologies/2015/5/IBO#,canonical +bioportal,ICD10,http://purl.bioontology.org/ontology/ICD10/,canonical +bioportal,ICD10,https://cdn.rawgit.com/laiasubirats/rarediseasesontology/master/ICD10_1.0.owl#,prefix_alias +bioportal,ICD10CM,http://purl.bioontology.org/ontology/ICD10CM/,canonical +bioportal,ICD10PCS,http://purl.bioontology.org/ontology/ICD10PCS/,canonical +bioportal,ICD11-BODYSYSTEM,http://who.int/bodysystem.owl#,canonical +bioportal,ICD9CM,http://purl.bioontology.org/ontology/ICD9CM/,canonical +bioportal,ICECI,http://who.int/iceci#,canonical +bioportal,ICECI,http://who.int/iceci.owl#,prefix_alias +bioportal,ICF,http://who.int/icf#,canonical +bioportal,ICNP,http://www.icn.ch/icnp#,canonical +bioportal,ICPC2P,http://purl.bioontology.org/ontology/ICPC2P/,canonical +bioportal,ICPS,http://www.ICPS/ontologies/,canonical +bioportal,ID-AMR,http://purl.org/zonmw/id-amr/,canonical +bioportal,IDEM,http://purl.org/idem/,canonical +bioportal,IDG_GL,http://druggablegenome.net/,canonical 
+bioportal,IDODEN,http://purl.bioontology.org/ontology/IDODEN_,canonical +bioportal,IMGT-ONTOLOGY,http://www.imgt.org/download/IMGT-ONTOLOGY/IMGT-ONTOLOGY-v1-0-3.owl#,canonical +bioportal,INBIO,http://www.semanticweb.org/rs/ontologies/INBIO#,canonical +bioportal,INBIODIV,http://www.semanticweb.org/mca/ontologies/2018/8/untitled-ontology-47#,canonical +bioportal,INCENTIVE,http://purl.org/incentive/,canonical +bioportal,INCENTIVE-VARS,http://purl.org/incentive/variables/,canonical +bioportal,INFRARISK,https://www.infrarisk-fp7.eu/vocabs/#,canonical +bioportal,INSECTH,http://neuromorpho.org/ontologies/insectH.owl#,canonical +bioportal,INSNAME,https://www.vodan-totafrica.info/vocs/institutions/,canonical +bioportal,IntAct,http://identifiers.org/intact/,canonical +bioportal,INTO,http://www.semanticweb.org/Terrorism#,canonical +bioportal,INVERSEROLES,http://rds.posccaesar.org/2008/02/OWL/ISO-15926-2_2003#,canonical +bioportal,IRD,http://www.semanticweb.org/msh/ontologies/2019/9/untitled-ontology-3#,canonical +bioportal,IRDG,http://www.semanticweb.org/IRDGuyamazon#,canonical +bioportal,ISO-15926-2_2003,http://rds.posccaesar.org/2008/02/OWL/ISO-15926-2_2003#,namespace_alias +bioportal,ISO19108TO,http://def.isotc211.org/iso19108/2006/,canonical +bioportal,ISO19110,http://def.isotc211.org/iso19110/2005/,canonical +bioportal,ISO19115,http://loki.cae.drexel.edu/~wbs/ontology/2004/09/iso-19115#,canonical +bioportal,ISO19115CC,http://def.isotc211.org/iso19115/-1/2014/CommonClasses/code/,canonical +bioportal,ISO19115ID,http://def.isotc211.org/iso19115/2003/,canonical +bioportal,ISO19115PR,http://www.geosciml.org/vocabularies/iso-19115-codelists.owl#,canonical +bioportal,ISO19115PR,http://www.geosciml.org/vocabularies/iso-19115-codes.owl#,prefix_alias +bioportal,ISSVA,http://purl.bioontology.org/ontology/ISSVA/,canonical +bioportal,JERM,http://jermontology.org/ontology/JERMOntology#,canonical +bioportal,KISAO,http://www.biomodels.net/kisao/KISAO#KISAO_,canonical +bioportal,KORO,http://www.knowledgegrid.org/koro#,canonical +bioportal,KORO,http://www.knowledgegrid.org/koro/1.0.0/koro.owl#,prefix_alias +bioportal,LAND-SURFACE,http://anzsoil.org/def/au/asls/land-surface/,canonical +bioportal,LANDFORM,http://anzsoil.org/def/au/asls/landform/,canonical +bioportal,LDA,http://www.semanticweb.org/ontologies/2008/10/languageacquisition_autism.owl#,canonical +bioportal,LEGALAPA,http://www.semanticweb.org/ontologies/2014/11/legal.owl#,canonical +bioportal,LEGALAPATEST2,http://www.semanticweb.org/ontologies/2014/11/legal-2.owl#,canonical +bioportal,LICO,http://vavlab.ee.boun.edu.tr/carera/khaos/lico.owl#,canonical +bioportal,LOINC,http://purl.bioontology.org/ontology/LNC/,canonical +bioportal,LONGCOVID,http://www.semanticweb.org/orchid/ontologies/2021/Long-Covid-Phenotype-Ontology#,canonical +bioportal,LUNGMAP_H_CELL,OBO:LMHA_,canonical +bioportal,LUNGMAP_M_CELL,OBO:LMMA_,canonical +bioportal,M4M19-SUBS,http://purl.org/m4m19/subjects/,canonical +bioportal,M4M19-VARS,http://purl.org/m4m19/variables#,canonical +bioportal,MATRELEMENT,http://sweet.jpl.nasa.gov/2.3/matrElement.owl#,canonical +bioportal,MATRROCK,http://sweet.jpl.nasa.gov/2.3/matrRock.owl#,canonical +bioportal,MATRROCKIGNEOUS,http://sweet.jpl.nasa.gov/2.3/matrRockIgneous.owl#,canonical +bioportal,MCBCC,OBO:MCBCC_,canonical +bioportal,MCCL,http://purl.bioontology.org/ontology/MCCL/CL__,canonical +bioportal,MCCL,http://www.semanticweb.org/pallabi.d/ontologies/2014/2/untitled-ontology-11#,prefix_alias 
+bioportal,MDDB,http://purl.bioontology.org/ontology/MDDB/,canonical +bioportal,MEDLINEPLUS,http://purl.bioontology.org/ontology/MEDLINEPLUS/,canonical +bioportal,MEDO,http://www.ebi.ac.uk/efo/medo/MEDO_,canonical +bioportal,MEDO,http://www.ebi.ac.uk/medo/,prefix_alias +bioportal,MEO,http://purl.jp/bio/11/meo/,canonical +bioportal,MESH,http://purl.bioontology.org/ontology/MESH/,canonical +bioportal,MHC,http://purl.org/stemnet/MHC#,canonical +bioportal,MIM,http://purl.bioontology.org/ontology/MIM#,canonical +bioportal,MIRO,OBO:miro#,canonical +bioportal,MIXS,https://w3id.org/mixs/terms/,canonical +bioportal,MIXS,https://w3id.org/mixs/vocab/,prefix_alias +bioportal,MOC,http://sweet.jpl.nasa.gov/2.3/matrOrganicCompound.owl#,canonical +bioportal,MODSCI,https://w3id.org/skgo/modsci#,canonical +bioportal,MONO,http://www.owl-ontologies.com/MO.owl#,canonical +bioportal,MOSAIC,https://purl.dataone.org/odo/MOSAIC_,canonical +bioportal,MSO,http://scai.fraunhofer.de/MSOntology#,canonical +bioportal,MSTDE,http://purl.bioontology.org/ontology/MSTDE/,canonical +bioportal,MSTDE-FRE,http://purl.bioontology.org/ontology/MSTDE-FRE/,canonical +bioportal,MSV,http://purl.jp/bio/11/msv/,canonical +bioportal,NCBIGene,http://identifiers.org/ncbigene/,canonical +bioportal,NCBIGene,OBO:NCBIGene_,prefix_alias +bioportal,NCBITAXON,http://purl.bioontology.org/ontology/NCBITAXON/,canonical +bioportal,NCBITAXON,http://www.ncbi.nlm.nih.gov/taxonomy/,prefix_alias +bioportal,NCBITAXON,OBO:NCBITAXON_,prefix_alias +bioportal,NCCNEHR,http://www.semanticweb.org/lamb/ontologies/NCCN-EHR#,canonical +bioportal,NCCO,http://www.semanticweb.org/vanessa/ontologies/2012/7/untitled-ontology-33#,canonical +bioportal,NCIT,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical +bioportal,NDDF,http://purl.bioontology.org/ontology/NDDF/,canonical +bioportal,NDDO,http://www.purl.org/NDDO/,canonical +bioportal,NDFRT,http://purl.bioontology.org/ontology/NDFRT/,canonical +bioportal,NDFRT,http://evs.nci.nih.gov/ftp1/NDF-RT/NDF-RT.owl#,prefix_alias +bioportal,NEICBEER,http://ontology.deic.dk/cv/beer-ontology/,canonical +bioportal,NEOMARK3,http://www.neomark.eu/ontologies/,canonical +bioportal,NEOMARK4,http://neomark.owl#,canonical +bioportal,NEUDIGS,http://bmkeg.isi.edu/neuDIGs#,canonical +bioportal,NEUMORE,http://neumore.cis.usouthal.edu/ontologies/NeuMORE-v0.1.owl#,canonical +bioportal,NIFCELL,http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-Cell.owl#,canonical +bioportal,NIHSS,https://mre.zcu.cz/ontology/nihss.owl#,canonical +bioportal,NMOBR,http://neuromorpho.org/ontologies/NMOSp.owl#,canonical +bioportal,NMR,http://nmrML.org/nmrCV#,canonical +bioportal,NPI,http://purl.bioontology.org/ontology/NPI#,canonical +bioportal,NPO,http://purl.bioontology.org/ontology/npo#,canonical +bioportal,OA,http://www.w3.org/ns/oa#,canonical +bioportal,OBOE,http://ecoinformatics.org/oboe/oboe.1.2/,canonical +bioportal,OBOREL,http://www.obofoundry.org/ro/ro.owl#,canonical +bioportal,OBOREL,OBO:OBO_REL#_,prefix_alias +bioportal,OBS,http://www.semanticweb.org/bito2/ontologies/2021/3/untitled-ontology-11#,canonical +bioportal,OCIMIDO,https://github.com/sap218/ocimido/blob/master/ontology/ocimido.owl#,canonical +bioportal,OCRE,http://purl.org/net/OCRe/,canonical +bioportal,OGR,http://www.owl-ontologies.com/GeographicalRegion.owl#,canonical +bioportal,OGROUP,http://protozoadb.biowebdb.org/22/ogroup#,canonical +bioportal,OM,http://www.ontology-of-units-of-measure.org/resource/,canonical 
+bioportal,OMIM,http://purl.bioontology.org/ontology/OMIM/,canonical +bioportal,OMIM,http://identifiers.org/omim/,prefix_alias +bioportal,OMV,http://omv.ontoware.org/2005/05/ontology#,canonical +bioportal,ONL-DP,http://neurolog.unice.fr/ontoneurolog/v3.0/ontoneurolog-dataset-processing.owl#,canonical +bioportal,ONL-TASKS,http://www.semanticweb.org/bakaev/ontologies/2020/3/untitled-ontology-25#,canonical +bioportal,ONLIRA,http://vavlab.ee.boun.edu.tr/carera/onlira.owl#,canonical +bioportal,ONSTR,http://onstr.googlecode.com/svn/tags/currentRelease/2014-09-03/ONSTR.owl#ONSTR_,canonical +bioportal,ONTOAD,http://doe-generated-ontology.com/OntoAD#,canonical +bioportal,ONTODM,http://www.ontodm.com/OntoDM-core/OntoDM_,canonical +bioportal,ONTODM,http://kt.ijs.si/panovp/OntoDM#OntoDM_,prefix_alias +bioportal,ONTODT,http://ontodm.com/OntoDT#,canonical +bioportal,ONTODT,http://www.ontodm.com/OntoDT#,prefix_alias +bioportal,ONTOLURGENCES,http://www.limics.fr/ontologies/ontolurgences#,canonical +bioportal,ONTOPBM,http://w3id.org/ontopbm#OntoPBM_,canonical +bioportal,ONTOPNEUMO,http://doe-generated-ontology.com/OntoPneumo#,canonical +bioportal,ONTOSIM,http://www.semanticweb.org/DIASUS/OntoSIM#,canonical +bioportal,ONTOSINASC,http://www.semanticweb.org/DIASUS/OntoSINASC#,canonical +bioportal,ONTOTOX,http://OntoTox.owl#,canonical +bioportal,OOEVV,http://bmkeg.isi.edu/ooevv/,canonical +bioportal,OPB,http://bhi.washington.edu/OPB#OPB_,canonical +bioportal,OPB,http://bhi.washington.edu/OPB##,prefix_alias +bioportal,OPDE,http://www.mudhc.edu.et/template-vocabulary#,canonical +bioportal,OPE,http://www.semanticweb.org/ontologies/2013/2/OPE.owl#,canonical +bioportal,OPTION-ONTOLOGY,http://w3id.org/ontoopt/,canonical +bioportal,ORCS,OBO:ORCS_,canonical +bioportal,ORTH,http://purl.org/net/orth#,canonical +bioportal,OSM,https://saudeconectada.org/saude_mental.owl#,canonical +bioportal,PANDA,http://purl.bioontology.org/net/brunel/panda#,canonical +bioportal,PANET,http://purl.org/pan-science/PaNET/,canonical +bioportal,PATCT,https://pat.nichd.nih.gov/placentalcelltype/,canonical +bioportal,PATEL,http://www.semanticweb.org/ambrish/ontologies/2020/10/untitled-ontology-24#,canonical +bioportal,PATGV,https://pat.nichd.nih.gov/patgeneticvariance/,canonical +bioportal,PATHLEX,http://www.semanticweb.org/david/ontologies/2013/0/pathLex.owl#,canonical +bioportal,PATIT,https://pat.nichd.nih.gov/patinvestigativetechniques/,canonical +bioportal,PATMHC,http://pat.nichd.nih.gov/maternalconditions/,canonical +bioportal,PATO,http://purl.obolibrary.org/obo/PATO_,canonical +bioportal,PCALION,http://www.semanticweb.org/ontologies/Prostate_cancer#,canonical +bioportal,PDO,http://purl.jp/bio/11/pdo/,canonical +bioportal,PDON,http://www.semanticweb.org/ontologies/2011/1/Ontology1296772722296.owl#,canonical +bioportal,PDQ,http://purl.bioontology.org/ontology/PDQ/,canonical +bioportal,PDRO,OBO:PDRO/PDRO.owl#,canonical +bioportal,PDUMDV,OBO:PdumDv_,canonical +bioportal,PE,http://bmi.utah.edu/ontologies/peontology/,canonical +bioportal,PE-O,http://www.pepathway.org/peo/1.2#,canonical +bioportal,PE-O,http://www.pepathway.org/peo/1.1#,prefix_alias +bioportal,PEDTERM,http://www.owl-ontologies.com/Ontology1358660052.owl#,canonical +bioportal,PEO,http://knoesis.wright.edu/ParasiteExperiment.owl#,canonical +bioportal,PESONT,http://www.semanticweb.org/patienceusip/ontologies/2021/7/untitled-ontology-23#,canonical +bioportal,PGXO,http://pgxo.loria.fr/,canonical +bioportal,PHENX,http://purl.bioontology.org/ontology/phenX/,canonical 
+bioportal,PHYLONT,http://www.semanticweb.org/ontologies/2011/7/Ontology1314368515010.owl#,canonical +bioportal,PIERO,http://reactionontology.org/piero/,canonical +bioportal,PLIO,http://www.semanticweb.org/ontologies/2010/3/Ontology1271664172453.owl#,canonical +bioportal,PLOSTHES,http://localhost/plosthes.2017-1#,canonical +bioportal,PMD,http://www.onto-med.de/ontologies/gfo-persian-medicine-diseases.owl#,canonical +bioportal,PMDO,http://www.case.edu/PMDO#,canonical +bioportal,PMO,http://performedmusicontology.org/ontology/,canonical +bioportal,PMO-SPEED,http://performedmusicontology.org/ontologies/vocabularies/playing_speed/,canonical +bioportal,PMR,http://purl.bioontology.org/ontology/PMR.owl#,canonical +bioportal,PP,https://bitbucket.org/PlantExpAssay/ontology/raw/v0.1/PipelinePatterns.owl#P,canonical +bioportal,PR,OBO:pr#,canonical +bioportal,PREGONTO,http://www.clininf.eu/pregnancy#,canonical +bioportal,PREO,http://presence-ontology.org/ontology//,canonical +bioportal,PROCCHEMICAL,http://sweet.jpl.nasa.gov/2.3/propChemical.owl#,canonical +bioportal,PROJ,http://linked.data.gov.au/def/project/,canonical +bioportal,PROPREO,http://lsdis.cs.uga.edu/projects/glycomics/propreo#,canonical +bioportal,PROVO,http://www.w3.org/ns/prov-o#,canonical +bioportal,PROVO,http://www.w3.org/ns/prov-o-20130312#,prefix_alias +bioportal,PSO_2,http://ontorion.com/PSO#,canonical +bioportal,QUDT,http://qudt.org/schema/,canonical +bioportal,QUDT,http://qudt.org/2.1/schema/,prefix_alias +bioportal,RADLEX,http://radlex.org/RID/,canonical +bioportal,RADLEX,http://www.radlex.org/RID/,prefix_alias +bioportal,RADXTT-MVREASONS,https://radx.orgx/vocs/missing-value-reason/,canonical +bioportal,RCD,http://purl.bioontology.org/ontology/RCD/,canonical +bioportal,RCTONT,http://www.owl-ontologies.com/RCTOntology.owl#,canonical +bioportal,RCTV2,http://purl.bioontology.org/ontology/RCTV2/,canonical +bioportal,RDA-CONTENT,http://rdaregistry.info/termList/RDAContentType#,canonical +bioportal,REPO,http://purl.bioontology.org/ontology/REPO.owl#,canonical +bioportal,RH-MESH,http://phenomebrowser.net/ontologies/mesh/mesh.owl#,canonical +bioportal,RO,http://www.radiomics.org/RO/,canonical +bioportal,ROLEO,OBO:RoleO_,canonical +bioportal,ROO,http://www.cancerdata.org/roo/,canonical +bioportal,ROS,urn:absolute:RadiationOncologyStructuresOntology#,canonical +bioportal,RPO,http://www.semanticweb.org/ontologies/2012/5/Ontology1338526551855.owl#,canonical +bioportal,RSA,http://rdf.biosemantics.org/ontologies/rsa#,canonical +bioportal,RVO,http://w3id.org/rv-ontology#,canonical +bioportal,SAO,http://ccdb.ucsd.edu/SAO/1.2#,canonical +bioportal,SARSMUTONTO,file://C/Users/Jamal/Desktop/SARSMutOnto.owl#,canonical +bioportal,SBO,http://purl.bioontology.org/ontology/SBO/SBO_,canonical +bioportal,SBO,http://biomodels.net/SBO/SBO_,prefix_alias +bioportal,SBOL,OBO:SBOL_,canonical +bioportal,SCHEMA,http://schema.org/,canonical +bioportal,SCHEMA,http://meta.schema.org/,prefix_alias +bioportal,SCHEMA,http://www.w3.org/wiki/WebSchemas/,prefix_alias +bioportal,SCHEMA,https://www.w3.org/wiki/WebSchemas/,prefix_alias +bioportal,SCIO,http://psink.de/scio/,canonical +bioportal,SD3,http://www.wiser.pitt.edu/ontologies/SimulationScenarioDeviations.owl#,canonical +bioportal,SDO,http://mimi.case.edu/ontologies/2009/1/SDO.owl#,canonical +bioportal,SEDI,http://semantic-dicom.org/dcm#,canonical +bioportal,SENSO,http://purl.dataone.org/odo/SENSO_,canonical +bioportal,SEQ,http://www.ontologydesignpatterns.org/cp/owl/sequence.owl#,canonical 
+bioportal,SHR,http://www.shojaee.com/shr/shr.owl#,canonical +bioportal,SITBAC,http://www.semanticweb.org/ontologies/2008/1/Ontology1204037102846.owl#,canonical +bioportal,SK,http://www.semanticweb.org/sandeepak/digitalforensic#,canonical +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/smash-ontology#,canonical +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/biomarker.owl#,prefix_alias +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/physical-activity.owl#,prefix_alias +bioportal,SMASH,http://aimlab.cs.uoregon.edu/smash/ontologies/social-activity.owl#,prefix_alias +bioportal,SNMI,http://purl.bioontology.org/ontology/SNMI/,canonical +bioportal,SNOMEDCT,http://purl.bioontology.org/ontology/SNOMEDCT/,canonical +bioportal,SNPO,http://www.loria.fr/~coulet/ontology/snpontology/version1.6/snpontology_full.owl#,canonical +bioportal,SO,http://purl.org/obo/owl/SO#SO_,canonical +bioportal,SOCPRES,http://www.semanticweb.org/social-prescribing#,canonical +bioportal,SOPHARM,http://www.loria.fr/~coulet/sopharm/SOPHARM_,canonical +bioportal,SOY,OBO:SOY_,canonical +bioportal,SP,http://purl.org/net/SMARTprotocol#,canonical +bioportal,SPO,http://www.semanticweb.org/ontologies/2008/8/MultiscaleSkinPhysiologyOntology.owl#,canonical +bioportal,SPO,http://www.semanticweb.org/ontologies/2008/8/SPO_lightweight_merged.owl#,prefix_alias +bioportal,SPTO,OBO:SP_,canonical +bioportal,SSN,http://www.w3.org/ns/ssn/,canonical +bioportal,SSO,http://surveillance.mcgill.ca/sso/syndromes.owl#,canonical +bioportal,SSO,http://www.medicine.mcgill.ca/epidemiology/buckeridge/syndromes.owl#,prefix_alias +bioportal,STMSO,https://bioportal.bioontology.org/ontologies/STMSO#,canonical +bioportal,STY,http://purl.bioontology.org/ontology/STY/,canonical +bioportal,SURGICAL,http://www.cablesat.com.au/research/,canonical +bioportal,SWEET,http://sweetontology.net/,canonical +bioportal,SWO,http://www.ebi.ac.uk/swo/SWO_,canonical +bioportal,SWO,http://www.ebi.ac.uk/efo/swo/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/algorithm/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/data/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/interface/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/license/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/objective/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/organization/SWO_,prefix_alias +bioportal,SWO,http://www.ebi.ac.uk/swo/version/SWO_,prefix_alias +bioportal,TAXRANK,OBO:taxrank.owl#,canonical +bioportal,TCDO,http://OntoTCM.org.cn/ontologies/TCDO_,canonical +bioportal,TCO,http://www.semanticweb.org/hx-jta/ontologies/thyroid_cancer_ontology#,canonical +bioportal,TDWGSPEC,http://rs.tdwg.org/ontology/voc/Specimen#,canonical +bioportal,TEDDY,http://identifiers.org/teddy/TEDDY_,canonical +bioportal,TEO,http://informatics.mayo.edu/TEO.owl#TEO_,canonical +bioportal,TESTEX,https://bioportal.databiology.com/test1.owl#,canonical +bioportal,TIME,http://www.w3.org/2006/time#,canonical +bioportal,TIMEBANK,https://w3id.org/timebank#,canonical +bioportal,TM-CONST,http://who.int/ictm/constitution#,canonical +bioportal,TM-MER,http://who.int/ictm/meridians#,canonical +bioportal,TM-SIGNS-AND-SYMPTS,http://who.int/ictm/signsAndSymptoms#,canonical +bioportal,TMA,http://bioontology.org/ontologies/tma-minimal#,canonical +bioportal,TMO,http://www.w3.org/2001/sw/hcls/ns/transmed/,canonical +bioportal,TOK,http://cui.unige.ch/isi/onto/tok/TOK.owl#,canonical 
+bioportal,TOP-MENELAS,http://www.limics.fr/ontologies/menelastop#,canonical +bioportal,TRAK,OBO:TRAK_,canonical +bioportal,TRIAGE,http://www.semanticweb.org/philshields/ontologies/2015/4/untitled-ontology-59#,canonical +bioportal,TRON,OBO:TrOn_,canonical +bioportal,TXPO,OBO:TXPO_,canonical +bioportal,TYPON,http://purl.phyloviz.net/ontology/typon#,canonical +bioportal,UMMS,https://w3id.org/umms/ekg/onto01/,canonical +bioportal,UNITSONT,http://mimi.case.edu/ontologies/2009/1/UnitsOntology#,canonical +bioportal,UPA,OBO:UPa_,canonical +bioportal,VANDF,http://purl.bioontology.org/ontology/VANDF/,canonical +bioportal,VARIO,OBO:VariO_,canonical +bioportal,VDOT,http://www.ifomis.org/vdot/vdot_core.owl#vdot_,canonical +bioportal,VEO,http://sbmi.uth.tmc.edu/ontology/VEO#,canonical +bioportal,VIDO,OBO:VIDO_,canonical +bioportal,VODANADISEASES,http://vocab.vodan-totafrica.info/vodana-terms/vdiseases/,canonical +bioportal,VODANAMFLCODE,http://vocab.vodana.org/vmfl/,canonical +bioportal,WB-LS,OBO:WBls_,canonical +bioportal,WC,OBO:WC_,canonical +bioportal,WEAR,http://purl.org/wear/,canonical +bioportal,WEAVE,http://purl.org/weave/,canonical +bioportal,WETAXTOPICS,http://purl.org/neat/,canonical +bioportal,WIKIPATHWAYS,http://vocabularies.wikipathways.org/wp#,canonical +bioportal,WIKIPATHWAYS,http://vocabularies.wikipathways.org/wpTypes#,prefix_alias +bioportal,WSIO,OBO:WSIO_,canonical +bioportal,WSIO,OBO:http://wsio.org#,prefix_alias +bioportal,XEO,OBO:XEO_,canonical +bioportal,XLMOD,OBO:XLMOD_,canonical +bioportal,XPO,OBO:XPO_,canonical +bioportal,XREF-FUNDER-REF,http://data.crossref.org/fundingdata/vocabulary/Label-,canonical +bioportal,ZONMW-ADMIN-MD,http://www.fair-data-collective.com/zonmw/projectadmin/,canonical +bioportal,ZONMW-CONTENT,http://purl.org/zonmw/covid19/,canonical +bioportal,ZONMW-GENERIC,http://purl.org/zonmw/generic/,canonical \ No newline at end of file diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 1aa1648..922691a 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -2,6 +2,7 @@ from os import environ from typing import Union +import importlib from urllib.parse import urlparse from json import load @@ -111,3 +112,35 @@ def expand_curie(curie: str) -> str: } prefix, suffix = curie.split(":") return f"{mapping[prefix]}{suffix}" + + +def compress_uri(uri: str) -> str: + """ + Compress a URI into a CURIE based on the prefix mappings in the OBO and + BioPortal converters. + + :param uri: The URI to be compressed into a CURIE. + :returns: The compressed CURIE. Returns the original URI if the prefix + does not have a mapping. + :notes: This is a wrapper function around the `prefixmaps` and `curies` + libraries. + """ + prefixmaps = load_prefixmaps() + match = prefixmaps[prefixmaps["namespace"].apply(lambda x: x in uri)] + if not match.empty: + prefix = match["prefix"].values[0] + suffix = uri.replace(match["namespace"].values[0], "") + return f"{prefix}:{suffix}" + return uri + + +def load_prefixmaps() -> dict: + """ + Load ontology prefix maps. To be used with `expand_curie` and + `compress_uri`. 
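+ For example, one illustrative row maps the prefix "ENVO" to the namespace
+ "http://purl.obolibrary.org/obo/ENVO_"; `compress_uri` uses such rows to
+ shorten full term URIs into CURIEs.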
+
+    :returns: The ontology prefix maps
+    """
+    file = str(importlib.resources.files("spinneret.data")) + "/prefixmaps.csv"
+    prefixmaps = pd.read_csv(file)
+    return prefixmaps
diff --git a/tests/conftest.py b/tests/conftest.py
index 8943e6a..012eee8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,5 +1,6 @@
 """Configure the test suite"""
 
+from json import load
 import pytest
 
 from spinneret.utilities import load_workbook
@@ -18,3 +19,39 @@ def annotated_workbook():
     """Return a fixture for an annotated workbook"""
     wb = load_workbook("tests/edi.3.9_annotation_workbook_annotated.tsv")
     return wb
+
+
+@pytest.fixture(name="termset_similarity_score_raw")
+def termset_similarity_score_raw():
+    """Return a fixture for raw termset similarity scores returned by the
+    `runoak -i {db} termset-similarity` command."""
+    score_file = "tests/data/benchmark/termset_similarity_score_raw.json"
+    with open(score_file, "r", encoding="utf-8") as file:
+        return load(file)
+
+
+@pytest.fixture(name="termset_similarity_score_processed")
+def termset_similarity_score_processed():
+    """Return a fixture for processed termset similarity scores returned by
+    the get_termset_similarity function."""
+    score_file = "tests/data/benchmark/termset_similarity_score_processed.json"
+    with open(score_file, "r", encoding="utf-8") as file:
+        return load(file)
+
+
+@pytest.fixture(name="termset_similarity_score_fields")
+def termset_similarity_score_fields():
+    """Return a fixture for the fields expected in the termset similarity
+    scores"""
+    return [
+        "average_score",
+        "best_score",
+        "average_jaccard_similarity",
+        "best_jaccard_similarity",
+        "average_phenodigm_score",
+        "best_phenodigm_score",
+        "average_standard_information_content",
+        "best_standard_information_content",
+        "average_test_information_content",
+        "best_test_information_content",
+    ]
diff --git a/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv
new file mode 100644
index 0000000..200ba37
--- /dev/null
+++ b/tests/data/benchmark/standard/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv
@@ -0,0 +1,78 @@
+package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater lake biome http://purl.obolibrary.org/obo/ENVO_01000252 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ oligotrophic lake http://purl.obolibrary.org/obo/ENVO_01000774 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ eutrophic lake http://purl.obolibrary.org/obo/ENVO_01000548 https://orcid.org/0000-0003-2261-9931 2014-10-15
+knb-lter-ntl.1.59
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ mesotrophic lake http://purl.obolibrary.org/obo/ENVO_01000775 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ humic lake http://purl.obolibrary.org/obo/ENVO_01001021 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ dimictic lake http://purl.obolibrary.org/obo/ENVO_01000286 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ polymictic lake http://purl.obolibrary.org/obo/ENVO_01000287 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 biogeochemical cycling http://purl.obolibrary.org/obo/ENVO_02500009 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 acidification of an aquatic environment http://purl.obolibrary.org/obo/ENVO_01000630 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod environmental monitoring http://vocabs.lter-europe.net/EnvThes/21335 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod field measurement http://vocabs.lter-europe.net/EnvThes/20223 https://orcid.org/0000-0003-2261-9932 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod long term monitoring http://vocabs.lter-europe.net/EnvThes/21337 https://orcid.org/0000-0003-2261-9933 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod observational study http://vocabs.lter-europe.net/EnvThes/20243 https://orcid.org/0000-0003-2261-9934 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod time series http://vocabs.lter-europe.net/EnvThes/20285 https://orcid.org/0000-0003-2261-9935 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod water quality monitoring http://vocabs.lter-europe.net/EnvThes/21339 https://orcid.org/0000-0003-2261-9936 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod mooring http://vocabs.lter-europe.net/EnvThes/20304 https://orcid.org/0000-0003-2261-9937 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod combustion analysis https://www.wikidata.org/wiki/Q591867 https://orcid.org/0000-0003-2261-9938 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset b2a8f496-646c-41d3-af2f-53e9520d1954 /eml:eml/dataset knb-lter-ntl.1.59 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod colorimetric assay https://www.wikidata.org/wiki/Q5149058 https://orcid.org/0000-0003-2261-9939 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66ec8e6f-5747-4540-a605-a2f5881869f1 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType site identifier http://purl.dataone.org/odo/ECSO_00002997 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 89e8a721-37cc-4a68-8cd5-81b517bc8348 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType year of measurement http://purl.dataone.org/odo/ECSO_00002050 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7a1c02cb-743d-433e-b799-30e15f21fc42 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType day of year http://purl.dataone.org/odo/ECSO_00002058 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4f92fc26-bff5-43ad-83ba-e9eb78cdde32 
/eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType date http://purl.dataone.org/odo/ECSO_00002051 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ce72e831-a964-4e5b-a48f-4e736fe8cbde /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType depth http://purl.dataone.org/odo/ECSO_00000515 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f97b7698-2e00-4648-ae73-22660b2f7522 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType replicate identifier http://purl.dataone.org/odo/ECSO_00002989 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6eeef47d-8fb8-45a4-8ea7-e2f12ce508f7 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType station identifier http://purl.dataone.org/odo/ECSO_00002393 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0e9015d5-6bb8-46d3-81f8-254f4e018e56 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sampling occasion http://purl.dataone.org/odo/ECSO_00002620 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c1cdf313-99c1-4306-aea0-80d589f28dfb /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType pH http://purl.dataone.org/odo/ECSO_00001645 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4dd2235c-d46d-429c-b74c-f1fa536d8f71 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType pH http://purl.dataone.org/odo/ECSO_00001645 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 34820bf1-ec29-450c-a70e-970f4a1cd9cf /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total alkalinity http://purl.dataone.org/odo/ECSO_00001752 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0a54166f-7a3e-453e-b083-da01e2e26c16 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType dissolved inorganic carbon concentration in freshwater http://purl.dataone.org/odo/ECSO_00002103 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 64541338-8971-4633-b18f-61c39e493921 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Freshwater Total Inorganic Carbon Concentration http://purl.dataone.org/odo/ECSO_00001122 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute eccddf6b-6a90-424f-b6ba-acaf7cbc31a7 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType dissolved organic carbon concentration in water http://purl.dataone.org/odo/ECSO_00002143 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 0e6a7f1a-c817-4fd6-8e61-34e615a98a0a /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Aquatic Total Organic Carbon Concentration http://purl.dataone.org/odo/ECSO_00001118 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d5d5cb58-5fc4-4801-9a78-1d98203c3ee4 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 10d6081f-3c52-4323-a470-c2cdc799ffce /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute dc57229d-7528-47b2-b38b-461a4614a3a7 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ammonium concentration in water http://purl.dataone.org/odo/ECSO_00001760 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fbb79bdd-873f-44e5-a4aa-42bdf8072ba0 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf contains measurements 
of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1d24561a-fdad-4a16-8e35-8bfb480db6ab /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f1a8c896-e4c9-4c60-b61c-6ee9dc8f3115 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 269c37a4-59d5-44ec-9438-7a8bccd4e98b /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7e390f5c-2cce-4d19-a95e-bdaecd8a9a90 /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee934a41-29dc-4a17-bdb8-edb0c586869e /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 862be757-1124-4d61-8b7e-b9976d4348d2 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 860d9039-f249-4f03-a36f-47d54a167192 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType suspended particulate material concentration in water http://purl.dataone.org/odo/ECSO_00002646 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
b9717e81-a1f5-4cca-b3bf-2bac7e874602 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrogen concentration http://purl.dataone.org/odo/ECSO_00001883 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fead55f1-5384-4b80-8949-6e44fa31cf6c /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. of Hygiene no3no2_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 30c173fb-13a4-4dbf-b90b-672de6773227 /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ammonium concentration in water http://purl.dataone.org/odo/ECSO_00001760 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e2c8a15a-1f29-4c84-a2db-25f1d8ecac05 /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType nitrate and nitrite concentration in water http://purl.dataone.org/odo/ECSO_00002925 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e8120d58-6652-41c6-91b1-a0938a9640cb /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 81903d24-fd91-49fb-86a9-92a04b2c63e6 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total dissolved phosphorus concentration in water http://purl.dataone.org/odo/ECSO_00002844 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5b5b6b66-28ff-4303-84f8-4919b82bea0c /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. 
of Hygiene drsif_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2f31fe01-d430-45c4-a3f4-b49e422b55c9 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e7a232ae-12c0-42f6-9a24-60127051d584 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001721 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 44886cc4-edab-42b4-a626-a05e656bdc2f /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001722 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 89f8f176-d741-4ff3-b216-271e7bf86075 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001723 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 565ebbb5-20aa-4760-b9fb-8e113eb9daac /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001724 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fc378152-97d6-4332-ae71-a643a6c81157 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001725 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 488d182b-9f28-4995-b575-c4677f6ceb98 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001726 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 215c3c0d-1bf4-48dd-8848-0b267964852a /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001727 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4d7f18b9-adf4-4db2-8a32-d9a66cfa1621 /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001728 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a02c4fb7-7349-4c81-b19a-48de51099874 /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001729 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 73679553-319b-4cd2-bc55-b1a37a1b0f4f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001730 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f70fb9a4-34d2-4bd5-922b-41c83914b896 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001731 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute cd8180f3-ed1c-4efd-acdd-6433e3f80681 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001732 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d83e548a-85a7-4472-87d7-33fa5205c666 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001733 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7f301922-dc5a-42aa-aaa2-0ee176d15c30 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf contains measurements of type 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001734 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 21768e22-4e2f-4b8a-9d36-6b29008d5775 /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001735 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute fffeadc3-f4a3-497c-9a54-bb5b598a89b2 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001736 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6231932-1584-4319-8cec-24069735ed36 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001737 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1ec2a1cf-3ee6-4247-a202-0a3f2c7236da /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001738 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 21b168c0-5006-48a2-b1db-1776a5d94287 /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001739 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeb36290-9c30-478f-b09b-237b7316584a /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001740 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6e4ac687-b845-4547-b427-474a65c77a7f /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001741 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 65b6fee7-108e-4fb7-b099-36438fe46493 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001742 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute f5f1cd3f-d1cf-4109-8c42-9aee6cc7da77 /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001743 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute beb62153-b695-4365-b8d0-21a0e10495a1 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001744 https://orcid.org/0000-0003-2261-9931 2014-10-17 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c756e4b3-3c07-4e35-a967-d2e9f0a785b1 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001745 https://orcid.org/0000-0003-2261-9931 2014-10-17 \ No newline at end of file diff --git a/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..6ccc514 --- /dev/null +++ b/tests/data/benchmark/standard/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,45 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater lake biome http://purl.obolibrary.org/obo/ENVO_01000252 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ oligotrophic lake http://purl.obolibrary.org/obo/ENVO_01000774 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ eutrophic lake http://purl.obolibrary.org/obo/ENVO_01000548 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ mesotrophic lake http://purl.obolibrary.org/obo/ENVO_01000775 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ humic lake http://purl.obolibrary.org/obo/ENVO_01001021 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ dimictic lake http://purl.obolibrary.org/obo/ENVO_01000286 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ polymictic lake http://purl.obolibrary.org/obo/ENVO_01000287 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset contains process http://purl.obolibrary.org/obo/BFO_0000067 biogeochemical cycling http://purl.obolibrary.org/obo/ENVO_02500009 https://orcid.org/0000-0003-2261-9931 2014-10-15 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod environmental monitoring http://vocabs.lter-europe.net/EnvThes/21335 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod field measurement http://vocabs.lter-europe.net/EnvThes/20223 https://orcid.org/0000-0003-2261-9932 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod long term monitoring http://vocabs.lter-europe.net/EnvThes/21337 https://orcid.org/0000-0003-2261-9933 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod observational study http://vocabs.lter-europe.net/EnvThes/20243 https://orcid.org/0000-0003-2261-9934 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod 
http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod time series http://vocabs.lter-europe.net/EnvThes/20285 https://orcid.org/0000-0003-2261-9935 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod water quality monitoring http://vocabs.lter-europe.net/EnvThes/21339 https://orcid.org/0000-0003-2261-9936 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod mooring http://vocabs.lter-europe.net/EnvThes/20304 https://orcid.org/0000-0003-2261-9937 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod probe http://vocabs.lter-europe.net/EnvThes/20095 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod chromatography https://www.wikidata.org/wiki/Q170050 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset 573aa136-fe0c-4380-b898-a66812c1dadf /eml:eml/dataset knb-lter-ntl.2.37 dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod spectrophotometry https://www.wikidata.org/wiki/Q332084 https://orcid.org/0000-0003-2261-9939 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0897a396-1034-4848-bf0b-56fa200d9697 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType site identifier http://purl.dataone.org/odo/ECSO_00002997 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 74405050-dfa2-4375-b5b0-5aed73b7f01c /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType year of measurement http://purl.dataone.org/odo/ECSO_00002050 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1138b76-ef27-4107-a8e9-b139d50206e4 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType day of year http://purl.dataone.org/odo/ECSO_00002058 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b9d082a3-e19c-46fa-97dc-f7f6267158d1 
/eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType date http://purl.dataone.org/odo/ECSO_00002051 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e4c5511f-0dfc-4e6f-a7fc-91a0f87acb5b /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType depth http://purl.dataone.org/odo/ECSO_00000515 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 46744b39-ffcf-4b61-b0ab-c549c2e37061 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType replicate identifier http://purl.dataone.org/odo/ECSO_00002989 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 76314844-0b99-4e8a-990d-9512c61cea35 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType station identifier http://purl.dataone.org/odo/ECSO_00002393 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 4e32bc02-bd87-43fd-8ed9-2c7f88aef3aa /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sampling occasion http://purl.dataone.org/odo/ECSO_00002620 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c54df277-23a3-4d55-8718-5cf8ff10ed13 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType chlorine concentration in water http://purl.dataone.org/odo/ECSO_00002033 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute edbba2ef-ddce-4cfc-b4b7-ea3937907ae9 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sulfate concentration in water http://purl.dataone.org/odo/ECSO_00002389 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c7616c28-1bcb-4de9-b4fc-72febcbf198e /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType calcium concentration in lake water http://purl.dataone.org/odo/ECSO_00001773 https://orcid.org/0000-0003-2261-9931 2014-10-18 
+knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d3e9dc49-d8ce-4c36-9b9e-8616b748edcc /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType magnesium concentration in lake water http://purl.dataone.org/odo/ECSO_00001791 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e67ff433-e91c-4d19-9fc3-c8838277ef23 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType sodium concentration in lake water http://purl.dataone.org/odo/ECSO_00001805 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0db9cae7-415f-4f29-856e-ca14ca224117 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType potassium concentration in lake water http://purl.dataone.org/odo/ECSO_00001799 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute aa922788-30fe-4e1c-8626-6d8bfd1c7b9e /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType iron concentration in water http://purl.dataone.org/odo/ECSO_00001785 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 493815f4-610d-4f81-892d-60b63771f1cb /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType manganese concentration in lake water http://purl.dataone.org/odo/ECSO_00001793 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 06afb28f-ccdc-4899-9e66-4a27e7a6790e /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType conductivity http://purl.dataone.org/odo/ECSO_00001534 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 3dd95ddd-484f-4b85-a194-443d632b33ca /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute efff83dc-e171-4945-b370-d61d631a0e78 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 contains measurements of 
type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b4c29470-51af-4366-938f-851f77b8ac71 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b9dd6cfa-c7dd-4490-9ed8-72c5ff7c312a /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a5f01d3f-51cd-406d-93af-7e59043d5e04 /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 4709caa6-d841-481f-abd3-062bfd18dbd3 /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 50a83e04-14e2-4870-86b7-984f8ab061f8 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 7ade0294-adf6-482a-9e21-70eed67eb59b /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 0d4536b0-94ba-40ea-b1c3-8ca895425ad2 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType data quality flag http://purl.dataone.org/odo/ECSO_00001720 https://orcid.org/0000-0003-2261-9931 2014-10-18 \ No newline at end of file diff --git a/tests/data/benchmark/standard/notes.txt 
b/tests/data/benchmark/standard/notes.txt new file mode 100644 index 0000000..5ec11fc --- /dev/null +++ b/tests/data/benchmark/standard/notes.txt @@ -0,0 +1 @@ +These workbooks were annotated with expert human curation. These comprise the standard to test automated annotation methods against. \ No newline at end of file diff --git a/tests/data/benchmark/termset_similarity_score_processed.json b/tests/data/benchmark/termset_similarity_score_processed.json new file mode 100644 index 0000000..6e8bb58 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_score_processed.json @@ -0,0 +1,12 @@ +{ + "average_score": 9.196397212803504, + "best_score": 9.196397212803504, + "average_jaccard_similarity": 0.8450704225352113, + "best_jaccard_similarity": 0.8450704225352113, + "average_phenodigm_score": 2.7877595445851306, + "best_phenodigm_score": 2.7877595445851306, + "average_standard_information_content": 11.196397212803504, + "best_standard_information_content": 11.196397212803504, + "average_test_information_content": 10.459431618637298, + "best_test_information_content": 10.459431618637298 +} diff --git a/tests/data/benchmark/termset_similarity_score_raw.json b/tests/data/benchmark/termset_similarity_score_raw.json new file mode 100644 index 0000000..1d86609 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_score_raw.json @@ -0,0 +1,59 @@ +[ + { + "subject_termset": { + "ENVO:01000252": { + "id": "ENVO:01000252", + "label": "freshwater lake biome" + } + }, + "object_termset": { + "ENVO:01000253": { + "id": "ENVO:01000253", + "label": "freshwater river biome" + } + }, + "subject_best_matches": { + "ENVO:01000252": { + "match_source": "ENVO:01000252", + "score": 9.196397212803504, + "similarity": { + "subject_id": "ENVO:01000252", + "object_id": "ENVO:01000253", + "ancestor_id": "ENVO:00000873", + "ancestor_label": "freshwater biome", + "object_information_content": 10.459431618637298, + "subject_information_content": 11.196397212803504, + "ancestor_information_content": 9.196397212803504, + "jaccard_similarity": 0.8450704225352113, + "phenodigm_score": 2.7877595445851306 + }, + "match_source_label": "freshwater lake biome", + "match_target": "ENVO:01000253", + "match_target_label": "freshwater river biome" + } + }, + "object_best_matches": { + "ENVO:01000253": { + "match_source": "ENVO:01000253", + "score": 9.196397212803504, + "similarity": { + "subject_id": "ENVO:01000252", + "object_id": "ENVO:01000253", + "ancestor_id": "ENVO:00000873", + "ancestor_label": "freshwater biome", + "object_information_content": 10.459431618637298, + "subject_information_content": 11.196397212803504, + "ancestor_information_content": 9.196397212803504, + "jaccard_similarity": 0.8450704225352113, + "phenodigm_score": 2.7877595445851306 + }, + "match_source_label": "freshwater river biome", + "match_target": "ENVO:01000252", + "match_target_label": "freshwater lake biome" + } + }, + "average_score": 9.196397212803504, + "best_score": 9.196397212803504, + "@type": "TermSetPairwiseSimilarity" + } +] \ No newline at end of file diff --git a/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..22e1328 --- /dev/null +++ b/tests/data/benchmark/test_a/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv @@ -0,0 +1,71 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment 
+knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ inland waters AUTO:inland%20waters spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.409195 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. 
Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ aquatic environment http://purl.obolibrary.org/obo/ENVO_01000317 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408893 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater AUTO:freshwater spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408576 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lake AUTO:lake spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.408274 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407974 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lakes AUTO:lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407670 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ northern lakes AUTO:northern%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407361 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ southern lakes AUTO:southern%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.407045 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bog http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:25:12.406700 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset 1d8fcbb8-fc0e-4432-a5d7-f12ac6b5989b /eml:eml/dataset knb-lter-ntl.1.59 "Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. 
These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11 + Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received. chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry" dataset +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ff88d1e6-dcaa-47f0-8599-4611bccb59a9 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 90ba6f24-ef50-4322-9f6e-2615bad77a23 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute bb088d87-c6e7-4952-b333-350944605412 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1b3c5c94-4448-4d74-9176-cb159da849a9 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7d427f25-53a5-4bbd-bb3c-a2fa501b3330 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 79351da1-ddde-4bb2-80b1-88a72643c195 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a9946a61-0f90-4d59-9d07-75f22dcab406 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5c22fc18-2b88-4c95-9f59-cd64d00bbb62 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
68931cdd-8d38-4af8-baee-aaf0fe0468b6 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute b8dcce57-8597-43a2-981a-fde2e32a243a /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c2aa8715-3aa6-4bfb-b752-798f35fd456f /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a3c6dcfc-1ab5-45ef-b543-692235c26d70 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5a017432-2075-428c-a03f-1b87cf2fe0a4 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 800c66bc-0b8b-4496-b425-68a136215a55 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4353b35d-5e66-4d33-b176-cbd95a03e972 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5986fb78-2525-4cfe-857d-7d38583e3ecc /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 797c9db2-2702-42ac-a04c-c744751b6a35 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 58ec98c3-f584-4504-82f4-ed7a6ac971d3 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d597c578-231c-4431-807c-42be84e0d582 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 315863bf-7381-417b-bfa5-4ce5a044210b /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a4ab66d7-90b5-49f9-83c0-2f5ea904abf0 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a1fb2bee-9c2f-44a0-958c-5834938604fd /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a57c9579-845a-4711-9c29-a6ad5ac3d66b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a97531d3-56ae-405d-bc12-f1f2a1d328a0 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8b70ba6b-ca4e-4b3a-9d89-04e345017ff9 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6a8c7ee0-7c2b-4543-a5f4-1a921bea6119 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8849481f-897d-4e80-8c33-816038580e02 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2ca00ca9-11cb-442a-93c7-acf8ae4d88fd /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. of Hygiene no3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8c1f92d3-eae3-45dd-b6bd-93a36c854aaf /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3c588629-11f9-4fb9-9a8d-3bc4263468ff /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeca1b6b-a69f-45c1-945b-10e97713a1e6 /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 612f756e-b959-49ba-8ea9-70b9ba4f6183 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 694813a5-75db-4a58-ba2b-32c440e93651 /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. 
of Hygiene drsif_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 03450955-1c82-4372-84ec-88a65a1fef63 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1c724750-20be-47f4-a7fb-ce260d732944 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 55d6f8a5-7568-4799-b35e-72d62a49c12b /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 41aadd4c-83d6-41d7-9220-cc2732485a35 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 47aa51ce-f9cf-4a28-bc2e-67f4ec7d6c44 /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d733078a-193d-41a6-8b37-78be3cac73a2 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 83c23a71-cc7c-4394-99b9-8d4b707554b0 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6d5766e-6eca-4d0c-80c8-e08f48302122 /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 765c99ec-8b08-489f-b521-e6f3102f22eb /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c8a48b80-b980-4b03-8749-c1412f4b96cc /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 415123b6-ae86-40f9-91ba-2703ab1e390f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 626bcdd6-e16e-4f3a-b2fe-d8c8216f2471 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7847f72d-6405-433a-91c0-db80d29648b7 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee08a1b1-6a37-4b59-b8fd-0fcc837b6cf0 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c34fafd5-058b-4cf7-a9b5-57e9b1027da9 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e228b33d-1154-4d43-ada9-3e758f7c4c9f /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d153c8fb-cbb0-42de-8f1e-67274dd8c949 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 737ab156-01d0-4e61-b961-ce1cf90388e9 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e9240b0f-5ee5-40cc-b65f-41789f1198dd /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1860cbb7-5ebb-4a6f-8ff2-3bc449d65a2f /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3d7de8ea-4311-492c-942a-e18b374b7f97 /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4b270237-bcec-4e78-97e7-6f0b5342e254 /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4ba154a2-0677-4a99-8e16-b24b8a9dea29 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66a23414-8dce-467b-ab1d-1b82b0c33f9b /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 231d3b44-1f7d-48d2-b9ff-8004bc332103 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 00350a07-c335-4c2e-bd53-e35cb1cc4316 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh diff --git a/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..4c2ba06 --- /dev/null +++ b/tests/data/benchmark/test_a/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,42 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 
dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ ponds AUTO:ponds spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.269472 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ wetlands http://purl.obolibrary.org/obo/ENVO_00000035 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.269178 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268873 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ rivers AUTO:rivers spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268577 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lakes AUTO:lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.268276 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ Bog waters AUTO:Bog%20waters spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267973 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ Freshwater lakes AUTO:Freshwater%20lakes spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267668 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ freshwater AUTO:freshwater spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267352 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ inland water AUTO:inland%20water spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.267033 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bog http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.266699 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ lake AUTO:lake spinneret.annotator.get_onto_gpt_annotation 2024-11-19 14:18:04.266192 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset b761e76b-4888-4a68-8fd1-d192feb87fbe /eml:eml/dataset knb-lter-ntl.2.37 "Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects. + Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C. + 1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly. 
+ Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11 chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity" dataset +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a8be3bdb-ba9b-4462-ac00-4c195d8d6271 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2ee0ed01-2526-4d06-abf0-f0baa8bf0405 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2935f46c-c6d6-4b56-8ca4-21b4f53874d3 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 78518f7c-bfb0-4e6f-be63-b942a8b09072 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 8324a47b-fd8b-4477-9751-e662d0f24d87 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c9006d36-b13f-471a-9c65-165bbba29d3b /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 9466e002-fe89-47c4-87ec-b322c55376c4 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6d59ec94-117f-4378-917b-7e0c785cdc9c /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 5d666d0c-7be9-4d1f-84b2-a5b572d819db /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a59d2c08-669a-436e-bfe9-6c3196215166 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 40ef2724-7200-4cbf-8663-0eaef6df880b /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 63021500-f274-4f23-8bcf-520f22cc53bb /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b2094513-bba1-4671-9555-d376c5333c5a /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6ae30df9-7862-4488-ad9e-f0f8316081e7 
/eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 1b5a4f2e-58db-40e1-a044-38e4451180a0 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e9537242-efd6-4a83-a64a-1b2fd7fc36d8 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute cf23b883-857c-4573-9723-401e2f2fd2e0 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f95c44fe-7bdd-47a4-85c6-1462f6b9545a /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1e21362-d37a-4d18-9656-ee3d94d74cff /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 58bb291c-de47-4bbe-b5c4-93816f8f5c77 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 56db3ae8-db1c-4792-83b3-5c96a883c732 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b3a5a6d2-96b7-4096-bf6f-7967e443f0bc /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f68e300b-31ef-4c38-b470-0988cfdc268b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ee010787-3226-4c28-bf1f-fa08f9764f6f /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute bdde0f42-1414-4355-bb23-73037fcef5ff /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ae7d8e74-98f7-4a04-a30b-c1d42ee514cf /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond diff --git a/tests/data/benchmark/test_a/notes.txt b/tests/data/benchmark/test_a/notes.txt new file mode 100644 index 0000000..ee404ce --- /dev/null +++ b/tests/data/benchmark/test_a/notes.txt @@ -0,0 +1 @@ +This run increases sample size to 3 to test if overall grounding improves. 
\ No newline at end of file diff --git a/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..d7f8724 --- /dev/null +++ b/tests/data/benchmark/test_b/knb-lter-ntl.1.59_annotation_workbook_annotated.tsv @@ -0,0 +1,124 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:59.477115 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:59.137550 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.792060 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.446345 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:58.105312 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite Nitrogen from WI State Lab. 
of Hygiene no3no2_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.702475 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.353412 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:57.008575 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.669128 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.332559 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:56.003383 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:55.672201 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:55.333889 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter 
http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.981935 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.634614 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:54.289731 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.948702 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microgram Per Liter http://qudt.org/vocab/unit/MicroGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.597860 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:53.230680 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.881694 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.536776 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:52.190681 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
/eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Unitless http://qudt.org/vocab/unit/UNITLESS spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:51.239842 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Unitless http://qudt.org/vocab/unit/UNITLESS spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:50.896707 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Meter http://qudt.org/vocab/unit/M spinneret.annotator.get_qudt_annotation 2024-11-15 15:03:50.550432 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:03:43.774995 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:03:02.121445 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:02:23.134680 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:02:23.134159 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:50.967139 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:42.468349 +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Groundwater http://purl.obolibrary.org/obo/ENVO_01001004 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:35.275643 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh environmental material http://purl.obolibrary.org/obo/ENVO_00010483 Water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:01:35.275102 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:57.262942 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:45.680608 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:40.920276 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:40.919648 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:36.165647 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 15:00:14.598300 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 
spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:43.876018 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:39.301191 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:39.300685 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic environmental material http://purl.obolibrary.org/obo/ENVO_00010483 seawater http://purl.obolibrary.org/obo/ENVO_00002149 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:34.709185 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:30.715581 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:25.940691 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:18.595722 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:18.595206 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.003423 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 
spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.003026 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:15.002437 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:05.980897 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:59:05.980359 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType total organic carbon http://purl.dataone.org/odo/ECSO_00000329 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:57:12.186743 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Turbidity http://purl.dataone.org/odo/ECSO_00002359 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:56:26.356521 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType Conductivity http://purl.dataone.org/odo/ECSO_00001534 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:56:26.355963 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType ## depth at which the sample or measurement was taken http://purl.dataone.org/odo/ECSO_00001250 spinneret.annotator.get_ontogpt_annotation 2024-11-15 14:54:24.082809 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. 
Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod cadmium reduction method and absorption measurement at 520 nm on Technicon segmented flow autoanalyzer or Astoria-Pacific Astoria II segmented flow autoanalyzer http://vocabs.lter-europe.net/EnvThes/20803 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.614269 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod automated colorimetric spectrophotometry using segmented flow autoanalyzer http://vocabs.lter-europe.net/EnvThes/10375 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613955 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod Heteropoly Blue Method and absorption measurement at 820 nm on Bausch and Lomb Spectrophotometer, Technicon AutoAnalyzer II, or Astoria-Pacific Astoria II AutoAnalyzer http://vocabs.lter-europe.net/EnvThes/20104 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613636 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. 
Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod combustion analysis on Shimadzu TOC-V-csh or TOC-L-cph analyzer http://vocabs.lter-europe.net/EnvThes/22297 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.613298 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Sample Collection and StorageRoutine water samples are collected at one station in the deepest part of each lake. Northern lakes- Samples are collected monthly during ice-free periods and every 5 weeks during ice-covered season for the northern lakes. Samples are collected at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. When lakes are not stratified, samples are collected at the top, middle, and bottom of the water column. Southern lake dataset usesMethod http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod direct determination of Total N http://vocabs.lter-europe.net/EnvThes/10328 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:53:54.612801 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset /eml:eml/dataset knb-lter-ntl.1.59 Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. 
The dataset will be updated as new data is received.chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 14:52:47.887392 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 dataset 1d8fcbb8-fc0e-4432-a5d7-f12ac6b5989b /eml:eml/dataset knb-lter-ntl.1.59 "Parameters characterizing the nutrient chemistry of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, unnamed lakes 27-02 [Crystal Bog] and 12-15 [Trout Bog], Mendota, Monona, Wingra, and Fish) are measured at multiple depths throughout the year. These parameters include total nitrogen, total dissolved nitrogen, nitrite+nitrate-N, ammonium-N, total phosphorus, total dissolved phosphorus, dissolved reactive phosphorus (only in the southern lakes and not in Wingra and Fish after 2003), bicarbonate-reactive filtered and unfiltered silica (both discontinued in 2003), dissolved reactive silica, pH, air equilibrated pH (discontinued in 2014 in the northern lakes and in 2020 in the southern lakes), total alkalinity, total inorganic carbon, dissolved inorganic carbon, total organic carbon, dissolved organic carbon, and total particulate matter (only in the northern lakes in this data set; total particulate matter in southern lakes starting in 2000 is available in a separate dataset). Sampling Frequency: Northern lakes- monthly during ice-free season - every 5 weeks during ice-covered season. Southern lakes- Southern lakes samples are collected every 2-4 weeks during the summer stratified period, at least monthly during the fall, and typically only once during the winter, depending on ice conditions. Number of sites: 11 + Note that years 2020 & 2021 are not complete, but we are publishing the data that we have. The dataset will be updated as new data is received. 
chemical (all) chemical limnology NTL Core Datasets organic matter inorganic nutrients alkalinity ammonia carbon dissolved organic carbon dissolved inorganic carbon nitrate nitrogen nutrients ph phosphorus total nitrogen total organic carbon total phosphorus water chemistry" dataset +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ff88d1e6-dcaa-47f0-8599-4611bccb59a9 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl1_v11.csv lake name abbreviation lakeid +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 90ba6f24-ef50-4322-9f6e-2615bad77a23 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl1_v11.csv year year4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute bb088d87-c6e7-4952-b333-350944605412 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl1_v11.csv day of year daynum +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1b3c5c94-4448-4d74-9176-cb159da849a9 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl1_v11.csv sample date sampledate +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7d427f25-53a5-4bbd-bb3c-a2fa501b3330 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl1_v11.csv depth at which the sample or measurement was taken depth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 79351da1-ddde-4bb2-80b1-88a72643c195 /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl1_v11.csv sample replicate rep +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a9946a61-0f90-4d59-9d07-75f22dcab406 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl1_v11.csv Location station of sample sta +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5c22fc18-2b88-4c95-9f59-cd64d00bbb62 /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl1_v11.csv sampling event event +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 68931cdd-8d38-4af8-baee-aaf0fe0468b6 /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl1_v11.csv pH ph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute b8dcce57-8597-43a2-981a-fde2e32a243a /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl1_v11.csv pH air equilibrated phair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c2aa8715-3aa6-4bfb-b752-798f35fd456f /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl1_v11.csv alkalinity alk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a3c6dcfc-1ab5-45ef-b543-692235c26d70 /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl1_v11.csv dissolved inorganic carbon dic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5a017432-2075-428c-a03f-1b87cf2fe0a4 /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl1_v11.csv total inorganic carbon tic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 800c66bc-0b8b-4496-b425-68a136215a55 
/eml:eml/dataset/dataTable/attributeList/attribute[14] ntl1_v11.csv dissolved organic carbon doc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4353b35d-5e66-4d33-b176-cbd95a03e972 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl1_v11.csv total organic carbon toc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 5986fb78-2525-4cfe-857d-7d38583e3ecc /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl1_v11.csv (NO3 + NO2) - N no3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 797c9db2-2702-42ac-a04c-c744751b6a35 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl1_v11.csv NO2 no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 58ec98c3-f584-4504-82f4-ed7a6ac971d3 /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl1_v11.csv NH4 - N nh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d597c578-231c-4431-807c-42be84e0d582 /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl1_v11.csv total dissolved N (filtered sample) totnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 315863bf-7381-417b-bfa5-4ce5a044210b /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl1_v11.csv total N (unfiltered sample) totnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a4ab66d7-90b5-49f9-83c0-2f5ea904abf0 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl1_v11.csv total dissolved P (filtered sample) totpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a1fb2bee-9c2f-44a0-958c-5834938604fd /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl1_v11.csv total P unfiltered totpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a57c9579-845a-4711-9c29-a6ad5ac3d66b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl1_v11.csv dissolved reactive silica filtered drsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a97531d3-56ae-405d-bc12-f1f2a1d328a0 /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl1_v11.csv bicarbonate reactive silica filtered brsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8b70ba6b-ca4e-4b3a-9d89-04e345017ff9 /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl1_v11.csv bicarbonate reactive silica unfiltered brsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 6a8c7ee0-7c2b-4543-a5f4-1a921bea6119 /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl1_v11.csv total particulate matter tpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8849481f-897d-4e80-8c33-816038580e02 /eml:eml/dataset/dataTable/attributeList/attribute[27] ntl1_v11.csv Total Nitrogen (unfiltered) from WI State Lab of Hygiene totnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 2ca00ca9-11cb-442a-93c7-acf8ae4d88fd /eml:eml/dataset/dataTable/attributeList/attribute[28] ntl1_v11.csv Nitrate plus Nitrite 
Nitrogen from WI State Lab. of Hygiene no3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 8c1f92d3-eae3-45dd-b6bd-93a36c854aaf /eml:eml/dataset/dataTable/attributeList/attribute[29] ntl1_v11.csv Ammonium Nitrogen from WI State Lab. of Hygiene nh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3c588629-11f9-4fb9-9a8d-3bc4263468ff /eml:eml/dataset/dataTable/attributeList/attribute[30] ntl1_v11.csv Total Kjeldahl Nitrogen from WI State Lab. of Hygiene kjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute aeca1b6b-a69f-45c1-945b-10e97713a1e6 /eml:eml/dataset/dataTable/attributeList/attribute[31] ntl1_v11.csv Total Phosphorus Unfiltered from WI State Lab. of Hygiene totpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 612f756e-b959-49ba-8ea9-70b9ba4f6183 /eml:eml/dataset/dataTable/attributeList/attribute[32] ntl1_v11.csv Dissolved Reactive Phosphorus from WI State Lab. of Hygiene drp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 694813a5-75db-4a58-ba2b-32c440e93651 /eml:eml/dataset/dataTable/attributeList/attribute[33] ntl1_v11.csv Dissolved Reactive Silica from WI State Lab. of Hygiene drsif_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 03450955-1c82-4372-84ec-88a65a1fef63 /eml:eml/dataset/dataTable/attributeList/attribute[34] ntl1_v11.csv data flag for depth flagdepth +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1c724750-20be-47f4-a7fb-ce260d732944 /eml:eml/dataset/dataTable/attributeList/attribute[35] ntl1_v11.csv data flag for ph flagph +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 55d6f8a5-7568-4799-b35e-72d62a49c12b /eml:eml/dataset/dataTable/attributeList/attribute[36] ntl1_v11.csv data flag for phair flagphair +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 41aadd4c-83d6-41d7-9220-cc2732485a35 /eml:eml/dataset/dataTable/attributeList/attribute[37] ntl1_v11.csv data flag for alkalinity flagalk +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 47aa51ce-f9cf-4a28-bc2e-67f4ec7d6c44 /eml:eml/dataset/dataTable/attributeList/attribute[38] ntl1_v11.csv data flag for dissolved inorganic carbon flagdic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d733078a-193d-41a6-8b37-78be3cac73a2 /eml:eml/dataset/dataTable/attributeList/attribute[39] ntl1_v11.csv data flag for total inorganic carbon flagtic +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 83c23a71-cc7c-4394-99b9-8d4b707554b0 /eml:eml/dataset/dataTable/attributeList/attribute[40] ntl1_v11.csv data flag for dissolved organic carbon flagdoc +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute a6d5766e-6eca-4d0c-80c8-e08f48302122 /eml:eml/dataset/dataTable/attributeList/attribute[41] ntl1_v11.csv data flag for total organic carbon flagtoc +knb-lter-ntl.1.59 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 765c99ec-8b08-489f-b521-e6f3102f22eb /eml:eml/dataset/dataTable/attributeList/attribute[42] ntl1_v11.csv data flag for no3no2-n flagno3no2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c8a48b80-b980-4b03-8749-c1412f4b96cc /eml:eml/dataset/dataTable/attributeList/attribute[43] ntl1_v11.csv data flag for no2 flagno2 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 415123b6-ae86-40f9-91ba-2703ab1e390f /eml:eml/dataset/dataTable/attributeList/attribute[44] ntl1_v11.csv data flag for nh4 flagnh4 +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 626bcdd6-e16e-4f3a-b2fe-d8c8216f2471 /eml:eml/dataset/dataTable/attributeList/attribute[45] ntl1_v11.csv data flag for totnf flagtotnf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 7847f72d-6405-433a-91c0-db80d29648b7 /eml:eml/dataset/dataTable/attributeList/attribute[46] ntl1_v11.csv data flag for totnuf flagtotnuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute ee08a1b1-6a37-4b59-b8fd-0fcc837b6cf0 /eml:eml/dataset/dataTable/attributeList/attribute[47] ntl1_v11.csv data flag for totpf flagtotpf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute c34fafd5-058b-4cf7-a9b5-57e9b1027da9 /eml:eml/dataset/dataTable/attributeList/attribute[48] ntl1_v11.csv data flag for totpuf flagtotpuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e228b33d-1154-4d43-ada9-3e758f7c4c9f /eml:eml/dataset/dataTable/attributeList/attribute[49] ntl1_v11.csv data flag for drsif flagdrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute d153c8fb-cbb0-42de-8f1e-67274dd8c949 /eml:eml/dataset/dataTable/attributeList/attribute[50] ntl1_v11.csv data flag for brsif flagbrsif +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 737ab156-01d0-4e61-b961-ce1cf90388e9 /eml:eml/dataset/dataTable/attributeList/attribute[51] ntl1_v11.csv data flag for brsiuf flagbrsiuf +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute e9240b0f-5ee5-40cc-b65f-41789f1198dd /eml:eml/dataset/dataTable/attributeList/attribute[52] ntl1_v11.csv data flag for tpm flagtpm +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 1860cbb7-5ebb-4a6f-8ff2-3bc449d65a2f /eml:eml/dataset/dataTable/attributeList/attribute[53] ntl1_v11.csv data flag for totnuf_sloh flagtotnuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 3d7de8ea-4311-492c-942a-e18b374b7f97 /eml:eml/dataset/dataTable/attributeList/attribute[54] ntl1_v11.csv data flag for no3no2_sloh flagno3no2_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 4b270237-bcec-4e78-97e7-6f0b5342e254 /eml:eml/dataset/dataTable/attributeList/attribute[55] ntl1_v11.csv data flag for nh4_sloh flagnh4_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 
4ba154a2-0677-4a99-8e16-b24b8a9dea29 /eml:eml/dataset/dataTable/attributeList/attribute[56] ntl1_v11.csv data flag for kjdl_n_sloh flagkjdl_n_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 66a23414-8dce-467b-ab1d-1b82b0c33f9b /eml:eml/dataset/dataTable/attributeList/attribute[57] ntl1_v11.csv data flag for totpuf_sloh flagtotpuf_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 231d3b44-1f7d-48d2-b9ff-8004bc332103 /eml:eml/dataset/dataTable/attributeList/attribute[58] ntl1_v11.csv data flag for drp_sloh flagdrp_sloh +knb-lter-ntl.1.59 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.1.59 attribute 00350a07-c335-4c2e-bd53-e35cb1cc4316 /eml:eml/dataset/dataTable/attributeList/attribute[59] ntl1_v11.csv data flag for drsif_sloh flagdrsif_sloh diff --git a/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv b/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv new file mode 100644 index 0000000..47c10f1 --- /dev/null +++ b/tests/data/benchmark/test_b/knb-lter-ntl.2.37_annotation_workbook_annotated.tsv @@ -0,0 +1,66 @@ +package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Microsiemens Per Centimeter http://qudt.org/vocab/unit/MicroS-PER-CentiM spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.796261 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.393846 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:48.054581 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.709779 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.364092 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 
/eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:47.022712 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[11] ntl2_9.csv calcium concentration ca uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:46.681542 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:46.337658 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Milligram Per Liter http://qudt.org/vocab/unit/MilliGM-PER-L spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:45.996986 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth uses standard http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesStandard Meter http://qudt.org/vocab/unit/M spinneret.annotator.get_qudt_annotation 2024-11-15 12:22:45.655668 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:44.266991 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:44.266472 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:39.586762 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:39.586271 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:27.924876 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:27.924366 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.011442 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.011081 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:20.010566 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:22:09.334418 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:34.749614 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:31.070666 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:31.070162 +knb-lter-ntl.2.37 
https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 air http://purl.obolibrary.org/obo/ENVO_00002005 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.628417 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 soil http://purl.obolibrary.org/obo/ENVO_00001998 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.628068 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:24.627556 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth environmental material http://purl.obolibrary.org/obo/ENVO_00010483 water http://purl.obolibrary.org/obo/ENVO_00002006 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:21:09.589054 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType specific_conductivity http://purl.dataone.org/odo/ECSO_00001534 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:20:42.642322 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k contains measurements of type http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType potassium concentration http://purl.dataone.org/odo/ECSO_00001120 spinneret.annotator.get_ontogpt_annotation 2024-11-15 12:19:39.657275 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Inorganic Nutrients http://vocabs.lter-europe.net/EnvThes/USLterCV_266 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.731211 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Water Chemistry http://vocabs.lter-europe.net/EnvThes/USLterCV_619 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.730710 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset research topic http://vocabs.lter-europe.net/EnvThes/21604 Limnology http://vocabs.lter-europe.net/EnvThes/21749 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:45.730199 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). 
Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_local_scale https://genomicsstandardsconsortium.github.io/mixs/0000013/ road http://purl.obolibrary.org/obo/ENVO_00000064 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:14.046509 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 
1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_local_scale https://genomicsstandardsconsortium.github.io/mixs/0000013/ lake http://purl.obolibrary.org/obo/ENVO_00000020 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:14.046007 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset /eml:eml/dataset knb-lter-ntl.2.37 Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects.Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C.1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly.Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity dataset env_broad_scale https://genomicsstandardsconsortium.github.io/mixs/0000012/ bogs http://purl.obolibrary.org/obo/ENVO_01001209 spinneret.annotator.get_onto_gpt_annotation 2024-11-15 12:18:06.439216 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 dataset b761e76b-4888-4a68-8fd1-d192feb87fbe /eml:eml/dataset knb-lter-ntl.2.37 "Parameters characterizing the major ions of the eleven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, Trout, bog lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog], Mendota, Monona, Wingra and Fish) are measured at one station in the deepest part of each lake at the top and bottom of the epilimnion, mid-thermocline, and top, middle, and bottom of the hypolimnion. These parameters include chloride, sulfate, calcium, magnesium, sodium, potassium, iron, manganese, and specific conductance (northern lakes only). Lake Wingra has always been just a surface sample, but in the winter we have, at times, taken chloride samples from top to bottom to have a better understanding of road salt effects. 
+ Samples for conductivity are collected four times per year in the seven primary lakes (Allequash, Big Muskellunge, Crystal, Sparkling, and Trout lakes, and unnamed lakes 27-02 [Crystal Bog], and 12-15 [Trout Bog] in the Trout Lake area at the deepest part of the lake, sampling at the surface, mid water column, and the bottom. The sampling dates include February under ice, spring mixis, August stratified, and fall mixis. Conductivity is measured using a YSI Model 32 conductivity meter with YSI 3403 conductivity cell, reported as uS/cm at 25°C. + 1981-1988: a Sybron Barnstead conductivity bridge was used. 1981-1986: conductivity was measured monthly. + Sampling Frequency: quarterly (winter, spring and fall mixes, and summer stratified periods) Number of sites: 11 chemical (all) chemical limnology NTL Core Datasets inorganic nutrients calcium chloride specific conductivity iron magnesium manganese potassium sodium sulfate water chemistry conductivity" dataset +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a8be3bdb-ba9b-4462-ac00-4c195d8d6271 /eml:eml/dataset/dataTable/attributeList/attribute[1] ntl2_9.csv lake name abbreviation lakeid +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2ee0ed01-2526-4d06-abf0-f0baa8bf0405 /eml:eml/dataset/dataTable/attributeList/attribute[2] ntl2_9.csv four-digit year year4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 2935f46c-c6d6-4b56-8ca4-21b4f53874d3 /eml:eml/dataset/dataTable/attributeList/attribute[3] ntl2_9.csv day of the year daynum +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 78518f7c-bfb0-4e6f-be63-b942a8b09072 /eml:eml/dataset/dataTable/attributeList/attribute[4] ntl2_9.csv Formatted date of sample sampledate +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 8324a47b-fd8b-4477-9751-e662d0f24d87 /eml:eml/dataset/dataTable/attributeList/attribute[5] ntl2_9.csv water depth of sample depth +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute c9006d36-b13f-471a-9c65-165bbba29d3b /eml:eml/dataset/dataTable/attributeList/attribute[6] ntl2_9.csv sample replicate rep +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 9466e002-fe89-47c4-87ec-b322c55376c4 /eml:eml/dataset/dataTable/attributeList/attribute[7] ntl2_9.csv Location station of sample sta +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6d59ec94-117f-4378-917b-7e0c785cdc9c /eml:eml/dataset/dataTable/attributeList/attribute[8] ntl2_9.csv sampling event event +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 5d666d0c-7be9-4d1f-84b2-a5b572d819db /eml:eml/dataset/dataTable/attributeList/attribute[9] ntl2_9.csv chloride concentation cl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute a59d2c08-669a-436e-bfe9-6c3196215166 /eml:eml/dataset/dataTable/attributeList/attribute[10] ntl2_9.csv sulfate concentration so4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 40ef2724-7200-4cbf-8663-0eaef6df880b /eml:eml/dataset/dataTable/attributeList/attribute[11] 
ntl2_9.csv calcium concentration ca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 63021500-f274-4f23-8bcf-520f22cc53bb /eml:eml/dataset/dataTable/attributeList/attribute[12] ntl2_9.csv magnesium concentration mg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b2094513-bba1-4671-9555-d376c5333c5a /eml:eml/dataset/dataTable/attributeList/attribute[13] ntl2_9.csv sodium concentration na +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 6ae30df9-7862-4488-ad9e-f0f8316081e7 /eml:eml/dataset/dataTable/attributeList/attribute[14] ntl2_9.csv potassium concentration k +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 1b5a4f2e-58db-40e1-a044-38e4451180a0 /eml:eml/dataset/dataTable/attributeList/attribute[15] ntl2_9.csv iron concentration fe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute e9537242-efd6-4a83-a64a-1b2fd7fc36d8 /eml:eml/dataset/dataTable/attributeList/attribute[16] ntl2_9.csv manganese concentration mn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute cf23b883-857c-4573-9723-401e2f2fd2e0 /eml:eml/dataset/dataTable/attributeList/attribute[17] ntl2_9.csv Specific conductance cond +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f95c44fe-7bdd-47a4-85c6-1462f6b9545a /eml:eml/dataset/dataTable/attributeList/attribute[18] ntl2_9.csv data flag for chloride flagcl +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute d1e21362-d37a-4d18-9656-ee3d94d74cff /eml:eml/dataset/dataTable/attributeList/attribute[19] ntl2_9.csv data flag for so4 flagso4 +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 58bb291c-de47-4bbe-b5c4-93816f8f5c77 /eml:eml/dataset/dataTable/attributeList/attribute[20] ntl2_9.csv data flag for ca flagca +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute 56db3ae8-db1c-4792-83b3-5c96a883c732 /eml:eml/dataset/dataTable/attributeList/attribute[21] ntl2_9.csv data flag for magnesiumn flagmg +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute b3a5a6d2-96b7-4096-bf6f-7967e443f0bc /eml:eml/dataset/dataTable/attributeList/attribute[22] ntl2_9.csv data flag for sodium flagna +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute f68e300b-31ef-4c38-b470-0988cfdc268b /eml:eml/dataset/dataTable/attributeList/attribute[23] ntl2_9.csv data flag for potassium flagk +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ee010787-3226-4c28-bf1f-fa08f9764f6f /eml:eml/dataset/dataTable/attributeList/attribute[24] ntl2_9.csv data flag for iron flagfe +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute bdde0f42-1414-4355-bb23-73037fcef5ff /eml:eml/dataset/dataTable/attributeList/attribute[25] ntl2_9.csv data flag for manganese flagmn +knb-lter-ntl.2.37 https://portal.edirepository.org/nis/metadataviewer?packageid=knb-lter-ntl.2.37 attribute ae7d8e74-98f7-4a04-a30b-c1d42ee514cf 
/eml:eml/dataset/dataTable/attributeList/attribute[26] ntl2_9.csv data flag for specific conductivity flagcond diff --git a/tests/data/benchmark/test_b/notes.txt b/tests/data/benchmark/test_b/notes.txt new file mode 100644 index 0000000..dedf848 --- /dev/null +++ b/tests/data/benchmark/test_b/notes.txt @@ -0,0 +1 @@ +This run uses the default parameterization. \ No newline at end of file diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 119bf2c..8b10a28 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,7 +2,22 @@ import logging import daiquiri -from spinneret.benchmark import monitor +import pandas as pd +import pytest + +from spinneret.benchmark import ( + monitor, + benchmark_against_standard, + get_termset_similarity, + default_similarity_scores, + clean_workbook, + group_object_ids, + compress_object_ids, + parse_similarity_scores, + delete_terms_from_unsupported_ontologies, + get_shared_ontology, +) +from spinneret.utilities import is_url def test_monitor(tmp_path): @@ -28,3 +43,212 @@ def example_function(): # to call with monitor assert "Starting function 'example_function'" in log assert "Function 'example_function' completed in" in log assert "Memory usage: Current=" in log + + +@pytest.mark.parametrize("use_mock", [True]) # False calculates similarity scores +def test_benchmark_against_standard( + mocker, + use_mock, + termset_similarity_score_fields, + termset_similarity_score_processed, +): + """Test the benchmark_against_standard function""" + + if use_mock: + mocker.patch( + "spinneret.benchmark.get_termset_similarity", + return_value=termset_similarity_score_processed, + ) + + res = benchmark_against_standard( + standard_dir="tests/data/benchmark/standard", + test_dirs=["tests/data/benchmark/test_a", "tests/data/benchmark/test_b"], + ) + assert ( + res.columns.tolist() + == [ + "standard_dir", + "test_dir", + "standard_file", + "predicate_value", + "element_xpath_value", + "standard_set", + "test_set", + ] + + termset_similarity_score_fields + ) + + +def test_get_termset_similarity(termset_similarity_score_fields): + """Test the get_termset_similarity function""" + + # Get similarity scores for two sets of terms that are closely related. + r = get_termset_similarity( + set1={"ENVO:01000252"}, # freshwater lake biome + set2={"ENVO:01000253"}, # freshwater river biome + ) + assert isinstance(r, dict) + assert r.keys() == set(termset_similarity_score_fields) + for _, v in r.items(): + assert isinstance(v, float) + + # We expect lower similarity scores when we change one of the term sets to + # a less related set of terms. + r2 = get_termset_similarity( + set1={"ENVO:01000252"}, # freshwater lake biome + set2={"ENVO:01000182"}, # temperate desert biome + ) + assert r2["average_score"] < r["average_score"] + assert r2["best_score"] < r["best_score"] + + +def test_get_termset_similarity_with_empty_input_sets(): + """Test the get_termset_similarity function with empty input sets. 
The + function should return default score values.""" + + # Set 1 is empty + r = get_termset_similarity(set1=[], set2=["ENVO:01000253"]) + assert r == default_similarity_scores() + + # Set 2 is empty + r = get_termset_similarity(set1=["ENVO:01000252"], set2=[]) + assert r == default_similarity_scores() + + # Both sets are empty + r = get_termset_similarity(set1=[], set2=[]) + assert r == default_similarity_scores() + + +def test_default_similarity_scores(termset_similarity_score_fields): + """Test the default similarity scores return expected fields and values""" + + r = default_similarity_scores() + assert isinstance(r, dict) + assert set(r.keys()) == set(termset_similarity_score_fields) + for k, v in r.items(): + if k in ["average_score", "best_score"]: + assert v == 0.0 + else: + assert isinstance(v, type(pd.NA)) + + +def test_clean_workbook(annotated_workbook): + """Test the clean_workbook function""" + wb = annotated_workbook + + # Dirty-up the workbook by adding NA values and ungrounded terms in the + # "object_id" column + wb.loc[0, "object_id"] = pd.NA + assert wb["object_id"].isna().any() + wb.loc[1, "object_id"] = "AUTO:1234" + assert wb["object_id"].str.startswith("AUTO:").any() + + # After cleaning, the NA values and ungrounded terms will be gone + wb_cleaned = clean_workbook(wb) + assert not wb_cleaned["object_id"].isna().any() + assert not wb_cleaned["object_id"].str.startswith("AUTO:").any() + + +def test_group_object_ids(annotated_workbook): + """Test the group_object_ids function""" + wb = annotated_workbook + + # Group the workbook by predicate and element_xpath + grouped = group_object_ids(wb) + assert isinstance(grouped, dict) + + # The keys are tuples composed of the predicate and element_xpath values + assert isinstance(list(grouped.keys())[0], tuple) + + # Each value is a list of object_ids corresponding to the predicate and + # element_xpath grouping + assert isinstance(list(grouped.values())[0], list) + assert isinstance(list(grouped.values())[1][0], str) + assert is_url(list(grouped.values())[1][0]) + + +def test_compress_object_ids(annotated_workbook): + """The test_compress_object_ids function""" + + # Create grouped dictionary for testing + wb = annotated_workbook + grouped = group_object_ids(wb) + + # Grouped dictionary values are URI strings before compression + for _, values in grouped.items(): + for v in values: + if not v: # skip empty lists + continue + assert is_url(v) + + # After compression, the values are lists of CURIES + compressed = compress_object_ids(grouped) + for _, values in compressed.items(): + for v in values: + if not v: # skip empty lists + continue + assert not is_url(v) + assert len(v.split(":")) == 2 + + +def test_parse_similarity_scores( + termset_similarity_score_raw, termset_similarity_score_fields +): + """Test the parse_similarity_scores function""" + + # The parsed result should be a dictionary with the expected keys + r = parse_similarity_scores(termset_similarity_score_raw) + assert isinstance(r, dict) + assert set(r.keys()) == set(termset_similarity_score_fields) + + +def test_delete_terms_from_unsupported_ontologies(): + """Test the delete_terms_from_unsupported_ontologies function""" + + # Terms (CURIES) from supported ontologies are retained + supported_terms = ["ENVO:01000252", "ECSO:01000253", "ENVTHES:0000002"] + r = delete_terms_from_unsupported_ontologies(supported_terms) + assert r == supported_terms + + # Terms from unsupported ontologies are removed + mixed_term_list = supported_terms + ["AUTO:1234", "FOO:5678"] + r 
= delete_terms_from_unsupported_ontologies(mixed_term_list) + assert r == supported_terms + + +def test_get_shared_ontology(): + """Test the get_shared_ontology function""" + + # An ontology is returned when the two sets share the same ontology + set1 = ["ENVO:01000252", "ENVO:01000253"] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db == "sqlite:obo:envo" + + set1 = ["ENVO:01000252", "ECSO:01000253"] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db == "sqlite:obo:envo" + + # None is returned for unsupported ontologies + set1 = ["ECSO:01000253"] + set2 = ["ECSO:01000253"] + db = get_shared_ontology(set1, set2) + assert db is None + + # None is returned when the two sets do not share a common ontology + set1 = ["ENVO:01000252", "ENVO:01000253"] + set2 = ["ECSO:01000252"] + db = get_shared_ontology(set1, set2) + assert db is None + + # None is returned when one or both sets are empty + set1 = [] + set2 = ["ENVO:01000252"] + db = get_shared_ontology(set1, set2) + assert db is None + + set1 = [] + set2 = [] + db = get_shared_ontology(set1, set2) + assert db is None diff --git a/tests/test_utilities.py b/tests/test_utilities.py index b8b9492..7c8edb9 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -12,6 +12,8 @@ write_workbook, write_eml, expand_curie, + compress_uri, + load_prefixmaps, ) from spinneret.datasets import get_example_eml_dir @@ -101,3 +103,21 @@ def test_expand_curie(): ) # Ungrounded CURIES should return the original CURIE assert expand_curie("AUTO:00001203") == "AUTO:00001203" + + +def test_compress_uri(): + """Test that a URI is compressed to a CURIE""" + + # Return a CURIE if the URI is in the mapping + r = compress_uri("http://purl.obolibrary.org/obo/ENVO_00001203") + assert r == "ENVO:00001203" + + # Return the original URI if the URI is not in the mapping + r = compress_uri("http://example.com/00001203") + assert r == "http://example.com/00001203" + + +def test_load_prefixmaps(): + """Test that the prefixmaps are loaded""" + prefixmaps = load_prefixmaps() + assert isinstance(prefixmaps, pd.DataFrame) From 31a5ff4605c02fd1c8a49e7a98d8ff376233f5ce Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 10:48:55 -0800 Subject: [PATCH 09/24] fix: correct OntoGPT command construction Remove an extra space from the OntoGPT `extract` command construction to prevent potential errors and ensure the command executes as expected. --- src/spinneret/annotator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 78216a5..925eff9 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -746,7 +746,7 @@ def get_ontogpt_annotation( f"--output-format json -o {output_file}" ) if local_model is not None: - cmd += f" -m ollama/{local_model}" + cmd += f" -m ollama/{local_model}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" From 2a46e3388bd2b4013848c83309ef773dc6e29fa0 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 10:55:53 -0800 Subject: [PATCH 10/24] perf: optimize OntoGPT calls using `ollama_chat` Optimize OntoGPT calls by specifying the `ollama_chat` model within the `extract` command, leveraging performance improvements recommended by the `litellm` package. 
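
For illustration, a minimal sketch of how the assembled command changes with this patch. This is not the project's code: the base command, template, input, and model names are placeholders, and only the appended model flag mirrors the diff below.

    # Minimal sketch, assuming a hypothetical local Ollama model name.
    local_model = "llama3.2"
    cmd = "ontogpt extract ... --output-format json -o output.json"
    if local_model is not None:
        # ollama_chat/<model> routes the request through Ollama's chat
        # endpoint, the faster path recommended by the litellm package.
        cmd += f" -m ollama_chat/{local_model}"  # previously: ollama/<model>
    print(cmd)
    # ontogpt extract ... --output-format json -o output.json -m ollama_chat/llama3.2
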
--- src/spinneret/annotator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 925eff9..8452c8d 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -746,7 +746,7 @@ def get_ontogpt_annotation( f"--output-format json -o {output_file}" ) if local_model is not None: - cmd += f" -m ollama/{local_model}" + cmd += f" -m ollama_chat/{local_model}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" From 44ac7d61598c3f363ff71ee4b66622d1322dbe7b Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 11:22:53 -0800 Subject: [PATCH 11/24] feat: introduce `temperature` parameter for OntoGPT calls Add a `temperature` parameter to OntoGPT calls, allowing users to control the model's behavior and adjust the level of creativity or randomness in the generated output. --- src/spinneret/annotator.py | 48 +++++++++++++++++++++++++++++++++++++- src/spinneret/main.py | 5 ++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index 8452c8d..ea6127e 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -140,6 +140,7 @@ def annotate_workbook( output_path: str, annotator: str, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> None: @@ -154,6 +155,8 @@ def annotate_workbook( the `get_ontogpt_annotation` function. Similarly, BioPortal requires an API key and is described in the `get_bioportal_annotation` function. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If None, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -188,6 +191,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -195,6 +199,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -202,6 +207,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -209,6 +215,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -216,6 +223,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -224,6 +232,7 @@ def annotate_workbook( eml, annotator="ontogpt", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -231,6 +240,7 @@ def annotate_workbook( wb, eml, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) @@ -570,6 +580,7 @@ def add_measurement_type_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -587,6 +598,8 @@ def add_measurement_type_annotations_to_workbook( annotations in the workbook, so a fresh set may be created. :param local_model: Required if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -647,6 +660,7 @@ def add_measurement_type_annotations_to_workbook( text=attribute_description, template="contains_measurement_of_type", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -705,7 +719,11 @@ def add_measurement_type_annotations_to_workbook( def get_ontogpt_annotation( - text: str, template: str, local_model: str = None, return_ungrounded: bool = False + text: str, + template: str, + local_model: str = None, + temperature: Union[float, None] = None, + return_ungrounded: bool = False, ) -> Union[list, None]: """ :param text: The text to be annotated. @@ -716,6 +734,8 @@ def get_ontogpt_annotation( https://ollama.com/library) and should be installed locally. If `None`, the configured remote model will be used. See the OntoGPT documentation for more information. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: If True, return ungrounded annotations. 
These may be useful in identifying potential concepts to add to a vocabulary, or to identify concepts that a human curator may be capable of @@ -747,6 +767,8 @@ def get_ontogpt_annotation( ) if local_model is not None: cmd += f" -m ollama_chat/{local_model}" + if temperature is not None: + cmd += f" --temperature {temperature}" try: # Clear the cache so that the model can derive new annotations cache_path = os.getcwd() + "/.litellm_cache" @@ -785,6 +807,7 @@ def add_process_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -797,6 +820,8 @@ def add_process_annotations_to_workbook( :param overwrite: If True, overwrite existing `process` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -858,6 +883,7 @@ def add_process_annotations_to_workbook( text=element_description, template="contains_process", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -901,6 +927,7 @@ def add_env_broad_scale_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -913,6 +940,8 @@ def add_env_broad_scale_annotations_to_workbook( :param overwrite: If True, overwrite existing `broad scale environmental context` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -973,6 +1002,7 @@ def add_env_broad_scale_annotations_to_workbook( text=element_description, template=predicate, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1018,6 +1048,7 @@ def add_env_local_scale_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1030,6 +1061,8 @@ def add_env_local_scale_annotations_to_workbook( :param overwrite: If True, overwrite existing `local scale environmental context` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -1092,6 +1125,7 @@ def add_env_local_scale_annotations_to_workbook( text=element_description, template=predicate, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1137,6 +1171,7 @@ def add_env_medium_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1150,6 +1185,8 @@ def add_env_medium_annotations_to_workbook( annotations in the workbook, so a fresh set may be created. :param local_model: Required if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: An option if `annotator` is "ontogpt". See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -1210,6 +1247,7 @@ def add_env_medium_annotations_to_workbook( text=attribute_description, template="env_medium", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1253,6 +1291,7 @@ def add_research_topic_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1265,6 +1304,8 @@ def add_research_topic_annotations_to_workbook( :param overwrite: If True, overwrite existing `research topic` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -1326,6 +1367,7 @@ def add_research_topic_annotations_to_workbook( text=element_description, template="research_topic", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: @@ -1369,6 +1411,7 @@ def add_methods_annotations_to_workbook( output_path: str = None, overwrite: bool = False, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> pd.core.frame.DataFrame: @@ -1381,6 +1424,8 @@ def add_methods_annotations_to_workbook( :param overwrite: If True, overwrite existing `methods` annotations in the workbook, so a fresh set may be created. :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. 
:param sample_size: Executes multiple replicates of the annotation request @@ -1445,6 +1490,7 @@ def add_methods_annotations_to_workbook( text=element_description, template="uses_method", local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, ) if res is not None: diff --git a/src/spinneret/main.py b/src/spinneret/main.py index 7a812ee..d8d4fd3 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from typing import Union from requests import get, codes from rdflib import Graph import daiquiri @@ -56,6 +57,7 @@ def annotate_workbooks( output_dir: str, config_path: str, local_model: str = None, + temperature: Union[float, None] = None, return_ungrounded: bool = False, sample_size: int = 1, ) -> None: @@ -70,6 +72,8 @@ def annotate_workbooks( :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file :param local_model: See `get_ontogpt_annotation` documentation for details. + :param temperature: The temperature parameter for the model. If `None`, the + OntoGPT default will be used. :param return_ungrounded: See `get_ontogpt_annotation` documentation for details. :param sample_size: Executes multiple replicates of the annotation request @@ -112,6 +116,7 @@ def annotate_workbooks( annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, + temperature=temperature, return_ungrounded=return_ungrounded, sample_size=sample_size, ) From 1c7926037e76f9137ba25548a030e26e55598f7c Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 11:44:51 -0800 Subject: [PATCH 12/24] fix: update OntoGPT templates to improve grounding Update templates to improve ontology grounding, specifically: 1. Improve template prompts to produce more accurate and precise results. 2. Relax vocabulary branch constraints to enable broader capture of concepts outside of the target branch due to relevant concepts appearing in multiple branches within the vocabulary. Do this for all templates except `contains_process` and `env_medium`, where concepts are sufficiently constrained to a single branch. By doing this we increase our reliance on effective prompts to guide the LLM to extract relevant concepts without extracting irrelevant concepts. The issue of irrelevant concepts may be addressed downstream in an additional post processing step that trims out these concepts. Note vocabulary constraints don't seem to work in vocabularies using the BioPortal API. 3. Replace semantically descriptive labels (e.g., `measurement_type`) in templates with less semantically related labels (e.g., `output`). This change mitigates the risk of the LLM misinterpreting labels as placeholders for extracted values, leading to parsing errors and incorrect results. 
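
To make point 3 concrete, here is a hypothetical before/after of an extracted object as it might appear in the JSON output that `get_ontogpt_annotation` parses. The structure, values, and the small helper below are invented for illustration only; the point is that a neutral slot name such as `output` keeps the model from echoing the label back as a value.

    # Hypothetical extractions, invented for illustration only; the real
    # JSON output is handled in get_ontogpt_annotation().
    before = {"measurement_type": ["measurement type"]}  # label echoed back
    after = {"output": ["chloride concentration", "specific conductance"]}

    def usable_values(extraction: dict) -> list:
        """Drop values that merely restate the slot label."""
        return [
            value
            for slot, values in extraction.items()
            for value in values
            if value.replace(" ", "_") != slot
        ]

    print(usable_values(before))  # []
    print(usable_values(after))   # ['chloride concentration', 'specific conductance']

A value that only repeats the slot label cannot be grounded to a vocabulary concept, which is the failure mode the renaming is meant to avoid.
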
--- .../contains_measurement_of_type.yaml | 11 ++++--- .../ontogpt/templates/contains_process.yaml | 7 +++-- .../ontogpt/templates/env_broad_scale.yaml | 31 ++++++++++--------- .../ontogpt/templates/env_local_scale.yaml | 22 ++----------- .../data/ontogpt/templates/env_medium.yaml | 6 ++-- .../ontogpt/templates/research_topic.yaml | 8 ++--- .../data/ontogpt/templates/uses_method.yaml | 10 +++--- 7 files changed, 40 insertions(+), 55 deletions(-) diff --git a/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml b/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml index f13f06f..6272214 100644 --- a/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml +++ b/src/spinneret/data/ontogpt/templates/contains_measurement_of_type.yaml @@ -7,6 +7,7 @@ description: >- license: https://creativecommons.org/publicdomain/zero/1.0/ prefixes: rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# +# rdfs: http://www.w3.org/2000/01/rdf-schema# ECSO: http://purl.dataone.org/odo/ECSO_ envmd: http://w3id.org/ontogpt/contains_measurement_of_type linkml: https://w3id.org/linkml/ @@ -22,16 +23,16 @@ classes: Dataset: tree_root: true attributes: - measurement_type: - description: the type of scientific measurement (or variable) described in the dataset + output: + description: >- + The measurement type or variable of a dataset annotations: - prompt: semicolon-separated list of the type of scientific measurement (or variable) described in the dataset + prompt: >- + semicolon-separated list of dataset variable names described by the text range: Measurement multivalued: true Measurement: is_a: NamedEntity - id_prefixes: - - ECSO annotations: annotators: bioportal:ECSO diff --git a/src/spinneret/data/ontogpt/templates/contains_process.yaml b/src/spinneret/data/ontogpt/templates/contains_process.yaml index 6d97e4e..2b8e18e 100644 --- a/src/spinneret/data/ontogpt/templates/contains_process.yaml +++ b/src/spinneret/data/ontogpt/templates/contains_process.yaml @@ -25,10 +25,11 @@ classes: Dataset: tree_root: true attributes: - contains_process: - description: the environmental process, biological process, or planned process investigated in the study + output: + description: The environmental or anthropogenic processes of the dataset annotations: - prompt: semicolon-separated list of the environmental process, biological process, or planned process investigated in the study + prompt: >- + semicolon-separated list of environmental processes or anthropogenic processes described by the text range: ContainsProcess multivalued: true diff --git a/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml b/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml index 5a3dda3..8a9e0cd 100644 --- a/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml +++ b/src/spinneret/data/ontogpt/templates/env_broad_scale.yaml @@ -24,27 +24,28 @@ classes: Dataset: tree_root: true attributes: - env_broad_scale: - description: the broad environmental context in which the study was conducted + output: + description: The broad environmental context of the dataset annotations: - prompt: semicolon-separated list of broad environmental contexts in which the study was conducted + prompt: >- + semicolon-separated list of the large scale environmental systems (e.g. 
ecosystem, biome) range: EnvBroadScale multivalued: true EnvBroadScale: is_a: NamedEntity - id_prefixes: - - ENVO +# id_prefixes: +# - ENVO annotations: annotators: sqlite:obo:envo - slot_usage: - id: - values_from: - - EnvoEnvironmentalSystem +# slot_usage: +# id: +# values_from: +# - EnvoEnvironmentalSystem -enums: - EnvoEnvironmentalSystem: - reachable_from: - source_ontology: obo:envo - source_nodes: - - ENVO:01000254 # environmental system +#enums: +# EnvoEnvironmentalSystem: +# reachable_from: +# source_ontology: obo:envo +# source_nodes: +# - ENVO:01000254 # environmental system diff --git a/src/spinneret/data/ontogpt/templates/env_local_scale.yaml b/src/spinneret/data/ontogpt/templates/env_local_scale.yaml index 88f8758..109b595 100644 --- a/src/spinneret/data/ontogpt/templates/env_local_scale.yaml +++ b/src/spinneret/data/ontogpt/templates/env_local_scale.yaml @@ -24,31 +24,15 @@ classes: Dataset: tree_root: true attributes: - env_local_scale: - description: the local environmental context in which the study was conducted + output: + description: The local environmental context of the dataset annotations: - prompt: semicolon-separated list of local environmental contexts in which the study was conducted + prompt: semicolon-separated list of the local scale environmental features range: EnvLocalScale multivalued: true EnvLocalScale: is_a: NamedEntity - id_prefixes: - - ENVO annotations: annotators: sqlite:obo:envo - slot_usage: - id: - values_from: - - EnvoMaterialEntity -enums: - EnvoMaterialEntity: - reachable_from: - source_ontology: obo:envo - source_nodes: # a selection of nodes from the ENVO `material entity` branch - - ENVO:01000813 # astronomical body part - - ENVO:01001813 # construction - - ENVO:01000408 # environmental zone - - ENVO:01003020 # fiat part of an astronomical object - - ENVO:01000281 # layer diff --git a/src/spinneret/data/ontogpt/templates/env_medium.yaml b/src/spinneret/data/ontogpt/templates/env_medium.yaml index 9d14428..8ab18ed 100644 --- a/src/spinneret/data/ontogpt/templates/env_medium.yaml +++ b/src/spinneret/data/ontogpt/templates/env_medium.yaml @@ -23,10 +23,10 @@ classes: Dataset: tree_root: true attributes: - env_medium: - description: the environmental material(s) immediately surrounding the sample or specimen at the time of sampling + output: + description: The environmental material(s) immediately surrounding the measurement variable at the time of sampling annotations: - prompt: semicolon-separated list of the environmental material(s) immediately surrounding the sample or specimen at the time of sampling + prompt: semicolon-separated list of the environmental material(s) immediately surrounding the measurement variable at the time of sampling range: EnvironmentalMedium multivalued: true diff --git a/src/spinneret/data/ontogpt/templates/research_topic.yaml b/src/spinneret/data/ontogpt/templates/research_topic.yaml index b152a3b..7b5d03f 100644 --- a/src/spinneret/data/ontogpt/templates/research_topic.yaml +++ b/src/spinneret/data/ontogpt/templates/research_topic.yaml @@ -22,16 +22,14 @@ classes: Dataset: tree_root: true attributes: - topic: - description: the general scientific area of study concerning the sample(s) + output: + description: The scientific areas of study of the dataset annotations: - prompt: semicolon-separated list of scientific areas of study concerning the sample(s) + prompt: semicolon-separated list of scientific areas of study described by the text range: Topic multivalued: true Topic: is_a: NamedEntity - id_prefixes: - 
- ENVTHES annotations: annotators: bioportal:ENVTHES diff --git a/src/spinneret/data/ontogpt/templates/uses_method.yaml b/src/spinneret/data/ontogpt/templates/uses_method.yaml index 5ccfaf2..77b8534 100644 --- a/src/spinneret/data/ontogpt/templates/uses_method.yaml +++ b/src/spinneret/data/ontogpt/templates/uses_method.yaml @@ -22,16 +22,16 @@ classes: Dataset: tree_root: true attributes: - method: - description: the type of method or technique used to gather data + output: + description: >- + The type of method or technique used to create the dataset annotations: - prompt: semicolon-separated list of the type of method or technique used to gather data + prompt: >- + semicolon-separated list of the type of method or technique used to create the dataset range: Method multivalued: true Method: is_a: NamedEntity - id_prefixes: - - ENVTHES annotations: annotators: bioportal:ENVTHES From 5a09e7ed8c17e56083e960e08769f1ed08781b56 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Wed, 11 Dec 2024 16:33:10 -0800 Subject: [PATCH 13/24] feat: enhance CURIE expansion with expanded prefix map Updated the `expand_curie` function to utilize a significantly larger prefix map, enabling the expansion of a wider range of CURIEs. --- src/spinneret/utilities.py | 20 ++++++++++---------- tests/test_utilities.py | 7 +++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 922691a..6d56f2a 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -97,21 +97,21 @@ def write_eml(eml: etree._ElementTree, output_path: str) -> None: def expand_curie(curie: str) -> str: """ + Expand a CURIE into a URI based on the prefix mappings in the OBO and + BioPortal converters. + :param curie: The CURIE to be expanded. :returns: The expanded CURIE. Returns the original CURIE if the prefix does not have a mapping. + :notes: This is a wrapper function around the `prefixmaps` and `curies` + libraries.
""" - mapping = { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "linkml": "https://w3id.org/linkml/", - "ECSO": "http://purl.dataone.org/odo/ECSO_", - "ENVO": "http://purl.obolibrary.org/obo/ENVO_", - "BFO": "http://purl.obolibrary.org/obo/BFO_", - "ENVTHES": "http://vocabs.lter-europe.net/EnvThes/", - "AUTO": "AUTO:", # return ungrounded CURIEs as is - } + prefixmaps = load_prefixmaps() prefix, suffix = curie.split(":") - return f"{mapping[prefix]}{suffix}" + namespace = prefixmaps[prefixmaps["prefix"] == prefix]["namespace"] + if len(namespace) > 0: + return f"{namespace.to_string(index=False).strip()}{suffix}" + return curie def compress_uri(uri: str) -> str: diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 7c8edb9..6a566d7 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -93,6 +93,8 @@ def test_write_eml(tmp_path): def test_expand_curie(): """Test that a CURIE is expanded to a URL""" + + # Recognized CURIES should return the corresponding URI assert expand_curie("ECSO:00001203") == "http://purl.dataone.org/odo/ECSO_00001203" assert ( expand_curie("ENVO:00001203") == "http://purl.obolibrary.org/obo/ENVO_00001203" ) @@ -101,6 +103,11 @@ def test_expand_curie(): expand_curie("ENVTHES:00001203") == "http://vocabs.lter-europe.net/EnvThes/00001203" ) + assert ( + expand_curie("OBOE:00001203") + == "http://ecoinformatics.org/oboe/oboe.1.2/00001203" + ) + # Ungrounded CURIES should return the original CURIE assert expand_curie("AUTO:00001203") == "AUTO:00001203" From 31d0e9cf12ae01017038b253c74b7093b0282593 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 13 Dec 2024 06:50:34 -0800 Subject: [PATCH 14/24] fix: handle multiple colons in CURIE expansion Correct the expand_curie function to handle CURIEs containing more than one colon, preventing the ValueError: too many values to unpack error. --- src/spinneret/utilities.py | 15 ++++++++++++++- tests/test_utilities.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 6d56f2a..80ad65c 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -5,10 +5,13 @@ import importlib from urllib.parse import urlparse from json import load +import daiquiri import pandas as pd from lxml import etree +logger = daiquiri.getLogger(__name__) + def load_configuration(config_file: str) -> None: """Loads the configuration file as global environment variables for use @@ -107,7 +110,17 @@ def expand_curie(curie: str) -> str: libraries. """ prefixmaps = load_prefixmaps() - prefix, suffix = curie.split(":") + + # On rare occasion we encounter a CURIE with multiple colons, so we need + # to use exception handling and issue a warning. + try: + prefix, suffix = curie.split(":") + except ValueError: + logger.warning( + f"Warning: {curie} is not recognized. Returning the original string." + ) + return curie + namespace = prefixmaps[prefixmaps["prefix"] == prefix]["namespace"] if len(namespace) > 0: return f"{namespace.to_string(index=False).strip()}{suffix}" diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 6a566d7..3baa6f4 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -112,6 +112,16 @@ def test_expand_curie(): assert expand_curie("AUTO:00001203") == "AUTO:00001203" +def test_expand_curie_handles_multiple_colons(): + """Test that a CURIE with multiple colons does not raise an error + + This is an unusual case that has occurred in past integration tests.
Not sure + what the source of this issue is but are testing for it here. + """ + curie = "ENVO:PATO:00001203" + assert expand_curie(curie) == curie + + def test_compress_uri(): """Test that a URI is compressed to a CURIE""" From 8fd9962ac308279f6c61fea0ea697d0d79e01145 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Thu, 19 Dec 2024 11:00:48 -0500 Subject: [PATCH 15/24] feat: visualize grounding rates across OntoGPT configurations Implement a visualization to assess the grounding success rates of different OntoGPT configurations. This visualization utilizes a 100% stacked bar chart to compare and contrast the performance of various configurations. --- environment-min.yml | 4 +- environment.yml | 154 +++++++------ poetry.lock | 435 ++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + requirements.txt | 130 ++++++----- src/spinneret/benchmark.py | 116 +++++++++- tests/test_benchmark.py | 35 +++ 7 files changed, 748 insertions(+), 127 deletions(-) diff --git a/environment-min.yml b/environment-min.yml index 3983356..3f48ce9 100644 --- a/environment-min.yml +++ b/environment-min.yml @@ -17,7 +17,5 @@ dependencies: - sphinx - sphinx-autoapi - daiquiri - - pip - - pip: - - git+https://github.com/clnsmth/soso.git@main + - matplotlib prefix: /opt/miniconda3/envs/spinneret diff --git a/environment.yml b/environment.yml index e417283..d7c40db 100644 --- a/environment.yml +++ b/environment.yml @@ -6,25 +6,25 @@ dependencies: - alabaster=1.0.0 - annotated-types=0.7.0 - anyio=4.6.2.post1 - - astroid=3.3.5 - babel=2.16.0 - backoff=2.2.1 - - black=24.10.0 + - brotli=1.1.0 + - brotli-bin=1.1.0 - brotli-python=1.1.0 - bzip2=1.0.8 - - ca-certificates=2024.8.30 - - certifi=2024.8.30 + - ca-certificates=2024.12.14 + - certifi=2024.12.14 - cffi=1.17.1 - - charset-normalizer=3.4.0 - click=8.1.7 - click-option-group=0.5.6 - colorama=0.4.6 - - coverage=7.6.8 - - daiquiri=3.0.0 - - dill=0.3.9 + - contourpy=1.3.1 + - cycler=0.12.1 - docutils=0.21.2 - dotty-dict=1.3.1 - exceptiongroup=1.2.2 + - fonttools=4.55.3 + - freetype=2.12.1 - gitdb=4.0.11 - gitpython=3.1.43 - gql=3.5.0 @@ -37,122 +37,129 @@ dependencies: - importlib-resources=6.4.5 - importlib_resources=6.4.5 - iniconfig=2.0.0 - - isodate=0.7.2 - isort=5.13.2 - jinja2=3.1.4 + - kiwisolver=1.4.7 + - lcms2=2.16 + - lerc=4.0.0 - libblas=3.9.0 + - libbrotlicommon=1.1.0 + - libbrotlidec=1.1.0 + - libbrotlienc=1.1.0 - libcblas=3.9.0 - libcxx=19.1.4 + - libdeflate=1.23 - libexpat=2.6.4 - libffi=3.4.2 - libgfortran=5.0.0 - libgfortran5=13.2.0 - libiconv=1.17 + - libjpeg-turbo=3.0.0 - liblapack=3.9.0 + - liblzma=5.6.3 - libopenblas=0.3.28 + - libpng=1.6.44 - libsqlite=3.47.0 + - libtiff=4.7.0 + - libwebp-base=1.4.0 + - libxcb=1.17.0 - libxml2=2.13.5 - libxslt=1.1.39 - libzlib=1.3.1 - llvm-openmp=19.1.4 - lxml=5.3.0 - markdown-it-py=3.0.0 - - markupsafe=3.0.2 + - matplotlib=3.10.0 + - matplotlib-base=3.10.0 - mccabe=0.7.0 - mdit-py-plugins=0.4.2 - mdurl=0.1.2 - multidict=6.1.0 + - munkres=1.1.4 - mypy_extensions=1.0.0 - myst-parser=4.0.0 - ncurses=6.5 + - openjpeg=2.5.3 - openssl=3.4.0 - - packaging=24.2 - - pandas=2.2.3 - pathspec=0.12.1 + - pillow=11.0.0 - pip=24.3.1 - platformdirs=4.3.6 - pluggy=1.5.0 - propcache=0.2.0 + - pthread-stubs=0.4 - pycparser=2.22 - - pydantic=2.10.1 - - pydantic-core=2.27.1 - pygments=2.18.0 - - pylint=3.3.1 - - pyparsing=3.2.0 - pysocks=1.7.1 - pytest=8.3.3 - - pytest-cov=6.0.0 - pytest-mock=3.14.0 - python=3.11.10 - python-dateutil=2.9.0.post0 - - python-gitlab=4.13.0 - python-json-logger=2.0.7 - - 
python-semantic-release=9.14.0 - - python-tzdata=2024.2 - python_abi=3.11 - - pytz=2024.1 - pyyaml=6.0.2 - - rdflib=7.1.1 + - qhull=2020.2 - readline=8.2 - requests=2.32.3 - requests-toolbelt=1.0.0 - - rich=13.9.4 - - setuptools=75.6.0 - shellingham=1.5.4 - six=1.16.0 - - smmap=5.0.0 - sniffio=1.3.1 - snowballstemmer=2.2.0 - - sphinx=8.1.3 - - sphinx-autoapi=3.3.3 - sphinxcontrib-applehelp=2.0.0 - sphinxcontrib-devhelp=2.0.0 - sphinxcontrib-htmlhelp=2.1.0 - sphinxcontrib-jsmath=1.0.1 - sphinxcontrib-qthelp=2.0.0 - - sphinxcontrib-serializinghtml=1.1.10 - stdlib-list=0.11.0 - tk=8.6.13 - toml=0.10.2 - tomli=2.1.0 - tomlkit=0.13.2 + - tornado=6.4.2 - typing-extensions=4.12.2 - typing_extensions=4.12.2 - - tzdata=2024b + - unicodedata2=15.1.0 - wheel=0.45.1 + - xorg-libxau=1.0.12 + - xorg-libxdmcp=1.1.5 - xz=5.2.6 - yaml=0.2.5 - - yarl=1.18.0 - - zipp=3.21.0 - zstandard=0.23.0 - zstd=1.5.6 - pip: - adeft==0.12.3 - aiofiles==24.1.0 - aiohappyeyeballs==2.4.3 - - aiohttp==3.11.7 + - aiohttp==3.10.10 - aiosignal==1.3.1 - airium==0.2.6 - aniso8601==9.0.1 - antlr4-python3-runtime==4.9.3 - appdirs==1.4.4 - arrow==1.3.0 + - astroid==3.2.4 - attrs==24.2.0 - bcp47==0.1.0 - beautifulsoup4==4.12.3 - bioc==2.1 - - blinker==1.9.0 - - boto3==1.35.69 - - botocore==1.35.69 + - black==24.8.0 + - blinker==1.8.2 + - boto3==1.35.54 + - botocore==1.35.54 - cachier==3.1.2 - cattrs==24.1.2 - cfgraph==0.2.1 - chardet==5.2.0 - - class-resolver==0.5.4 + - charset-normalizer==3.3.2 + - class-resolver==0.5.2 + - coverage==7.6.1 - curies==0.9.0 + - daiquiri==3.2.5.1 - defusedxml==0.7.1 - - deprecated==1.2.15 + - deprecated==1.2.14 - deprecation==2.1.0 + - dill==0.3.8 - diskcache==5.6.3 - distro==1.9.0 - docopt==0.6.2 @@ -161,7 +168,7 @@ dependencies: - eutils==0.6.0 - fastobo==0.12.3 - filelock==3.16.1 - - flask==3.1.0 + - flask==3.0.3 - flask-restx==1.3.0 - fqdn==1.5.1 - frontend==0.0.3 @@ -173,7 +180,7 @@ dependencies: - h11==0.14.0 - hbreader==0.9.1 - html5lib==1.1 - - httpcore==1.0.7 + - httpcore==1.0.6 - httpx==0.27.2 - huggingface-hub==0.26.2 - ijson==3.3.0 @@ -181,9 +188,10 @@ dependencies: - inflect==7.4.0 - inflection==0.5.1 - intervaltree==3.1.0 + - isodate==0.6.1 - isoduration==20.11.0 - itsdangerous==2.2.0 - - jiter==0.7.1 + - jiter==0.7.0 - jmespath==1.0.1 - joblib==1.4.2 - json-flattener==0.1.9 @@ -203,83 +211,105 @@ dependencies: - linkml-owl==0.3.0 - linkml-renderer==0.3.1 - linkml-runtime==1.8.3 - - litellm==1.52.16 + - litellm==1.51.2 + - markupsafe==2.1.5 - more-click==0.1.2 - more-itertools==10.5.0 - ndex2==3.9.0 - networkx==3.4.2 - nltk==3.9.1 - - numpy==2.0.2 + - numpy==2.1.1 - oaklib==0.6.18 - ols-client==0.1.4 - - ontogpt==1.0.8 + - ontogpt==1.0.6 - ontoportal-client==0.0.4 - - openai==1.55.1 + - openai==1.53.0 - openpyxl==3.1.5 - owlrl==6.0.2 + - packaging==24.1 + - pandas==2.2.2 - pansql==0.0.1 - parse==1.20.2 - ply==3.11 - - portalocker==3.0.0 + - portalocker==2.10.1 - prefixcommons==0.1.12 - prefixmaps==0.2.6 - - prettytable==3.12.0 + - prettytable==3.11.0 - pronto==2.5.8 + - pydantic==2.9.2 + - pydantic-core==2.23.4 - pyjsg==0.11.10 - - pymupdf==1.24.14 + - pylint==3.2.7 + - pymupdf==1.24.13 + - pyparsing==3.1.4 - pyshacl==0.26.0 - pyshex==0.8.1 - pyshexc==0.9.1 - pysolr==3.10.0 - - pystow==0.6.1 + - pystow==0.5.6 + - pytest-cov==5.0.0 - pytest-logging==2015.11.4 - python-dotenv==1.0.1 + - python-gitlab==4.11.1 + - python-semantic-release==9.8.8 - pytrie==0.4.0 + - pytz==2024.2 - ratelimit==2.2.1 + - rdflib==7.0.0 - rdflib-jsonld==0.6.1 - rdflib-shim==1.0.3 - referencing==0.35.1 - - 
regex==2024.11.6 + - regex==2024.9.11 - requests-cache==1.2.1 - rfc3339-validator==0.1.4 - rfc3987==1.3.8 - - rpds-py==0.21.0 + - rich==13.8.1 + - rpds-py==0.20.1 - ruamel-yaml==0.18.6 - ruamel-yaml-clib==0.2.12 - - s3transfer==0.10.4 + - s3transfer==0.10.3 - scikit-learn==1.4.2 - scipy==1.14.1 - semsql==0.3.3 + - setuptools==75.3.0 - shexjsg==0.8.2 + - smmap==5.0.1 - sortedcontainers==2.4.0 + - soso==0.2.0 - soupsieve==2.6 - sparqlslurper==0.5.1 - sparqlwrapper==2.0.0 + - sphinx==8.0.2 + - sphinx-autoapi==3.3.1 + - sphinxcontrib-serializinghtml==2.0.0 + - spinneret==0.2.0 - sqlalchemy==2.0.36 - sqlalchemy-utils==0.38.3 - - sssom==0.4.13 + - sssom==0.4.12 - sssom-schema==1.0.0 - - starlette==0.41.3 + - starlette==0.41.2 - tenacity==9.0.0 - threadpoolctl==3.5.0 - - tiktoken==0.8.0 - - tokenizers==0.20.3 - - tqdm==4.67.1 - - typeguard==4.4.1 + - tiktoken==0.7.0 + - tokenizers==0.20.1 + - tqdm==4.66.6 + - typeguard==4.4.0 - types-python-dateutil==2.9.0.20241003 + - tzdata==2024.1 - unidecode==1.3.8 - uri-template==1.3.0 - url-normalize==1.4.3 - urllib3==1.26.20 - - uvicorn==0.32.1 + - uvicorn==0.32.0 - validators==0.34.0 - watchdog==6.0.0 - wcwidth==0.2.13 - - webcolors==24.11.1 + - webcolors==24.8.0 - webencodings==0.5.1 - - werkzeug==3.1.3 + - werkzeug==3.1.1 - wikipedia==1.4.0 - wikipedia-api==0.7.1 - - wrapt==1.17.0 - - git+https://github.com/clnsmth/soso.git@main + - wrapt==1.16.0 + - yarl==1.17.1 + - zipp==3.20.2 prefix: /opt/miniconda3/envs/spinneret diff --git a/poetry.lock b/poetry.lock index 98a35ed..a2fc226 100644 --- a/poetry.lock +++ b/poetry.lock @@ -705,6 +705,79 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "contourpy" +version = "1.3.1" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.10" +files = [ + {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595"}, + {file = "contourpy-1.3.1-cp310-cp310-win32.whl", hash = "sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697"}, + {file = "contourpy-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f"}, + {file = "contourpy-1.3.1-cp311-cp311-win32.whl", hash = "sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375"}, + {file = "contourpy-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d"}, + {file = "contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e"}, + {file = "contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81"}, + {file = 
"contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1"}, + {file = "contourpy-1.3.1-cp313-cp313-win32.whl", hash = "sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82"}, + {file = "contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53"}, + {file = "contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699"}, +] + +[package.dependencies] +numpy = ">=1.23" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.11.1)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = 
["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] + [[package]] name = "coverage" version = "7.6.1" @@ -812,6 +885,21 @@ pandas = ["pandas"] rdflib = ["rdflib"] tests = ["coverage", "pytest", "requests"] +[[package]] +name = "cycler" +version = "0.12.1" +description = "Composable style cycles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[package.extras] +docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] +tests = ["pytest", "pytest-cov", "pytest-xdist"] + [[package]] name = "daiquiri" version = "3.2.5.1" @@ -1099,6 +1187,79 @@ dev = ["Faker (==2.0.0)", "black", "blinker", "invoke (==2.2.0)", "mock (==3.0.5 doc = ["Sphinx (==5.3.0)", "alabaster (==0.7.12)", "sphinx-issues (==3.0.1)"] test = ["Faker (==2.0.0)", "blinker", "invoke (==2.2.0)", "mock (==3.0.5)", "pytest (==7.0.1)", "pytest-benchmark (==3.4.1)", "pytest-cov (==4.0.0)", "pytest-flask (==1.3.0)", "pytest-mock (==3.6.1)", "pytest-profiling (==1.7.0)", "setuptools", "twine (==3.8.0)", "tzlocal"] +[[package]] +name = "fonttools" +version = "4.55.3" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1dcc07934a2165ccdc3a5a608db56fb3c24b609658a5b340aee4ecf3ba679dc0"}, + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f7d66c15ba875432a2d2fb419523f5d3d347f91f48f57b8b08a2dfc3c39b8a3f"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e4ae3592e62eba83cd2c4ccd9462dcfa603ff78e09110680a5444c6925d841"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d65a3022c35e404d19ca14f291c89cc5890032ff04f6c17af0bd1927299674"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d342e88764fb201286d185093781bf6628bbe380a913c24adf772d901baa8276"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd68c87a2bfe37c5b33bcda0fba39b65a353876d3b9006fde3adae31f97b3ef5"}, + {file = "fonttools-4.55.3-cp310-cp310-win32.whl", hash = "sha256:1bc7ad24ff98846282eef1cbeac05d013c2154f977a79886bb943015d2b1b261"}, + {file = "fonttools-4.55.3-cp310-cp310-win_amd64.whl", hash = "sha256:b54baf65c52952db65df39fcd4820668d0ef4766c0ccdf32879b77f7c804d5c5"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8c4491699bad88efe95772543cd49870cf756b019ad56294f6498982408ab03e"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5323a22eabddf4b24f66d26894f1229261021dacd9d29e89f7872dd8c63f0b8b"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5480673f599ad410695ca2ddef2dfefe9df779a9a5cda89503881e503c9c7d90"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9da6d65cd7aa6b0f806556f4985bcbf603bf0c5c590e61b43aa3e5a0f822d0"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e894b5bd60d9f473bed7a8f506515549cc194de08064d829464088d23097331b"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:aee3b57643827e237ff6ec6d28d9ff9766bd8b21e08cd13bff479e13d4b14765"}, + {file = "fonttools-4.55.3-cp311-cp311-win32.whl", hash = "sha256:eb6ca911c4c17eb51853143624d8dc87cdcdf12a711fc38bf5bd21521e79715f"}, + {file = "fonttools-4.55.3-cp311-cp311-win_amd64.whl", hash = "sha256:6314bf82c54c53c71805318fcf6786d986461622dd926d92a465199ff54b1b72"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f9e736f60f4911061235603a6119e72053073a12c6d7904011df2d8fad2c0e35"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a8aa2c5e5b8b3bcb2e4538d929f6589a5c6bdb84fd16e2ed92649fb5454f11c"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07f8288aacf0a38d174445fc78377a97fb0b83cfe352a90c9d9c1400571963c7"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8d5e8916c0970fbc0f6f1bece0063363bb5857a7f170121a4493e31c3db3314"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ae3b6600565b2d80b7c05acb8e24d2b26ac407b27a3f2e078229721ba5698427"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:54153c49913f45065c8d9e6d0c101396725c5621c8aee744719300f79771d75a"}, + {file = "fonttools-4.55.3-cp312-cp312-win32.whl", hash = "sha256:827e95fdbbd3e51f8b459af5ea10ecb4e30af50221ca103bea68218e9615de07"}, + {file = "fonttools-4.55.3-cp312-cp312-win_amd64.whl", hash = "sha256:e6e8766eeeb2de759e862004aa11a9ea3d6f6d5ec710551a88b476192b64fd54"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a430178ad3e650e695167cb53242dae3477b35c95bef6525b074d87493c4bf29"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:529cef2ce91dc44f8e407cc567fae6e49a1786f2fefefa73a294704c415322a4"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e75f12c82127486fac2d8bfbf5bf058202f54bf4f158d367e41647b972342ca"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:859c358ebf41db18fb72342d3080bce67c02b39e86b9fbcf1610cca14984841b"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:546565028e244a701f73df6d8dd6be489d01617863ec0c6a42fa25bf45d43048"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aca318b77f23523309eec4475d1fbbb00a6b133eb766a8bdc401faba91261abe"}, + {file = "fonttools-4.55.3-cp313-cp313-win32.whl", hash = "sha256:8c5ec45428edaa7022f1c949a632a6f298edc7b481312fc7dc258921e9399628"}, + {file = "fonttools-4.55.3-cp313-cp313-win_amd64.whl", hash = "sha256:11e5de1ee0d95af4ae23c1a138b184b7f06e0b6abacabf1d0db41c90b03d834b"}, + {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:caf8230f3e10f8f5d7593eb6d252a37caf58c480b19a17e250a63dad63834cf3"}, + {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b586ab5b15b6097f2fb71cafa3c98edfd0dba1ad8027229e7b1e204a58b0e09d"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8c2794ded89399cc2169c4d0bf7941247b8d5932b2659e09834adfbb01589aa"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf4fe7c124aa3f4e4c1940880156e13f2f4d98170d35c749e6b4f119a872551e"}, + {file = 
"fonttools-4.55.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:86721fbc389ef5cc1e2f477019e5069e8e4421e8d9576e9c26f840dbb04678de"}, + {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:89bdc5d88bdeec1b15af790810e267e8332d92561dce4f0748c2b95c9bdf3926"}, + {file = "fonttools-4.55.3-cp38-cp38-win32.whl", hash = "sha256:bc5dbb4685e51235ef487e4bd501ddfc49be5aede5e40f4cefcccabc6e60fb4b"}, + {file = "fonttools-4.55.3-cp38-cp38-win_amd64.whl", hash = "sha256:cd70de1a52a8ee2d1877b6293af8a2484ac82514f10b1c67c1c5762d38073e56"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bdcc9f04b36c6c20978d3f060e5323a43f6222accc4e7fcbef3f428e216d96af"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c3ca99e0d460eff46e033cd3992a969658c3169ffcd533e0a39c63a38beb6831"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22f38464daa6cdb7b6aebd14ab06609328fe1e9705bb0fcc7d1e69de7109ee02"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed63959d00b61959b035c7d47f9313c2c1ece090ff63afea702fe86de00dbed4"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5e8d657cd7326eeaba27de2740e847c6b39dde2f8d7cd7cc56f6aad404ddf0bd"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fb594b5a99943042c702c550d5494bdd7577f6ef19b0bc73877c948a63184a32"}, + {file = "fonttools-4.55.3-cp39-cp39-win32.whl", hash = "sha256:dc5294a3d5c84226e3dbba1b6f61d7ad813a8c0238fceea4e09aa04848c3d851"}, + {file = "fonttools-4.55.3-cp39-cp39-win_amd64.whl", hash = "sha256:aedbeb1db64496d098e6be92b2e63b5fac4e53b1b92032dfc6988e1ea9134a4d"}, + {file = "fonttools-4.55.3-py3-none-any.whl", hash = "sha256:f412604ccbeee81b091b420272841e5ec5ef68967a9790e80bffd0e30b8e2977"}, + {file = "fonttools-4.55.3.tar.gz", hash = "sha256:3983313c2a04d6cc1fe9251f8fc647754cf49a61dac6cb1e7249ae67afaafc45"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "pycairo", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.1.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "pycairo", "scipy"] +lxml = ["lxml (>=4.0)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.1.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + [[package]] name = "fqdn" version = "1.5.1" @@ -2141,6 +2302,129 @@ lark = ">=1.1.2" linkml-runtime = ">=1.1.24" prefixmaps = ">=0.2.0,<0.3.0" +[[package]] +name = "kiwisolver" +version = "1.4.7" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.8" +files = [ + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8a9c83f75223d5e48b0bc9cb1bf2776cf01563e00ade8775ffe13b0b6e1af3a6"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58370b1ffbd35407444d57057b57da5d6549d2d854fa30249771775c63b5fe17"}, + {file = "kiwisolver-1.4.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa0abdf853e09aff551db11fce173e2177d00786c688203f52c87ad7fcd91ef9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:8d53103597a252fb3ab8b5845af04c7a26d5e7ea8122303dd7a021176a87e8b9"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88f17c5ffa8e9462fb79f62746428dd57b46eb931698e42e990ad63103f35e6c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a9ca9c710d598fd75ee5de59d5bda2684d9db36a9f50b6125eaea3969c2599"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f4d742cb7af1c28303a51b7a27aaee540e71bb8e24f68c736f6f2ffc82f2bf05"}, + {file = "kiwisolver-1.4.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28c7fea2196bf4c2f8d46a0415c77a1c480cc0724722f23d7410ffe9842c407"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e968b84db54f9d42046cf154e02911e39c0435c9801681e3fc9ce8a3c4130278"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0c18ec74c0472de033e1bebb2911c3c310eef5649133dd0bedf2a169a1b269e5"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8f0ea6da6d393d8b2e187e6a5e3fb81f5862010a40c3945e2c6d12ae45cfb2ad"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:f106407dda69ae456dd1227966bf445b157ccc80ba0dff3802bb63f30b74e895"}, + {file = "kiwisolver-1.4.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84ec80df401cfee1457063732d90022f93951944b5b58975d34ab56bb150dfb3"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win32.whl", hash = "sha256:71bb308552200fb2c195e35ef05de12f0c878c07fc91c270eb3d6e41698c3bcc"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_amd64.whl", hash = "sha256:44756f9fd339de0fb6ee4f8c1696cfd19b2422e0d70b4cefc1cc7f1f64045a8c"}, + {file = "kiwisolver-1.4.7-cp310-cp310-win_arm64.whl", hash = "sha256:78a42513018c41c2ffd262eb676442315cbfe3c44eed82385c2ed043bc63210a"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d2b0e12a42fb4e72d509fc994713d099cbb15ebf1103545e8a45f14da2dfca54"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a8781ac3edc42ea4b90bc23e7d37b665d89423818e26eb6df90698aa2287c95"}, + {file = "kiwisolver-1.4.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:46707a10836894b559e04b0fd143e343945c97fd170d69a2d26d640b4e297935"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef97b8df011141c9b0f6caf23b29379f87dd13183c978a30a3c546d2c47314cb"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab58c12a2cd0fc769089e6d38466c46d7f76aced0a1f54c77652446733d2d02"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:803b8e1459341c1bb56d1c5c010406d5edec8a0713a0945851290a7930679b51"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9a9e8a507420fe35992ee9ecb302dab68550dedc0da9e2880dd88071c5fb052"}, + {file = "kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18077b53dc3bb490e330669a99920c5e6a496889ae8c63b58fbc57c3d7f33a18"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6af936f79086a89b3680a280c47ea90b4df7047b5bdf3aa5c524bbedddb9e545"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:3abc5b19d24af4b77d1598a585b8a719beb8569a71568b66f4ebe1fb0449460b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:933d4de052939d90afbe6e9d5273ae05fb836cc86c15b686edd4b3560cc0ee36"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:65e720d2ab2b53f1f72fb5da5fb477455905ce2c88aaa671ff0a447c2c80e8e3"}, + {file = "kiwisolver-1.4.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3bf1ed55088f214ba6427484c59553123fdd9b218a42bbc8c6496d6754b1e523"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win32.whl", hash = "sha256:4c00336b9dd5ad96d0a558fd18a8b6f711b7449acce4c157e7343ba92dd0cf3d"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_amd64.whl", hash = "sha256:929e294c1ac1e9f615c62a4e4313ca1823ba37326c164ec720a803287c4c499b"}, + {file = "kiwisolver-1.4.7-cp311-cp311-win_arm64.whl", hash = "sha256:e33e8fbd440c917106b237ef1a2f1449dfbb9b6f6e1ce17c94cd6a1e0d438376"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:5360cc32706dab3931f738d3079652d20982511f7c0ac5711483e6eab08efff2"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942216596dc64ddb25adb215c3c783215b23626f8d84e8eff8d6d45c3f29f75a"}, + {file = "kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:48b571ecd8bae15702e4f22d3ff6a0f13e54d3d00cd25216d5e7f658242065ee"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad42ba922c67c5f219097b28fae965e10045ddf145d2928bfac2eb2e17673640"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:612a10bdae23404a72941a0fc8fa2660c6ea1217c4ce0dbcab8a8f6543ea9e7f"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e838bba3a3bac0fe06d849d29772eb1afb9745a59710762e4ba3f4cb8424483"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:22f499f6157236c19f4bbbd472fa55b063db77a16cd74d49afe28992dff8c258"}, + {file = "kiwisolver-1.4.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693902d433cf585133699972b6d7c42a8b9f8f826ebcaf0132ff55200afc599e"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4e77f2126c3e0b0d055f44513ed349038ac180371ed9b52fe96a32aa071a5107"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:657a05857bda581c3656bfc3b20e353c232e9193eb167766ad2dc58b56504948"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4bfa75a048c056a411f9705856abfc872558e33c055d80af6a380e3658766038"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:34ea1de54beef1c104422d210c47c7d2a4999bdecf42c7b5718fbe59a4cac383"}, + {file = "kiwisolver-1.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:90da3b5f694b85231cf93586dad5e90e2d71b9428f9aad96952c99055582f520"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win32.whl", hash = "sha256:18e0cca3e008e17fe9b164b55735a325140a5a35faad8de92dd80265cd5eb80b"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:58cb20602b18f86f83a5c87d3ee1c766a79c0d452f8def86d925e6c60fbf7bfb"}, + {file = "kiwisolver-1.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:f5a8b53bdc0b3961f8b6125e198617c40aeed638b387913bf1ce78afb1b0be2a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:2e6039dcbe79a8e0f044f1c39db1986a1b8071051efba3ee4d74f5b365f5226e"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a1ecf0ac1c518487d9d23b1cd7139a6a65bc460cd101ab01f1be82ecf09794b6"}, + {file = "kiwisolver-1.4.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ab9ccab2b5bd5702ab0803676a580fffa2aa178c2badc5557a84cc943fcf750"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f816dd2277f8d63d79f9c8473a79fe54047bc0467754962840782c575522224d"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf8bcc23ceb5a1b624572a1623b9f79d2c3b337c8c455405ef231933a10da379"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dea0bf229319828467d7fca8c7c189780aa9ff679c94539eed7532ebe33ed37c"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c06a4c7cf15ec739ce0e5971b26c93638730090add60e183530d70848ebdd34"}, + {file = "kiwisolver-1.4.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:913983ad2deb14e66d83c28b632fd35ba2b825031f2fa4ca29675e665dfecbe1"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5337ec7809bcd0f424c6b705ecf97941c46279cf5ed92311782c7c9c2026f07f"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4c26ed10c4f6fa6ddb329a5120ba3b6db349ca192ae211e882970bfc9d91420b"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c619b101e6de2222c1fcb0531e1b17bbffbe54294bfba43ea0d411d428618c27"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:073a36c8273647592ea332e816e75ef8da5c303236ec0167196793eb1e34657a"}, + {file = "kiwisolver-1.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3ce6b2b0231bda412463e152fc18335ba32faf4e8c23a754ad50ffa70e4091ee"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win32.whl", hash = "sha256:f4c9aee212bc89d4e13f58be11a56cc8036cabad119259d12ace14b34476fd07"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:8a3ec5aa8e38fc4c8af308917ce12c536f1c88452ce554027e55b22cbbfbff76"}, + {file = "kiwisolver-1.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:76c8094ac20ec259471ac53e774623eb62e6e1f56cd8690c67ce6ce4fcb05650"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5d5abf8f8ec1f4e22882273c423e16cae834c36856cac348cfbfa68e01c40f3a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aeb3531b196ef6f11776c21674dba836aeea9d5bd1cf630f869e3d90b16cfade"}, + {file = "kiwisolver-1.4.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b7d755065e4e866a8086c9bdada157133ff466476a2ad7861828e17b6026e22c"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08471d4d86cbaec61f86b217dd938a83d85e03785f51121e791a6e6689a3be95"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7bbfcb7165ce3d54a3dfbe731e470f65739c4c1f85bb1018ee912bae139e263b"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d34eb8494bea691a1a450141ebb5385e4b69d38bb8403b5146ad279f4b30fa3"}, + {file = "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9242795d174daa40105c1d86aba618e8eab7bf96ba8c3ee614da8302a9f95503"}, + {file 
= "kiwisolver-1.4.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0f64a48bb81af7450e641e3fe0b0394d7381e342805479178b3d335d60ca7cf"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8e045731a5416357638d1700927529e2b8ab304811671f665b225f8bf8d8f933"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4322872d5772cae7369f8351da1edf255a604ea7087fe295411397d0cfd9655e"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e1631290ee9271dffe3062d2634c3ecac02c83890ada077d225e081aca8aab89"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:edcfc407e4eb17e037bca59be0e85a2031a2ac87e4fed26d3e9df88b4165f92d"}, + {file = "kiwisolver-1.4.7-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4d05d81ecb47d11e7f8932bd8b61b720bf0b41199358f3f5e36d38e28f0532c5"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win32.whl", hash = "sha256:b38ac83d5f04b15e515fd86f312479d950d05ce2368d5413d46c088dda7de90a"}, + {file = "kiwisolver-1.4.7-cp38-cp38-win_amd64.whl", hash = "sha256:d83db7cde68459fc803052a55ace60bea2bae361fc3b7a6d5da07e11954e4b09"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3f9362ecfca44c863569d3d3c033dbe8ba452ff8eed6f6b5806382741a1334bd"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8df2eb9b2bac43ef8b082e06f750350fbbaf2887534a5be97f6cf07b19d9583"}, + {file = "kiwisolver-1.4.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f32d6edbc638cde7652bd690c3e728b25332acbadd7cad670cc4a02558d9c417"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e2e6c39bd7b9372b0be21456caab138e8e69cc0fc1190a9dfa92bd45a1e6e904"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:dda56c24d869b1193fcc763f1284b9126550eaf84b88bbc7256e15028f19188a"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79849239c39b5e1fd906556c474d9b0439ea6792b637511f3fe3a41158d89ca8"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5e3bc157fed2a4c02ec468de4ecd12a6e22818d4f09cde2c31ee3226ffbefab2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3da53da805b71e41053dc670f9a820d1157aae77b6b944e08024d17bcd51ef88"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8705f17dfeb43139a692298cb6637ee2e59c0194538153e83e9ee0c75c2eddde"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:82a5c2f4b87c26bb1a0ef3d16b5c4753434633b83d365cc0ddf2770c93829e3c"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce8be0466f4c0d585cdb6c1e2ed07232221df101a4c6f28821d2aa754ca2d9e2"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:409afdfe1e2e90e6ee7fc896f3df9a7fec8e793e58bfa0d052c8a82f99c37abb"}, + {file = "kiwisolver-1.4.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5b9c3f4ee0b9a439d2415012bd1b1cc2df59e4d6a9939f4d669241d30b414327"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win32.whl", hash = "sha256:a79ae34384df2b615eefca647a2873842ac3b596418032bef9a7283675962644"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_amd64.whl", hash = "sha256:cf0438b42121a66a3a667de17e779330fc0f20b0d97d59d2f2121e182b0505e4"}, + {file = "kiwisolver-1.4.7-cp39-cp39-win_arm64.whl", hash = 
"sha256:764202cc7e70f767dab49e8df52c7455e8de0df5d858fa801a11aa0d882ccf3f"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94252291e3fe68001b1dd747b4c0b3be12582839b95ad4d1b641924d68fd4643"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5b7dfa3b546da08a9f622bb6becdb14b3e24aaa30adba66749d38f3cc7ea9706"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd3de6481f4ed8b734da5df134cd5a6a64fe32124fe83dde1e5b5f29fe30b1e6"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91b5f9f1205845d488c928e8570dcb62b893372f63b8b6e98b863ebd2368ff2"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40fa14dbd66b8b8f470d5fc79c089a66185619d31645f9b0773b88b19f7223c4"}, + {file = "kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:bfa1acfa0c54932d5607e19a2c24646fb4c1ae2694437789129cf099789a3b00"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:eee3ea935c3d227d49b4eb85660ff631556841f6e567f0f7bda972df6c2c9935"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f3160309af4396e0ed04db259c3ccbfdc3621b5559b5453075e5de555e1f3a1b"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a17f6a29cf8935e587cc8a4dbfc8368c55edc645283db0ce9801016f83526c2d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10849fb2c1ecbfae45a693c070e0320a91b35dd4bcf58172c023b994283a124d"}, + {file = "kiwisolver-1.4.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:ac542bf38a8a4be2dc6b15248d36315ccc65f0743f7b1a76688ffb6b5129a5c2"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8b01aac285f91ca889c800042c35ad3b239e704b150cfd3382adfc9dcc780e39"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:48be928f59a1f5c8207154f935334d374e79f2b5d212826307d072595ad76a2e"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f37cfe618a117e50d8c240555331160d73d0411422b59b5ee217843d7b693608"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:599b5c873c63a1f6ed7eead644a8a380cfbdf5db91dcb6f85707aaab213b1674"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:801fa7802e5cfabe3ab0c81a34c323a319b097dfb5004be950482d882f3d7225"}, + {file = "kiwisolver-1.4.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0c6c43471bc764fad4bc99c5c2d6d16a676b1abf844ca7c8702bdae92df01ee0"}, + {file = "kiwisolver-1.4.7.tar.gz", hash = "sha256:9893ff81bd7107f7b685d3017cc6583daadb4fc26e4a888350df530e41980a60"}, +] + [[package]] name = "lark" version = "1.2.2" @@ -2555,6 +2839,63 @@ files = [ {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] +[[package]] +name = "matplotlib" +version = "3.10.0" +description = "Python plotting package" +optional = false +python-versions 
= ">=3.10" +files = [ + {file = "matplotlib-3.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2c5829a5a1dd5a71f0e31e6e8bb449bc0ee9dbfb05ad28fc0c6b55101b3a4be6"}, + {file = "matplotlib-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2a43cbefe22d653ab34bb55d42384ed30f611bcbdea1f8d7f431011a2e1c62e"}, + {file = "matplotlib-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:607b16c8a73943df110f99ee2e940b8a1cbf9714b65307c040d422558397dac5"}, + {file = "matplotlib-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01d2b19f13aeec2e759414d3bfe19ddfb16b13a1250add08d46d5ff6f9be83c6"}, + {file = "matplotlib-3.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e6c6461e1fc63df30bf6f80f0b93f5b6784299f721bc28530477acd51bfc3d1"}, + {file = "matplotlib-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:994c07b9d9fe8d25951e3202a68c17900679274dadfc1248738dcfa1bd40d7f3"}, + {file = "matplotlib-3.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:fd44fc75522f58612ec4a33958a7e5552562b7705b42ef1b4f8c0818e304a363"}, + {file = "matplotlib-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c58a9622d5dbeb668f407f35f4e6bfac34bb9ecdcc81680c04d0258169747997"}, + {file = "matplotlib-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:845d96568ec873be63f25fa80e9e7fae4be854a66a7e2f0c8ccc99e94a8bd4ef"}, + {file = "matplotlib-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5439f4c5a3e2e8eab18e2f8c3ef929772fd5641876db71f08127eed95ab64683"}, + {file = "matplotlib-3.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4673ff67a36152c48ddeaf1135e74ce0d4bce1bbf836ae40ed39c29edf7e2765"}, + {file = "matplotlib-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:7e8632baebb058555ac0cde75db885c61f1212e47723d63921879806b40bec6a"}, + {file = "matplotlib-3.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4659665bc7c9b58f8c00317c3c2a299f7f258eeae5a5d56b4c64226fca2f7c59"}, + {file = "matplotlib-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d44cb942af1693cced2604c33a9abcef6205601c445f6d0dc531d813af8a2f5a"}, + {file = "matplotlib-3.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a994f29e968ca002b50982b27168addfd65f0105610b6be7fa515ca4b5307c95"}, + {file = "matplotlib-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b0558bae37f154fffda54d779a592bc97ca8b4701f1c710055b609a3bac44c8"}, + {file = "matplotlib-3.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:503feb23bd8c8acc75541548a1d709c059b7184cde26314896e10a9f14df5f12"}, + {file = "matplotlib-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:c40ba2eb08b3f5de88152c2333c58cee7edcead0a2a0d60fcafa116b17117adc"}, + {file = "matplotlib-3.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96f2886f5c1e466f21cc41b70c5a0cd47bfa0015eb2d5793c88ebce658600e25"}, + {file = "matplotlib-3.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:12eaf48463b472c3c0f8dbacdbf906e573013df81a0ab82f0616ea4b11281908"}, + {file = "matplotlib-3.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fbbabc82fde51391c4da5006f965e36d86d95f6ee83fb594b279564a4c5d0d2"}, + {file = "matplotlib-3.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad2e15300530c1a94c63cfa546e3b7864bd18ea2901317bae8bbf06a5ade6dcf"}, + {file = "matplotlib-3.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:3547d153d70233a8496859097ef0312212e2689cdf8d7ed764441c77604095ae"}, + {file = "matplotlib-3.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:c55b20591ced744aa04e8c3e4b7543ea4d650b6c3c4b208c08a05b4010e8b442"}, + {file = "matplotlib-3.10.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ade1003376731a971e398cc4ef38bb83ee8caf0aee46ac6daa4b0506db1fd06"}, + {file = "matplotlib-3.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:95b710fea129c76d30be72c3b38f330269363fbc6e570a5dd43580487380b5ff"}, + {file = "matplotlib-3.10.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdbaf909887373c3e094b0318d7ff230b2ad9dcb64da7ade654182872ab2593"}, + {file = "matplotlib-3.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d907fddb39f923d011875452ff1eca29a9e7f21722b873e90db32e5d8ddff12e"}, + {file = "matplotlib-3.10.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3b427392354d10975c1d0f4ee18aa5844640b512d5311ef32efd4dd7db106ede"}, + {file = "matplotlib-3.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5fd41b0ec7ee45cd960a8e71aea7c946a28a0b8a4dcee47d2856b2af051f334c"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:81713dd0d103b379de4516b861d964b1d789a144103277769238c732229d7f03"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:359f87baedb1f836ce307f0e850d12bb5f1936f70d035561f90d41d305fdacea"}, + {file = "matplotlib-3.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80dc3a4add4665cf2faa90138384a7ffe2a4e37c58d83e115b54287c4f06ef"}, + {file = "matplotlib-3.10.0.tar.gz", hash = "sha256:b886d02a581b96704c9d1ffe55709e49b4d2d52709ccebc4be42db856e511278"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +kiwisolver = ">=1.3.1" +numpy = ">=1.23" +packaging = ">=20.0" +pillow = ">=8" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[package.extras] +dev = ["meson-python (>=0.13.1,<0.17.0)", "pybind11 (>=2.13.2,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] + [[package]] name = "mccabe" version = "0.7.0" @@ -3193,6 +3534,98 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pillow" +version = "11.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = 
"sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "platformdirs" version = "4.3.6" @@ -5807,4 +6240,4 @@ type = ["pytest-mypy"] [metadata] 
lock-version = "2.0" python-versions = "^3.11" -content-hash = "c615df7205bca842a4fd1edb0ea32df5fa2ff3d72506cc28285f4d4a300e708f" +content-hash = "7af44d4cbf6980cf49dbaa6c299f86365b442cef1246d8538c34be00f9d230b7" diff --git a/pyproject.toml b/pyproject.toml index 0185814..71497a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ lxml = "^5.3.0" soso = {git = "https://github.com/clnsmth/soso.git", rev = "main"} ontogpt = "^1.0.6" daiquiri = "^3.2.5.1" +matplotlib = "^3.10.0" [tool.poetry.group.dev.dependencies] pytest = "^8.3.2" diff --git a/requirements.txt b/requirements.txt index 55de71b..3235fea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ adeft==0.12.3 aiofiles==24.1.0 aiohappyeyeballs==2.4.3 -aiohttp==3.11.7 +aiohttp==3.10.10 aiosignal==1.3.1 airium==0.2.6 alabaster==1.0.0 @@ -11,36 +11,38 @@ antlr4-python3-runtime==4.9.3 anyio==4.6.2.post1 appdirs==1.4.4 arrow==1.3.0 -astroid==3.3.5 +astroid==3.2.4 attrs==24.2.0 babel==2.16.0 backoff==2.2.1 bcp47==0.1.0 beautifulsoup4==4.12.3 bioc==2.1 -black==24.10.0 -blinker==1.9.0 -boto3==1.35.69 -botocore==1.35.69 +black==24.8.0 +blinker==1.8.2 +boto3==1.35.54 +botocore==1.35.54 Brotli==1.1.0 cachier==3.1.2 cattrs==24.1.2 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 CFGraph==0.2.1 chardet==5.2.0 -charset-normalizer==3.4.0 -class_resolver==0.5.4 +charset-normalizer==3.3.2 +class_resolver==0.5.2 click==8.1.7 click-option-group==0.5.6 colorama==0.4.6 -coverage==7.6.8 +contourpy==1.3.1 +coverage==7.6.1 curies==0.9.0 -daiquiri==3.0.0 +cycler==0.12.1 +daiquiri==3.2.5.1 defusedxml==0.7.1 -Deprecated==1.2.15 +Deprecated==1.2.14 deprecation==2.1.0 -dill==0.3.9 +dill==0.3.8 diskcache==5.6.3 distro==1.9.0 docopt==0.6.2 @@ -52,8 +54,9 @@ eutils==0.6.0 exceptiongroup==1.2.2 fastobo==0.12.3 filelock==3.16.1 -Flask==3.1.0 +Flask==3.0.3 flask-restx==1.3.0 +fonttools==4.55.3 fqdn==1.5.1 frontend==0.0.3 frozenlist==1.5.0 @@ -70,7 +73,7 @@ h2==4.1.0 hbreader==0.9.1 hpack==4.0.0 html5lib==1.1 -httpcore==1.0.7 +httpcore==1.0.6 httpx==0.27.2 huggingface-hub==0.26.2 hyperframe==6.0.1 @@ -83,12 +86,12 @@ inflect==7.4.0 inflection==0.5.1 iniconfig==2.0.0 intervaltree==3.1.0 -isodate==0.7.2 +isodate==0.6.1 isoduration==20.11.0 isort==5.13.2 itsdangerous==2.2.0 Jinja2==3.1.4 -jiter==0.7.1 +jiter==0.7.0 jmespath==1.0.1 joblib==1.4.2 json-flattener==0.1.9 @@ -102,148 +105,155 @@ jsonschema==4.23.0 jsonschema-specifications==2024.10.1 kgcl-rdflib==0.5.0 kgcl_schema==0.6.9 +kiwisolver==1.4.7 lark==1.2.2 linkml==1.8.5 linkml-dataops==0.1.0 linkml-owl==0.3.0 linkml-renderer==0.3.1 linkml-runtime==1.8.3 -litellm==1.52.16 +litellm==1.51.2 lxml==5.3.0 markdown-it-py==3.0.0 -MarkupSafe==3.0.2 +MarkupSafe==2.1.5 +matplotlib==3.10.0 mccabe==0.7.0 mdit-py-plugins==0.4.2 mdurl==0.1.2 more-click==0.1.2 more-itertools==10.5.0 multidict==6.1.0 +munkres==1.1.4 mypy-extensions==1.0.0 myst-parser==4.0.0 ndex2==3.9.0 networkx==3.4.2 nltk==3.9.1 -numpy==2.0.2 +numpy==2.1.1 oaklib==0.6.18 ols-client==0.1.4 -ontogpt==1.0.8 +ontogpt==1.0.6 ontoportal-client==0.0.4 -openai==1.55.1 +openai==1.53.0 openpyxl==3.1.5 owlrl==6.0.2 -packaging==24.2 -pandas==2.2.3 +packaging==24.1 +pandas==2.2.2 pansql==0.0.1 parse==1.20.2 pathspec==0.12.1 +pillow==11.0.0 pip==24.3.1 platformdirs==4.3.6 pluggy==1.5.0 ply==3.11 -portalocker==3.0.0 +portalocker==2.10.1 prefixcommons==0.1.12 prefixmaps==0.2.6 -prettytable==3.12.0 +prettytable==3.11.0 pronto==2.5.8 propcache==0.2.0 pycparser==2.22 -pydantic==2.10.1 -pydantic_core==2.27.1 +pydantic==2.9.2 
+pydantic_core==2.23.4 Pygments==2.18.0 PyJSG==0.11.10 -pylint==3.3.1 -PyMuPDF==1.24.14 -pyparsing==3.2.0 +pylint==3.2.7 +PyMuPDF==1.24.13 +pyparsing==3.1.4 pyshacl==0.26.0 PyShEx==0.8.1 PyShExC==0.9.1 PySocks==1.7.1 pysolr==3.10.0 -pystow==0.6.1 +pystow==0.5.6 pytest==8.3.3 -pytest-cov==6.0.0 +pytest-cov==5.0.0 pytest-logging==2015.11.4 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-gitlab==4.13.0 +python-gitlab==4.11.1 python-json-logger==2.0.7 -python-semantic-release==9.14.0 +python-semantic-release==9.8.8 PyTrie==0.4.0 -pytz==2024.1 +pytz==2024.2 PyYAML==6.0.2 ratelimit==2.2.1 -rdflib==7.1.1 +rdflib==7.0.0 rdflib-jsonld==0.6.1 rdflib-shim==1.0.3 referencing==0.35.1 -regex==2024.11.6 +regex==2024.9.11 requests==2.32.3 requests-cache==1.2.1 requests-toolbelt==1.0.0 rfc3339-validator==0.1.4 rfc3987==1.3.8 -rich==13.9.4 -rpds-py==0.21.0 +rich==13.8.1 +rpds-py==0.20.1 ruamel.yaml==0.18.6 ruamel.yaml.clib==0.2.12 -s3transfer==0.10.4 +s3transfer==0.10.3 scikit-learn==1.4.2 scipy==1.14.1 semsql==0.3.3 -setuptools==75.6.0 +setuptools==75.3.0 shellingham==1.5.4 ShExJSG==0.8.2 six==1.16.0 -smmap==5.0.0 +smmap==5.0.1 sniffio==1.3.1 snowballstemmer==2.2.0 sortedcontainers==2.4.0 -soso @ git+https://github.com/clnsmth/soso.git@main +soso==0.2.0 soupsieve==2.6 sparqlslurper==0.5.1 SPARQLWrapper==2.0.0 -Sphinx==8.1.3 -sphinx-autoapi==3.3.3 +Sphinx==8.0.2 +sphinx-autoapi==3.3.1 sphinxcontrib-applehelp==2.0.0 sphinxcontrib-devhelp==2.0.0 sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 -sphinxcontrib-serializinghtml==1.1.10 +sphinxcontrib-serializinghtml==2.0.0 +spinneret==0.2.0 SQLAlchemy==2.0.36 SQLAlchemy-Utils==0.38.3 -sssom==0.4.13 +sssom==0.4.12 sssom-schema==1.0.0 -starlette==0.41.3 +starlette==0.41.2 stdlib-list==0.11.0 tenacity==9.0.0 threadpoolctl==3.5.0 -tiktoken==0.8.0 -tokenizers==0.20.3 +tiktoken==0.7.0 +tokenizers==0.20.1 toml==0.10.2 tomli==2.1.0 tomlkit==0.13.2 -tqdm==4.67.1 -typeguard==4.4.1 +tornado==6.4.2 +tqdm==4.66.6 +typeguard==4.4.0 types-python-dateutil==2.9.0.20241003 typing_extensions==4.12.2 -tzdata==2024.2 +tzdata==2024.1 +unicodedata2==15.1.0 Unidecode==1.3.8 uri-template==1.3.0 url-normalize==1.4.3 urllib3==1.26.20 -uvicorn==0.32.1 +uvicorn==0.32.0 validators==0.34.0 watchdog==6.0.0 wcwidth==0.2.13 -webcolors==24.11.1 +webcolors==24.8.0 webencodings==0.5.1 -Werkzeug==3.1.3 +Werkzeug==3.1.1 wheel==0.45.1 wikipedia==1.4.0 Wikipedia-API==0.7.1 -wrapt==1.17.0 -yarl==1.18.0 -zipp==3.21.0 +wrapt==1.16.0 +yarl==1.17.1 +zipp==3.20.2 zstandard==0.23.0 diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 2cd8b69..c6e8c15 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -10,8 +10,9 @@ from contextlib import contextmanager from daiquiri import getLogger import pandas as pd +import matplotlib.pyplot as plt from spinneret.utilities import load_workbook, compress_uri -from spinneret.workbook import delete_duplicate_annotations +from spinneret.workbook import delete_duplicate_annotations, delete_unannotated_rows logger = getLogger(__name__) @@ -374,3 +375,116 @@ def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: return None return db + + +def plot_grounding_rates( + grounding_rates: dict, configuration: str, output_file: str +) -> None: + """ + Plot the grounding rates of the test data. + + :param grounding_rates: The return value from the `get_grounding_rates` + function. 
+    :param configuration: The configuration of OntoGPT that was used to
+        generate the test data. This is typically the directory name of the
+        test data.
+    :param output_file: The path to save the plot to, as a PNG file.
+    :return: None
+    """
+
+    # Reformatting the grounding rates dictionary into a DataFrame for plotting
+    df = pd.DataFrame(grounding_rates).T
+
+    # Calculate percentages
+    df_percent = df.div(df.sum(axis=1), axis=0) * 100
+
+    # Build the stacked bars and add data labels to the segments
+    plt.figure(figsize=(10, 6))
+    bottom = [0] * len(df)
+    for col in df_percent.columns:
+        bars = plt.bar(df_percent.index, df_percent[col], bottom=bottom, label=col)
+        for item in bars:
+            height = item.get_height()
+            if height > 5:  # Only add labels if the segment is large enough
+                plt.text(
+                    item.get_x() + item.get_width() / 2,
+                    item.get_y() + height / 2,
+                    f"{height:.1f}%",
+                    ha="center",
+                    va="center",
+                    color="white",
+                    fontsize=9,
+                )
+        bottom = [bottom[i] + df_percent[col][i] for i in range(len(bottom))]
+
+    plt.ylabel("Percentage")
+    title = f"OntoGPT Grounding Rates for Configuration '{configuration}'"
+    plt.title(title)
+    plt.xticks(rotation=-20)
+    plt.legend(title="State")
+    plt.tight_layout()
+    plt.savefig(output_file, dpi=300)
+    plt.show()
+
+
+def get_grounding_rates(test_dir: str) -> dict:
+    """
+    Get the OntoGPT grounding rates of the test data, by predicate.
+
+    Predicates may have different grounding rates, due to differences in LLM
+    prompting and the nature of the vocabularies/ontologies being grounded to.
+
+    :param test_dir: Path to a directory containing the test annotated
+        workbook files.
+    :return: A nested set of dictionaries containing the grounding rates of the
+        test data. The first level of dictionary keys are the predicates, and
+        the values are a second dictionary with keys "grounded" and
+        "ungrounded". The values of these keys are the number of grounded and
+        ungrounded terms, respectively.
+    """
+    res = {
+        "env_broad_scale": {"grounded": 0, "ungrounded": 0},
+        "env_local_scale": {"grounded": 0, "ungrounded": 0},
+        "contains process": {"grounded": 0, "ungrounded": 0},
+        "environmental material": {"grounded": 0, "ungrounded": 0},
+        "contains measurements of type": {"grounded": 0, "ungrounded": 0},
+        "uses standard": {"grounded": 0, "ungrounded": 0},
+        "usesMethod": {"grounded": 0, "ungrounded": 0},
+        "research topic": {"grounded": 0, "ungrounded": 0},
+    }
+
+    files = [f for f in os.listdir(test_dir) if f.endswith(".tsv")]
+    for file in files:
+        path = os.path.join(test_dir, file)
+        logger.info(f"Getting grounding rates for {path}")
+        wb = load_workbook(path)
+        wb = delete_unannotated_rows(wb)  # OntoGPT skipped these, don't count
+
+        # Group object_ids by predicate and element_xpath. These represent
+        # unique annotation opportunities for OntoGPT to ground to an ontology.
+        object_id_groups = group_object_ids(wb)
+
+        # For each group determine if the object_ids are grounded or ungrounded
+        for key, data in object_id_groups.items():
+            predicate = key[0]
+            if is_grounded(data):
+                res[predicate]["grounded"] += 1
+            else:
+                res[predicate]["ungrounded"] += 1
+    return res
+
+
+def is_grounded(data: list) -> bool:
+    """
+    Determine if the list contains a grounded object_id.
+
+    :param data: List of object_ids.
+    :return: True if the list contains a grounded object_id, False otherwise.
+        A grounded term is defined as a term that contains "http".
+        Ungrounded terms are those that begin with "AUTO:" or are None.
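+
+    Illustrative examples (example values only, not drawn from real
+    workbooks):
+        is_grounded(["AUTO:ungrounded label", None]) -> False
+        is_grounded(["http://purl.obolibrary.org/obo/ENVO_01000252"]) -> True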
+ """ + # Remove None and NaN values from list to avoid errors on string matching + data = [d for d in data if d is not None] + data = [d for d in data if not pd.isna(d)] + + return any("http" in s for s in data) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 8b10a28..34482c0 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -16,6 +16,8 @@ parse_similarity_scores, delete_terms_from_unsupported_ontologies, get_shared_ontology, + get_grounding_rates, + is_grounded, ) from spinneret.utilities import is_url @@ -252,3 +254,36 @@ def test_get_shared_ontology(): set2 = [] db = get_shared_ontology(set1, set2) assert db is None + + +def test_get_grounding_rates(): + """Test the get_grounding_rates function""" + grounding_rates = get_grounding_rates("tests/data/benchmark/test_a") + + # The result is a dictionary with expected keys and value types + assert isinstance(grounding_rates, dict) + assert set(grounding_rates.keys()) == { + "contains measurements of type", + "contains process", + "environmental material", + "uses standard", + "env_local_scale", + "research topic", + "env_broad_scale", + "usesMethod", + } + for _, v in grounding_rates.items(): + for k2, v2 in v.items(): + assert k2 in ["grounded", "ungrounded"] + assert isinstance(v2, int) + + +def test_is_grounded(): + """Test the is_grounded function""" + + # Lists with None or NaN values are not grounded + assert not is_grounded([None]) + assert not is_grounded([pd.NA]) + + # But lists with strings starting with "http" are grounded + assert is_grounded(["http://example.com"]) From 13b2eb64bcd5f3b01c9e41767f445897a803970e Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 11:45:31 -0500 Subject: [PATCH 16/24] feat: add logging to `benchmark_against_standard` for better insights Add logging capabilities to the `benchmark_against_standard` function to provide insights into the ongoing execution process, especially helpful for this time-consuming operation. --- src/spinneret/benchmark.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index c6e8c15..7930579 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -94,9 +94,11 @@ def benchmark_against_standard(standard_dir: str, test_dirs: list) -> pd.DataFra if not standard_file.endswith(".tsv"): # we are expecting tsv files continue standard_path = os.path.join(standard_dir, standard_file) + logger.info(f"Benchmarking against standard file: {standard_path}") for test_dir in test_dirs: test_path = os.path.join(test_dir, standard_file) + logger.info(f"Comparing to test file: {test_path}") if not os.path.exists(test_path): # we need a matching test file continue From 513e5e5789e84fe10d215c844c1d5098be6fb2f5 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 12:44:44 -0500 Subject: [PATCH 17/24] test: create test data for term-set similarity score analysis Create a set of test data containing term-set similarity scores for various configurations, enabling unit testing of downstream functions that analyze and interpret these scores. 
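As a rough, illustrative sketch (not part of this patch), downstream analyses
are expected to consume the fixture roughly along these lines; the column
names come from the TSV header, while the grouping shown is only an assumed
example:

    import pandas as pd

    # Each row compares one predicate/element_xpath pair from a test
    # configuration against the benchmark standard.
    scores = pd.read_csv(
        "tests/data/benchmark/termset_similarity_scores.tsv", sep="\t"
    )

    # e.g., mean average_score per test configuration (test_dir)
    print(scores.groupby("test_dir")["average_score"].mean())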
--- tests/conftest.py | 11 +++++++++++ tests/data/benchmark/termset_similarity_scores.tsv | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/data/benchmark/termset_similarity_scores.tsv diff --git a/tests/conftest.py b/tests/conftest.py index 012eee8..f6f8ba0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ from json import load import pytest +import pandas as pd from spinneret.utilities import load_workbook @@ -55,3 +56,13 @@ def termset_similarity_score_fields(): "average_test_information_content", "best_test_information_content", ] + + +@pytest.fixture(name="termset_similarity_score_dataframe") +def termset_similarity_score_dataframe(): + """Return a fixture for a dataframe of termset similarity scores returned + by the benchmark_against_standard function""" + scores = pd.read_csv( + "tests/data/benchmark/termset_similarity_scores.tsv", sep="\t", encoding="utf-8" + ) + return scores diff --git a/tests/data/benchmark/termset_similarity_scores.tsv b/tests/data/benchmark/termset_similarity_scores.tsv new file mode 100644 index 0000000..eb2fee7 --- /dev/null +++ b/tests/data/benchmark/termset_similarity_scores.tsv @@ -0,0 +1,11 @@ +standard_dir test_dir standard_file predicate_value element_xpath_value standard_set test_set average_score best_score average_jaccard_similarity best_jaccard_similarity average_phenodigm_score best_phenodigm_score average_standard_information_content best_standard_information_content average_test_information_content best_test_information_content +tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01000317', 'ENVO:01001209', 'ENVO:01001209'] 4.399317289600849 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 7.598198606401752 8.321928094887362 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[31] ['ECSO:00002844'] ['ECSO:00002359', 'ECSO:00001534'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[41] ['ECSO:00001727'] ['ECSO:00000329'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[5] ['ECSO:00000515'] ['ECSO:00001250'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01000286', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01001021', 'ENVO:01000774', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.1.59_annotation_workbook_annotated.tsv usesMethod /eml:eml/dataset ['ENVTHES:21335', 'ENVTHES:20223', 'ENVTHES:21337', 'ENVTHES:20243', 'ENVTHES:20285', 'ENVTHES:21339', 'ENVTHES:20304', 'https://www.wikidata.org/wiki/Q591867', 
'https://www.wikidata.org/wiki/Q5149058'] ['ENVTHES:20803', 'ENVTHES:10375', 'ENVTHES:20104', 'ENVTHES:22297', 'ENVTHES:10328'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_a knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:00000035', 'ENVO:01001209', 'ENVO:01001209'] 4.521487919995395 4.616452786848972 0.2689232631619699 0.2840909090909091 1.0989954987335404 1.1452040294162371 12.185656141890044 12.78135971352466 9.035433165359823 11.196397212803504 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[14] ['ECSO:00001799'] ['ECSO:00001120'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv contains measurements of type /eml:eml/dataset/dataTable/attributeList/attribute[26] ['ECSO:00001720'] ['ECSO:00001534'] 0.0 0.0 +tests/data/benchmark/standard tests/data/benchmark/test_b knb-lter-ntl.2.37_annotation_workbook_annotated.tsv env_broad_scale /eml:eml/dataset ['ENVO:01001021', 'ENVO:01000548', 'ENVO:01000775', 'ENVO:01000774', 'ENVO:01000286', 'ENVO:01000287', 'ENVO:01000252'] ['ENVO:01001209'] 4.509617311638698 4.616452786848972 0.2719553079933457 0.28735632183908044 1.1051688054274622 1.1517668569518282 12.185656141890044 12.78135971352466 6.874469117916141 6.874469117916141 From 1bd118495fb9a46c98336bdac6aded865b5de07f Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 16:17:01 -0500 Subject: [PATCH 18/24] feat: visualize similarity metrics by predicate Implement a visualization to assess the accuracy of different OntoGPT configurations relative to a baseline standard for each predicate represented by OntoGPT templates. Use a simple box plot to effectively display and compare similarity metrics across predicate values. --- src/spinneret/benchmark.py | 74 ++++++++++++++++++++++++++++++++++++++ tests/test_benchmark.py | 11 ++++++ 2 files changed, 85 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 7930579..d4f1e90 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -11,6 +11,7 @@ from daiquiri import getLogger import pandas as pd import matplotlib.pyplot as plt +import numpy as np from spinneret.utilities import load_workbook, compress_uri from spinneret.workbook import delete_duplicate_annotations, delete_unannotated_rows @@ -490,3 +491,76 @@ def is_grounded(data: list) -> bool: data = [d for d in data if not pd.isna(d)] return any("http" in s for s in data) + + +def plot_similarity_scores_by_predicate( + benchmark_results: pd.DataFrame, + test_dir_path: str, + metric: str, + output_file: str = None, +) -> None: + """ + To see predicate level performance for an OntoGPT test configuration + + :param benchmark_results: The return value from the + `benchmark_against_standard` function. + :param test_dir_path: Path to the test directory containing the test + annotated workbook files for the desired configuration. This should be + a value from the `test_dir` column of the benchmark_results DataFrame, + which indicates the configuration comparison to plot. + :param metric: The metric to plot. This should be a column name from the + benchmark_results DataFrame, e.g. "average_score", "best_score", etc. 
+ :param output_file: The path to save the plot to, as a PNG file. + :return: None + """ + # Subset the benchmark results dataframe to only include the desired + # columns: test_dir, metric + df = benchmark_results[benchmark_results["test_dir"] == test_dir_path][ + ["predicate_value", metric] + ] + + # Remove empty rows where the metric is 0 or NaN to avoid plotting them + df = df.dropna(subset=[metric]) + df = df[df[metric] != 0] + + # Order the "predicate_value" column to ensure the plot's x-axis is ordered + # correctly + df["predicate_value"] = pd.Categorical( + df["predicate_value"], + [ + "env_broad_scale", + "env_local_scale", + "contains process", + "environmental material", + "contains measurements of type", + "uses standard", + "usesMethod", + "research topic", + ], + ) + + plt.figure(figsize=(10, 6)) + grouped_data_long = df.groupby("predicate_value")[metric].apply(list) + plt.boxplot( + grouped_data_long.values, labels=grouped_data_long.index, showmeans=True + ) + + # Add individual data points (jittered) + for i, group_data in enumerate(grouped_data_long): + x = np.random.normal(i + 1, 0.08, size=len(group_data)) # Jitter x-values + plt.plot(x, group_data, "o", alpha=0.25, color="grey") + + configuration = os.path.basename(test_dir_path) + + plt.xlabel("Predicate") + plt.ylabel("Score") + title = ( + f"Similarity Score '{metric}' Against Benchmark Standard for " + f"Configuration '{configuration}'" + ) + plt.title(title) + plt.xticks(rotation=-20) + plt.tight_layout() + if output_file: + plt.savefig(output_file, dpi=300) + plt.show() diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 34482c0..eb432f2 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -18,6 +18,7 @@ get_shared_ontology, get_grounding_rates, is_grounded, + plot_similarity_scores_by_predicate, ) from spinneret.utilities import is_url @@ -287,3 +288,13 @@ def test_is_grounded(): # But lists with strings starting with "http" are grounded assert is_grounded(["http://example.com"]) + + +@pytest.mark.skip(reason="Manual inspection required") +def test_plot_similarity_scores_by_predicate(termset_similarity_score_dataframe): + """Test the plot_similarity_scores_by_predicate function""" + plot_similarity_scores_by_predicate( + benchmark_results=termset_similarity_score_dataframe, + test_dir_path="tests/data/benchmark/test_a", + metric="average_score", + ) From b39262910b05e2b0ea7de92e3759683c654c8920 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 16:58:52 -0500 Subject: [PATCH 19/24] feat: visualize similarity metrics by configuration Implement a visualization to assess the accuracy of different OntoGPT configurations relative to a baseline. Use a simple box plot to display and compare configurations. --- src/spinneret/benchmark.py | 45 ++++++++++++++++++++++++++++++++++++++ tests/test_benchmark.py | 10 +++++++++ 2 files changed, 55 insertions(+) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index d4f1e90..63f6236 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -564,3 +564,48 @@ def plot_similarity_scores_by_predicate( if output_file: plt.savefig(output_file, dpi=300) plt.show() + + +def plot_similarity_scores_by_configuration( + benchmark_results: pd.DataFrame, + metric: str, + output_file: str = None, +) -> None: + """ + To see configuration level performance for an OntoGPT predicate + + :param benchmark_results: The return value from the + `benchmark_against_standard` function. 
+ :param metric: The metric to plot. This should be a column name from the + benchmark_results DataFrame, e.g. "average_score", "best_score", etc. + :param output_file: The path to save the plot to, as a PNG file. + :return: None + """ + # Subset the benchmark results dataframe to only include the desired + # columns: test_dir, metric + df = benchmark_results[["test_dir", metric]] + + # Remove empty rows where the metric is 0 or NaN to avoid plotting them + df = df.dropna(subset=[metric]) + df = df[df[metric] != 0] + + plt.figure(figsize=(10, 6)) + grouped_data_long = df.groupby("test_dir")[metric].apply(list) + plt.boxplot( + grouped_data_long.values, labels=grouped_data_long.index, showmeans=True + ) + + # Add individual data points (jittered) + for i, group_data in enumerate(grouped_data_long): + x = np.random.normal(i + 1, 0.08, size=len(group_data)) # Jitter x-values + plt.plot(x, group_data, "o", alpha=0.25, color="grey") + + plt.xlabel("Configuration") + plt.ylabel("Score") + title = f"Similarity Score '{metric}' Across Configurations" + plt.title(title) + plt.xticks(rotation=-20) + plt.tight_layout() + if output_file: + plt.savefig(output_file, dpi=300) + plt.show() diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index eb432f2..5890997 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -19,6 +19,7 @@ get_grounding_rates, is_grounded, plot_similarity_scores_by_predicate, + plot_similarity_scores_by_configuration, ) from spinneret.utilities import is_url @@ -298,3 +299,12 @@ def test_plot_similarity_scores_by_predicate(termset_similarity_score_dataframe) test_dir_path="tests/data/benchmark/test_a", metric="average_score", ) + + +@pytest.mark.skip(reason="Manual inspection required") +def test_plot_similarity_scores_by_configuration(termset_similarity_score_dataframe): + """Test the plot_similarity_scores_by_configuration function""" + plot_similarity_scores_by_configuration( + benchmark_results=termset_similarity_score_dataframe, + metric="average_score", + ) From 18d01bbf495bee37977bccf3b46f2114dce40a78 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 20 Dec 2024 17:08:36 -0500 Subject: [PATCH 20/24] feat: make plot writing to file optional in `plot_grounding_rates` Make writing plots to file optional in the `plot_grounding_rates` function by introducing a new parameter to control this behavior. This allows for flexible usage, including previewing plots without generating files. --- src/spinneret/benchmark.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/spinneret/benchmark.py b/src/spinneret/benchmark.py index 63f6236..d69af9b 100644 --- a/src/spinneret/benchmark.py +++ b/src/spinneret/benchmark.py @@ -381,7 +381,7 @@ def get_shared_ontology(set1: list, set2: list) -> Union[str, None]: def plot_grounding_rates( - grounding_rates: dict, configuration: str, output_file: str + grounding_rates: dict, configuration: str, output_file: str = None ) -> None: """ Plot the grounding rates of the test data. 
@@ -426,7 +426,8 @@ def plot_grounding_rates( plt.xticks(rotation=-20) plt.legend(title="State") plt.tight_layout() - plt.savefig(output_file, dpi=300) + if output_file: + plt.savefig(output_file, dpi=300) plt.show() From 25f0a8bc89c406c70fc1259e68abc8c243c94384 Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Mon, 23 Dec 2024 11:56:08 -0500 Subject: [PATCH 21/24] refactor: remove outdated `add_dataset_annotations_to_workbook` function Remove the outdated `add_dataset_annotations_to_workbook` function, as it lacks the necessary granularity for predicate-level categorization of semantic annotations, a crucial aspect of our current annotation model. While alternative approaches exist (e.g., annotating with terms from multiple vocabularies and then categorizing based on branch), the ongoing development and active community support for OntoGPT suggest a more promising long-term solution. --- src/spinneret/annotator.py | 265 ++++++++++--------------------------- src/spinneret/main.py | 6 - tests/test_annotator.py | 212 ++--------------------------- 3 files changed, 81 insertions(+), 402 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index ea6127e..a1637f8 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -138,7 +138,6 @@ def annotate_workbook( workbook_path: str, eml_path: str, output_path: str, - annotator: str, local_model: str = None, temperature: Union[float, None] = None, return_ungrounded: bool = False, @@ -150,10 +149,6 @@ def annotate_workbook( corresponding to the EML file. :param eml_path: The path to the EML file corresponding to the workbook. :param output_path: The path to write the annotated workbook. - :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param local_model: See `get_ontogpt_annotation` documentation for details. :param temperature: The temperature parameter for the model. If None, the OntoGPT default will be used. @@ -167,7 +162,6 @@ def annotate_workbook( path as the original workbook. 
""" logger.info(f"Annotating workbook {workbook_path}") - logger.info(f"Annotating with {annotator}") # Ensure the workbook and eml file match to avoid errors pid = os.path.basename(workbook_path).split("_")[0] @@ -180,70 +174,63 @@ def annotate_workbook( wb = load_workbook(workbook_path) eml = load_eml(eml_path) - # Run workbook annotators, results of one are used as input for the next - if annotator == "bioportal": - wb = add_dataset_annotations_to_workbook(wb, eml, sample_size=sample_size) - wb = add_measurement_type_annotations_to_workbook( - wb, eml, annotator=annotator, sample_size=sample_size - ) - elif annotator == "ontogpt": - wb = add_env_broad_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_local_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_process_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_methods_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_research_topic_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_measurement_type_annotations_to_workbook( - wb, - eml, - annotator="ontogpt", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_medium_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) + # Run workbook annotator, results of one are used as input for the next + wb = add_env_broad_scale_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_env_local_scale_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_process_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_methods_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_research_topic_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_measurement_type_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_env_medium_annotations_to_workbook( + wb, + eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator write_workbook(wb, output_path) @@ -485,98 +472,11 @@ def add_qudt_annotations_to_workbook( return wb -def 
add_dataset_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `dataset` annotations in the - workbook, so a fresh set may be created. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with dataset annotations. - """ - logger.info("Annotating dataset") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_bioportal_annotation" - - # Remove existing dataset annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "author": author, - }, - ) - - # Get the dataset annotations - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - annotations = [] - for _ in range(sample_size): - res = get_bioportal_annotation( # expecting a list of annotations - text=element_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ENVO", # ENVO provides environmental terms - exclude_synonyms="true", - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add dataset annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = "is about" - row["predicate_id"] = "http://purl.obolibrary.org/obo/IAO_0000136" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - # pylint: disable=too-many-branches # pylint: disable=too-many-statements def add_measurement_type_annotations_to_workbook( workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], - annotator: str, output_path: str = None, overwrite: bool = False, local_model: str = None, @@ -589,10 +489,6 @@ def add_measurement_type_annotations_to_workbook( workbook itself as a pandas DataFrame. :param eml: Either the path to the EML file corresponding to the workbook, or the EML file itself as an lxml etree. 
- :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param output_path: The path to write the annotated workbook. :param overwrite: If True, overwrite existing `measurement type` annotations in the workbook, so a fresh set may be created. @@ -652,35 +548,19 @@ def add_measurement_type_annotations_to_workbook( ) if annotations is None: - # Select an annotator, and get the measurement type annotations - if annotator.lower() == "ontogpt": - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - else: - annotations = [] - for _ in range(sample_size): - res = get_bioportal_annotation( - # expecting a list of annotations - text=attribute_description, - api_key=os.environ["BIOPORTAL_API_KEY"], - ontologies="ECSO", # ECSO provides measurment terms - exclude_synonyms="true", - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None + annotations = [] + for _ in range(sample_size): + res = get_ontogpt_annotation( + text=attribute_description, + template="contains_measurement_of_type", + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + ) + if res is not None: + annotations.extend(res) + if len(annotations) == 0: + annotations = None # Add the measurement type annotations to the workbook if annotations is not None: @@ -704,10 +584,7 @@ def add_measurement_type_annotations_to_workbook( ) row["object"] = annotation["label"] row["object_id"] = annotation["uri"] - if annotator.lower() == "ontogpt": - row["author"] = "spinneret.annotator.get_ontogpt_annotation" - elif annotator.lower() == "bioportal": - row["author"] = "spinneret.annotator.get_bioportal_annotation" + row["author"] = "spinneret.annotator.get_ontogpt_annotation" row["date"] = pd.Timestamp.now() row = pd.DataFrame([row], dtype=str) wb = pd.concat([wb, row], ignore_index=True) diff --git a/src/spinneret/main.py b/src/spinneret/main.py index d8d4fd3..4e2d382 100644 --- a/src/spinneret/main.py +++ b/src/spinneret/main.py @@ -53,7 +53,6 @@ def create_workbooks(eml_dir: str, workbook_dir: str) -> None: def annotate_workbooks( workbook_dir: str, eml_dir: str, - annotator: str, output_dir: str, config_path: str, local_model: str = None, @@ -65,10 +64,6 @@ def annotate_workbooks( :param workbook_dir: Directory of unannotated workbooks :param eml_dir: Directory of EML files corresponding to workbooks - :param annotator: The annotator to use for grounding. Options are "ontogpt" - and "bioportal". OntoGPT requires setup and configuration described in - the `get_ontogpt_annotation` function. Similarly, BioPortal requires - an API key and is described in the `get_bioportal_annotation` function. :param output_dir: Directory to save annotated workbooks :param config_path: Path to configuration file :param local_model: See `get_ontogpt_annotation` documentation for details. 
@@ -113,7 +108,6 @@ def annotate_workbooks( annotate_workbook( workbook_path=workbook_dir + "/" + workbook_file, eml_path=eml_dir + "/" + eml_file, - annotator=annotator, output_path=output_dir + "/" + workbook_file_annotated, local_model=local_model, temperature=temperature, diff --git a/tests/test_annotator.py b/tests/test_annotator.py index afc0ef0..0fcbf1d 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -13,7 +13,6 @@ annotate_eml, create_annotation_element, add_qudt_annotations_to_workbook, - add_dataset_annotations_to_workbook, add_measurement_type_annotations_to_workbook, add_process_annotations_to_workbook, add_env_broad_scale_annotations_to_workbook, @@ -81,73 +80,6 @@ def test_get_bioportal_annotation(mocker, use_mock, get_annotation_fixture): assert item["uri"] != "" -# pylint: disable=duplicate-code -@pytest.mark.parametrize("use_mock", [True]) # False tests with real HTTP requests -def test_annotate_workbook_with_bioportal( - tmp_path, mocker, use_mock, get_annotation_fixture -): - """Test annotate_workbook using the BioPortal annotator""" - - # Configure the mock responses - if use_mock: - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=get_annotation_fixture, - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - mocker.patch( - "spinneret.annotator.get_qudt_annotation", - return_value=get_annotation_fixture, - ) - else: - if not os.path.exists("config.json"): - pytest.skip( - "Skipping test due to missing config.json file in package root." - ) - load_configuration("config.json") - - # Copy the workbook to tmp_path for editing - wb_path = "tests/edi.3.9_annotation_workbook.tsv" - wb_path_copy = str(tmp_path) + "/edi.3.9_annotation_workbook.tsv" - copyfile(wb_path, wb_path_copy) - wb_path_annotated = str(tmp_path) + "/edi.3.9_annotation_workbook_annotated.tsv" - - # Check features of the unannotated workbook - assert os.path.exists(wb_path_copy) - wb = load_workbook(wb_path_copy) - # The columns to be annotated should be empty - cols_to_annotate = [ - "predicate", - "predicate_id", - "object", - "object_id", - "author", - "date", - ] - for col in cols_to_annotate: - assert wb[col].isnull().all() - - # Annotate the workbook copy - annotate_workbook( - workbook_path=wb_path_copy, - eml_path=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=wb_path_annotated, - annotator="bioportal", - ) - - # Check the workbook was annotated - assert os.path.exists(wb_path_annotated) - wb = load_workbook(wb_path_annotated) - # The columns to be annotated should be full - for col in cols_to_annotate: - assert not wb[col].isnull().all() - # The authors are the annotator functions called under this configuration - authors = wb["author"].unique() - authors = [x for x in authors if pd.notna(x)] - assert "spinneret.annotator.get_bioportal_annotation" in authors - assert "spinneret.annotator.get_qudt_annotation" in authors - - # pylint: disable=duplicate-code @pytest.mark.parametrize("use_mock", [True]) # False tests with real LLM queries def test_annotate_workbook_with_ontogpt( @@ -192,7 +124,6 @@ def test_annotate_workbook_with_ontogpt( workbook_path=wb_path_copy, eml_path=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=wb_path_annotated, - annotator="ontogpt", local_model="llama3.2", return_ungrounded=True, # ensures we get at least one annotation back ) @@ -409,120 +340,24 @@ def test_has_annotations(): assert has_annotations(wb) is True -@pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests -def 
test_add_dataset_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_dataset_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "freshwater lake biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_01000252", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_dataset_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. - if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "A different biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_XXXXXXXX", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_dataset_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - overwrite=True, - ) - assert wb["object"].str.contains("A different biome").any() - assert ( - wb["object_id"] - .str.contains("http://purl.obolibrary.org/obo/ENVO_XXXXXXXX") - .any() - ) - - # Original annotations are gone - assert not wb["object"].str.contains("freshwater lake biome").any() - assert ( - not wb["object_id"] - .str.contains("http://purl.obolibrary.org/obo/ENVO_01000252") - .any() - ) - - -def test_add_dataset_annotations_to_workbook_io_options(tmp_path, mocker): - """Test add_dataset_annotations_to_workbook with different input and output - options""" - - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[ - { - "label": "freshwater lake biome", - "uri": "http://purl.obolibrary.org/obo/ENVO_01000252", - } - ], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - - # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv" - wb = add_dataset_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - wb = load_workbook(output_path) - assert has_annotations(wb) - - # Accepts dataframes and etree objects as input - wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") - eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_dataset_annotations_to_workbook(workbook=wb, eml=eml) - assert has_annotations(wb) - - @pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker): """Test add_measurement_type_annotations_to_workbook""" # Parameterize the test workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" + output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" # The workbook shouldn't have any annotations yet wb = load_workbook(workbook_path) assert not has_annotations(wb) - # The workbook has 
annotations after calling the function + # The workbook "should" have annotations after calling the function. We + # say "should" because OntoGPT is non-deterministic, and we can't always + # expect the same results, or any results at all. if use_mock: mocker.patch( - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "depth", @@ -530,11 +365,9 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" wb = add_measurement_type_annotations_to_workbook( workbook=workbook_path, eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, ) assert has_annotations(wb) @@ -543,7 +376,7 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker # requests because we'll expect the same results as the first call. if use_mock: mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "A different measurement type", @@ -551,11 +384,9 @@ def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" wb = add_measurement_type_annotations_to_workbook( workbook=output_path, # the output from the first call eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, overwrite=True, ) @@ -578,7 +409,7 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke and output options""" mocker.patch( - "spinneret.annotator.get_bioportal_annotation", + "spinneret.annotator.get_ontogpt_annotation", return_value=[ { "label": "depth", @@ -586,14 +417,12 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke } ], ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv" + output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" wb = add_measurement_type_annotations_to_workbook( workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - annotator="bioportal", output_path=output_path, ) wb = load_workbook(output_path) @@ -602,15 +431,12 @@ def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocke # Accepts dataframes and etree objects as input wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_measurement_type_annotations_to_workbook( - workbook=wb, eml=eml, annotator="bioportal" - ) + wb = add_measurement_type_annotations_to_workbook(workbook=wb, eml=eml) assert has_annotations(wb) def test_annotators_are_listed_as_authors(tmp_path, mocker): - """Test that the annotators are listed as authors in the workbook. 
Test - this for each workbook annotator with an annotator parameter.""" + """Test that the annotators are listed as authors in the workbook.""" # Test for the `add_measurement_type_annotations_to_workbook` function # using the OntoGPT annotator @@ -622,30 +448,12 @@ def test_annotators_are_listed_as_authors(tmp_path, mocker): workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", - annotator="ontogpt", local_model="llama3.2", ) authors = wb["author"].unique() authors = [x for x in authors if pd.notna(x)] assert "spinneret.annotator.get_ontogpt_annotation" == authors[0] - # Test for the `add_measurement_type_annotations_to_workbook` function - # using the Bioportal annotator - mocker.patch( - "spinneret.annotator.get_bioportal_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - os.environ["BIOPORTAL_API_KEY"] = "mock api key" - wb = add_measurement_type_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", - annotator="bioportal", - ) - authors = wb["author"].unique() - authors = [x for x in authors if pd.notna(x)] - assert "spinneret.annotator.get_bioportal_annotation" == authors[0] - @pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries def test_get_ontogpt_annotation(mocker, use_mock): From ed668b1ecf46121045733d37f0dfdbc357d043ca Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Fri, 27 Dec 2024 21:10:08 -0500 Subject: [PATCH 22/24] refactor: consolidate OntoGPT workbook annotators into a single function Consolidate multiple OntoGPT workbook annotator functions into a single, unified function to improve code maintainability, reduce redundancy, and enhance overall code clarity. 
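For illustration, a call that previously went through one of the per-predicate
helpers (e.g. add_process_annotations_to_workbook) now goes through the single
entry point introduced below. This is a minimal usage sketch based on the new
signature; the workbook/EML paths, model name, and sample size are
illustrative values only:

    from spinneret.annotator import add_predicate_annotations_to_workbook

    # Ground the "contains process" predicate with a local model; the
    # replicate OntoGPT runs reduce run-to-run variability (values are
    # illustrative).
    wb = add_predicate_annotations_to_workbook(
        predicate="contains process",
        workbook="edi.3.9_annotation_workbook.tsv",
        eml="edi.3.9.xml",
        local_model="llama3.2",
        sample_size=3,
    )

The same call, varying only `predicate`, replaces the seven near-identical
add_*_annotations_to_workbook functions removed by this patch; the
predicate-to-XPath, predicate-to-template, and predicate-to-ID mappings move
into utilities.py.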
--- src/spinneret/annotator.py | 922 ++++--------------------------------- src/spinneret/utilities.py | 73 +++ tests/test_annotator.py | 374 +-------------- tests/test_utilities.py | 43 ++ 4 files changed, 210 insertions(+), 1202 deletions(-) diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py index a1637f8..7bee5cd 100644 --- a/src/spinneret/annotator.py +++ b/src/spinneret/annotator.py @@ -24,6 +24,9 @@ write_workbook, write_eml, expand_curie, + get_elements_for_predicate, + get_template_for_predicate, + get_predicate_id_for_predicate, ) logger = getLogger(__name__) @@ -175,63 +178,26 @@ def annotate_workbook( eml = load_eml(eml_path) # Run workbook annotator, results of one are used as input for the next - wb = add_env_broad_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_local_scale_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_process_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_methods_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_research_topic_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_measurement_type_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_env_medium_annotations_to_workbook( - wb, - eml, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - sample_size=sample_size, - ) - wb = add_qudt_annotations_to_workbook(wb, eml) # irrespective of annotator + predicates = [ + "contains measurements of type", + "contains process", + "env_broad_scale", + "env_local_scale", + "environmental material", + "research topic", + "usesMethod", + ] + for p in predicates: + wb = add_predicate_annotations_to_workbook( + predicate=p, + workbook=wb, + eml=eml, + local_model=local_model, + temperature=temperature, + return_ungrounded=return_ungrounded, + sample_size=sample_size, + ) + wb = add_qudt_annotations_to_workbook(wb, eml) write_workbook(wb, output_path) return None @@ -472,129 +438,6 @@ def add_qudt_annotations_to_workbook( return wb -# pylint: disable=too-many-branches -# pylint: disable=too-many-statements -def add_measurement_type_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. 
- :param overwrite: If True, overwrite existing `measurement type` - annotations in the workbook, so a fresh set may be created. - :param local_model: Required if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: An option if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with measurement type annotations. - """ - logger.info("Annotating measurement type") - - # Parameters for the function - predicate = "contains measurements of type" - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Remove existing measurement type annotations if overwrite is True, using - # a set of criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "attribute", - "element_xpath": "/attribute", - "author": "spinneret.annotator", # any spinneret annotator - }, - ) - - # Iterate over EML attributes and add measurement type annotations to the - # workbook - attributes = eml.xpath("//attribute") - for attribute in attributes: - attribute_element = attribute - attribute_xpath = eml.getpath(attribute_element) - attribute_description = get_description(attribute_element) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, attribute_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
- annotations = get_annotation_from_workbook( - workbook=wb, - element=attribute_element.tag, - description=attribute_description, - predicate=predicate, - ) - - if annotations is None: - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=attribute_description, - template="contains_measurement_of_type", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add the measurement type annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = attribute_element.tag - if "id" in attribute_element.attrib: - row["element_id"] = attribute_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = attribute_xpath - row["context"] = get_subject_and_context(attribute_element)["context"] - row["description"] = get_description(attribute_element) - row["subject"] = get_subject_and_context(attribute_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#" - "containsMeasurementsOfType" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = "spinneret.annotator.get_ontogpt_annotation" - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - def get_ontogpt_annotation( text: str, template: str, @@ -678,248 +521,8 @@ def get_ontogpt_annotation( return annotations -def add_process_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `process` annotations in the - workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with process annotations. - :notes: This function retrieves process annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. 
- """ - logger.info("Annotating process") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "contains process" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing process annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the process annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="contains_process", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add process annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = "http://purl.obolibrary.org/obo/BFO_0000067" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_broad_scale_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. 
- :param overwrite: If True, overwrite existing `broad scale environmental - context` annotations in the workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with broad scale environmental context annotations. - :notes: This function retrieves broad scale environmental context - annotations using OntoGPT, which requires setup and configuration - described in the `get_ontogpt_annotation` function. - """ - logger.info("Annotating broad scale environmental context") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - author = "spinneret.annotator.get_onto_gpt_annotation" - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "env_broad_scale" - - # Remove existing broad scale environmental context annotations if - # overwrite is True, using a set of criteria that accurately define the - # annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
- annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the broad scale environmental context annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add broad scale environmental context annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "https://genomicsstandardsconsortium.github.io/mixs/0000012/" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_local_scale_annotations_to_workbook( +def add_predicate_annotations_to_workbook( + predicate: str, workbook: Union[str, pd.core.frame.DataFrame], eml: Union[str, etree._ElementTree], output_path: str = None, @@ -930,13 +533,19 @@ def add_env_local_scale_annotations_to_workbook( sample_size: int = 1, ) -> pd.core.frame.DataFrame: """ + :param predicate: The predicate label for the annotation. This guides the + annotation process with which OntoGPT template to use. The options are: + `contains measurements of type`, `contains process`, `env_broad_scale`, + `env_local_scale`, `environmental material`, `research topic`, + `usesMethod`, `uses standard`. :param workbook: Either the path to the workbook to be annotated, or the workbook itself as a pandas DataFrame. :param eml: Either the path to the EML file corresponding to the workbook, or the EML file itself as an lxml etree. :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `local scale environmental - context` annotations in the workbook, so a fresh set may be created. + :param overwrite: If True, overwrite existing annotations in the workbook, + so a fresh set may be created. Only annotations with the same predicate + as the `predicate` input will be removed. :param local_model: See `get_ontogpt_annotation` documentation for details. :param temperature: The temperature parameter for the model. If `None`, the OntoGPT default will be used. @@ -944,185 +553,66 @@ def add_env_local_scale_annotations_to_workbook( details. :param sample_size: Executes multiple replicates of the annotation request to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with local scale environmental context annotations. 
- :notes: This function retrieves local scale environmental context - annotations using OntoGPT, which requires setup and configuration - described in the `get_ontogpt_annotation` function. + :returns: Workbook with predicate annotations. + :notes: This function retrieves annotations using OntoGPT, except for the + `uses standard` which uses a deterministic method. OntoGPT requires + setup and configuration described in the `get_ontogpt_annotation` + function. """ - logger.info("Annotating local scale environmental context") # Load the workbook and EML for processing wb = load_workbook(workbook) eml = load_eml(eml) - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "env_local_scale" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing local scale environmental context annotations if - # overwrite is True, using a set of criteria that accurately define the - # annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the local scale environmental context annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template=predicate, - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add local scale environmental context annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "https://genomicsstandardsconsortium.github.io/mixs/0000013/" + # Annotate for each element in the set that matches the predicate + elements = get_elements_for_predicate(eml, predicate) + for element in elements: + logger.info(f"Annotating {predicate}") + + # Parameters for use below + element_tag = element.tag + element_description = get_description(element) + element_xpath = eml.getpath(element) + template = get_template_for_predicate(predicate) + predicate_id = get_predicate_id_for_predicate(predicate) + author = 
"spinneret.annotator.get_ontogpt_annotation" + + # Remove existing annotations if instructed to do so + if overwrite: + wb = delete_annotations( + workbook=wb, + criteria={ + "element": element_tag, + "element_xpath": element_xpath, + "predicate": predicate, + "author": author, + }, ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_env_medium_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `environmental medium` - annotations in the workbook, so a fresh set may be created. - :param local_model: Required if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: An option if `annotator` is "ontogpt". See - `get_ontogpt_annotation` documentation for details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with environmental medium annotations. - """ - logger.info("Annotating environmental medium") - - # Parameters for the function - predicate = "environmental material" - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Remove existing environmental medium annotations if overwrite is True, - # using a set of criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "attribute", - "element_xpath": "/attribute", - "predicate": "environmental material", - "author": "spinneret.annotator.get_ontogpt_annotation", - }, - ) - - # Iterate over EML attributes and add environmental medium annotations to - # the workbook - attributes = eml.xpath("//attribute") - for attribute in attributes: - attribute_element = attribute - attribute_xpath = eml.getpath(attribute_element) - attribute_description = get_description(attribute_element) - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, attribute_xpath, predicate): + # Skip if this element already has an annotation in the workbook, to: + # prevent duplicate annotations, and to allow for resuming annotation + # of a partially annotated workbook. + if has_annotation(wb, element_xpath, predicate): return wb # Reuse existing annotations for elements with identical tag names, # descriptions, and predicate labels, to reduce redundant processing. # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. 
+ # tags and descriptions, which is generally true. annotations = get_annotation_from_workbook( workbook=wb, - element=attribute_element.tag, - description=attribute_description, + element=element_tag, + description=element_description, predicate=predicate, ) if annotations is None: - # Get the environmental medium annotations from the annotator + # Get the annotations annotations = [] for _ in range(sample_size): res = get_ontogpt_annotation( - text=attribute_description, - template="env_medium", + text=element_description, + template=template, local_model=local_model, temperature=temperature, return_ungrounded=return_ungrounded, @@ -1132,281 +622,35 @@ def add_env_medium_annotations_to_workbook( if len(annotations) == 0: annotations = None - # And add the environmental medium annotations to the workbook + # Add annotations to the workbook if annotations is not None: for annotation in annotations: row = initialize_workbook_row() row["package_id"] = get_package_id(eml) row["url"] = get_package_url(eml) - row["element"] = attribute_element.tag - if "id" in attribute_element.attrib: - row["element_id"] = attribute_element.attrib["id"] + row["element"] = element_tag + if "id" in element.attrib: + row["element_id"] = element.attrib["id"] else: row["element_id"] = pd.NA - row["element_xpath"] = attribute_xpath - row["context"] = get_subject_and_context(attribute_element)["context"] - row["description"] = attribute_description - row["subject"] = get_subject_and_context(attribute_element)["subject"] + row["element_xpath"] = eml.getpath(element) + row["context"] = get_subject_and_context(element)["context"] + row["description"] = element_description + row["subject"] = get_subject_and_context(element)["subject"] row["predicate"] = predicate - row["predicate_id"] = "http://purl.obolibrary.org/obo/ENVO_00010483" + row["predicate_id"] = predicate_id row["object"] = annotation["label"] row["object_id"] = annotation["uri"] - row["author"] = "spinneret.annotator.get_ontogpt_annotation" + row["author"] = author row["date"] = pd.Timestamp.now() row = pd.DataFrame([row], dtype=str) wb = pd.concat([wb, row], ignore_index=True) wb = delete_duplicate_annotations(wb) - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_research_topic_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `research topic` annotations - in the workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with research topic annotations. 
- :notes: This function retrieves research topic annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. - """ - logger.info("Annotating research topic") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - dataset_element = eml.xpath("//dataset")[0] - element_description = get_description(dataset_element) - element_xpath = eml.getpath(dataset_element) - predicate = "research topic" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing research topic annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": "research topic", - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): + if output_path: + write_workbook(wb, output_path) return wb - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - # Get the research topic annotations - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="research_topic", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add research topic annotations to the workbook - if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = "http://vocabs.lter-europe.net/EnvThes/21604" - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - - -def add_methods_annotations_to_workbook( - workbook: Union[str, pd.core.frame.DataFrame], - eml: Union[str, etree._ElementTree], - output_path: str = None, - overwrite: bool = False, - local_model: str = None, - temperature: Union[float, None] = None, - return_ungrounded: bool = False, - sample_size: int = 1, -) -> pd.core.frame.DataFrame: - """ - :param workbook: Either the path to the workbook to be annotated, or the - workbook itself as a pandas 
DataFrame. - :param eml: Either the path to the EML file corresponding to the workbook, - or the EML file itself as an lxml etree. - :param output_path: The path to write the annotated workbook. - :param overwrite: If True, overwrite existing `methods` annotations in the - workbook, so a fresh set may be created. - :param local_model: See `get_ontogpt_annotation` documentation for details. - :param temperature: The temperature parameter for the model. If `None`, the - OntoGPT default will be used. - :param return_ungrounded: See `get_ontogpt_annotation` documentation for - details. - :param sample_size: Executes multiple replicates of the annotation request - to reduce variability of outputs. Variability is inherent in OntoGPT. - :returns: Workbook with methods annotations. - :notes: This function retrieves methods annotations using OntoGPT, which - requires setup and configuration described in the - `get_ontogpt_annotation` function. - """ - logger.info("Annotating methods") - - # Load the workbook and EML for processing - wb = load_workbook(workbook) - eml = load_eml(eml) - - # Parameters for the function - # Get the methods annotations, if the methods element exists in the EML - dataset_element = eml.xpath("//dataset")[0] - methods_element = eml.xpath("//dataset/methods") - if not methods_element: - return wb - element_description = get_description(methods_element[0]) - element_xpath = eml.getpath(dataset_element) - predicate = "usesMethod" - - # Set the author identifier for consistent reference below - author = "spinneret.annotator.get_onto_gpt_annotation" - - # Remove existing methods annotations if overwrite is True, using a set of - # criteria that accurately define the annotations to remove. - if overwrite: - wb = delete_annotations( - workbook=wb, - criteria={ - "element": "dataset", - "element_xpath": "/eml:eml/dataset", - "predicate": predicate, - "author": author, - }, - ) - - # Skip if this element already has an annotation in the workbook, to - # prevent duplicate annotations from being added. - if has_annotation(wb, element_xpath, predicate): - return wb - - # Reuse existing annotations for elements with identical tag names, - # descriptions, and predicate labels, to reduce redundant processing. - # Note this assumes semantic equivalence between elements with matching - # tags and descriptions. - annotations = get_annotation_from_workbook( - workbook=wb, - element=dataset_element.tag, - description=element_description, - predicate=predicate, - ) - - if annotations is None: - annotations = [] - for _ in range(sample_size): - res = get_ontogpt_annotation( - text=element_description, - template="uses_method", - local_model=local_model, - temperature=temperature, - return_ungrounded=return_ungrounded, - ) - if res is not None: - annotations.extend(res) - if len(annotations) == 0: - annotations = None - - # Add methods annotations to the workbook. Note, methods annotations are - # at the dataset level. 
- if annotations is not None: - for annotation in annotations: - row = initialize_workbook_row() - row["package_id"] = get_package_id(eml) - row["url"] = get_package_url(eml) - row["element"] = dataset_element.tag - if "id" in dataset_element.attrib: - row["element_id"] = dataset_element.attrib["id"] - else: - row["element_id"] = pd.NA - row["element_xpath"] = eml.getpath(dataset_element) - row["context"] = get_subject_and_context(dataset_element)["context"] - row["description"] = element_description[0:500] # don't need all of it - row["subject"] = get_subject_and_context(dataset_element)["subject"] - row["predicate"] = predicate - row["predicate_id"] = ( - "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#usesMethod" - ) - row["object"] = annotation["label"] - row["object_id"] = annotation["uri"] - row["author"] = author - row["date"] = pd.Timestamp.now() - row = pd.DataFrame([row], dtype=str) - wb = pd.concat([wb, row], ignore_index=True) - wb = delete_duplicate_annotations(wb) - - if output_path: - write_workbook(wb, output_path) - return wb - def get_annotation_from_workbook( workbook: Union[str, pd.core.frame.DataFrame], diff --git a/src/spinneret/utilities.py b/src/spinneret/utilities.py index 80ad65c..3551276 100644 --- a/src/spinneret/utilities.py +++ b/src/spinneret/utilities.py @@ -157,3 +157,76 @@ def load_prefixmaps() -> dict: file = str(importlib.resources.files("spinneret.data")) + "/prefixmaps.csv" prefixmaps = pd.read_csv(file) return prefixmaps + + +def get_elements_for_predicate(eml: etree._ElementTree, predicate: str) -> list: + """ + Get the EML elements that correspond to a predicate. Elements contain + the information from which annotations are derived. + + :param eml: An EML document. + :param predicate: The predicate to be used to find the element(s). + :returns: The element(s) corresponding to the predicate, each as an + etree._Element. If the predicate is not found, returns an empty list. + """ + predicate_and_xpath = { + "contains measurements of type": "//attribute", + "contains process": "//dataset", + "env_broad_scale": "//dataset", + "env_local_scale": "//dataset", + "environmental material": "//attribute", + "research topic": "//dataset", + "usesMethod": "//dataset/methods", + } + xpath = predicate_and_xpath.get(predicate) + if xpath: + return eml.xpath(xpath) + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return [] + + +def get_template_for_predicate(predicate: str) -> Union[str, None]: + """ + :param predicate: The predicate to be used to find the template. + :returns: The OntoGPT template for the predicate. Returns None if the + predicate is not found. + """ + predicate_and_template = { + "contains measurements of type": "contains_measurement_of_type", + "contains process": "contains_process", + "env_broad_scale": "env_broad_scale", + "env_local_scale": "env_local_scale", + "environmental material": "env_medium", + "research topic": "research_topic", + "usesMethod": "uses_method", + } + template = predicate_and_template.get(predicate) + if not template: + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return template + + +def get_predicate_id_for_predicate(predicate: str) -> Union[str, None]: + """ + :param predicate: The predicate to be used to find the predicate ID. + :returns: The predicate ID for the predicate. Returns None if the + predicate is not found. 
+ """ + predicate_and_id = { + "contains measurements of type": "http://ecoinformatics.org/oboe/" + "oboe.1.2/oboe-core.owl#" + "containsMeasurementsOfType", + "contains process": "http://purl.obolibrary.org/obo/BFO_0000067", + "env_broad_scale": "https://genomicsstandardsconsortium.github.io/mixs" + "/0000012/", + "env_local_scale": "https://genomicsstandardsconsortium.github.io/mixs" + "/0000013/", + "environmental material": "http://purl.obolibrary.org/obo/" "ENVO_00010483", + "research topic": "http://vocabs.lter-europe.net/EnvThes/21604", + "usesMethod": "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#" + "usesMethod", + } + predicate_id = predicate_and_id.get(predicate) + if not predicate_id: + logger.warning(f"Predicate {predicate} not found in the list of predicates.") + return predicate_id diff --git a/tests/test_annotator.py b/tests/test_annotator.py index 0fcbf1d..1e17498 100644 --- a/tests/test_annotator.py +++ b/tests/test_annotator.py @@ -13,15 +13,9 @@ annotate_eml, create_annotation_element, add_qudt_annotations_to_workbook, - add_measurement_type_annotations_to_workbook, - add_process_annotations_to_workbook, - add_env_broad_scale_annotations_to_workbook, - add_env_local_scale_annotations_to_workbook, - add_env_medium_annotations_to_workbook, - add_research_topic_annotations_to_workbook, - add_methods_annotations_to_workbook, get_annotation_from_workbook, has_annotation, + add_predicate_annotations_to_workbook, ) from spinneret.utilities import ( load_configuration, @@ -82,10 +76,8 @@ def test_get_bioportal_annotation(mocker, use_mock, get_annotation_fixture): # pylint: disable=duplicate-code @pytest.mark.parametrize("use_mock", [True]) # False tests with real LLM queries -def test_annotate_workbook_with_ontogpt( - tmp_path, mocker, use_mock, get_annotation_fixture -): - """Test annotate_workbook using the OntoGPT annotator""" +def test_annotate_workbook(tmp_path, mocker, use_mock, get_annotation_fixture): + """Test annotate_workbook""" # Configure the mock responses if use_mock: @@ -340,111 +332,15 @@ def test_has_annotations(): assert has_annotations(wb) is True -@pytest.mark.parametrize("use_mock", [True]) # False makes real HTTP requests -def test_add_measurement_type_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_measurement_type_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook "should" have annotations after calling the function. We - # say "should" because OntoGPT is non-deterministic, and we can't always - # expect the same results, or any results at all. - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "depth", - "uri": "http://purl.dataone.org/odo/ECSO_00000515", - } - ], - ) - wb = add_measurement_type_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "A different measurement type", - "uri": "http://purl.dataone.org/odo/ECSO_XXXXXXXX", - } - ], - ) - wb = add_measurement_type_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - overwrite=True, - ) - assert wb["object"].str.contains("A different measurement type").any() - assert ( - wb["object_id"].str.contains("http://purl.dataone.org/odo/ECSO_XXXXXXXX").any() - ) - - # Original annotations are gone - assert not wb["object"].str.contains("depth").any() - assert ( - not wb["object_id"] - .str.contains("http://purl.dataone.org/odo/ECSO_00000515") - .any() - ) - - -def test_add_measurement_type_annotations_to_workbook_io_options(tmp_path, mocker): - """Test add_measurement_type_annotations_to_workbook with different input - and output options""" - - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[ - { - "label": "depth", - "uri": "http://purl.dataone.org/odo/ECSO_00000515", - } - ], - ) - - # Accepts file path as input - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_annotated.tsv" - wb = add_measurement_type_annotations_to_workbook( - workbook="tests/edi.3.9_annotation_workbook.tsv", - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - ) - wb = load_workbook(output_path) - assert has_annotations(wb) - - # Accepts dataframes and etree objects as input - wb = load_workbook("tests/edi.3.9_annotation_workbook.tsv") - eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") - wb = add_measurement_type_annotations_to_workbook(workbook=wb, eml=eml) - assert has_annotations(wb) - - def test_annotators_are_listed_as_authors(tmp_path, mocker): """Test that the annotators are listed as authors in the workbook.""" - # Test for the `add_measurement_type_annotations_to_workbook` function - # using the OntoGPT annotator mocker.patch( "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a label", "uri": "a uri"}], ) - wb = add_measurement_type_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="contains measurements of type", workbook="tests/edi.3.9_annotation_workbook.tsv", eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=str(tmp_path) + "edi.3.9_annotation_workbook_dataset.tsv", @@ -496,258 +392,8 @@ def test_get_ontogpt_annotation(mocker, use_mock): @pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_process_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_process_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook_qudt.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_process_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting 
changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. - if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_process_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_broad_scale_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_broad_scale_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_broad_scale_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_broad_scale_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_local_scale_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_local_scale_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_local_scale_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_local_scale_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_env_medium_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_env_medium_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_env_medium_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_env_medium_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_research_topic_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_research_topic_annotations_to_workbook""" - - # Parameterize the test - workbook_path = "tests/edi.3.9_annotation_workbook.tsv" - output_path = str(tmp_path) + "edi.3.9_annotation_workbook.tsv" - - # The workbook shouldn't have any annotations yet - wb = load_workbook(workbook_path) - assert not has_annotations(wb) - - # The workbook has annotations after calling the function - if use_mock: - mocker.patch( - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a label", "uri": "a uri"}], - ) - wb = add_research_topic_annotations_to_workbook( - workbook=workbook_path, - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - ) - assert has_annotations(wb) - - # Overwriting changes the annotations. Note, we can't test this with real - # requests because we'll expect the same results as the first call. 
- if use_mock: - mocker.patch( # an arbitrary response to check for - "spinneret.annotator.get_ontogpt_annotation", - return_value=[{"label": "a different label", "uri": "a different uri"}], - ) - wb = add_research_topic_annotations_to_workbook( - workbook=output_path, # the output from the first call - eml=get_example_eml_dir() + "/" + "edi.3.9.xml", - output_path=output_path, - local_model="llama3.2", - return_ungrounded=True, # ensures we get at least one annotation back - overwrite=True, - ) - assert wb["object"].str.contains("a different label").any() - assert wb["object_id"].str.contains("a different uri").any() - - # Original annotations are gone - assert not wb["object"].str.contains("a label").any() - assert not wb["object_id"].str.contains("a uri").any() - - -@pytest.mark.parametrize("use_mock", [True]) # False tests with real local LLM queries -def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): - """Test add_methods_annotations_to_workbook""" +def test_add_predicate_annotations_to_workbook(tmp_path, use_mock, mocker): + """Test add_predicate_annotations_to_workbook""" # Parameterize the test workbook_path = "tests/edi.3.9_annotation_workbook.tsv" @@ -763,7 +409,8 @@ def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a label", "uri": "a uri"}], ) - wb = add_methods_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="env_broad_scale", workbook=workbook_path, eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=output_path, @@ -779,7 +426,8 @@ def test_add_methods_annotations_to_workbook(tmp_path, use_mock, mocker): "spinneret.annotator.get_ontogpt_annotation", return_value=[{"label": "a different label", "uri": "a different uri"}], ) - wb = add_methods_annotations_to_workbook( + wb = add_predicate_annotations_to_workbook( + predicate="env_broad_scale", workbook=output_path, # the output from the first call eml=get_example_eml_dir() + "/" + "edi.3.9.xml", output_path=output_path, diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 3baa6f4..231d1b5 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -14,6 +14,9 @@ expand_curie, compress_uri, load_prefixmaps, + get_elements_for_predicate, + get_template_for_predicate, + get_predicate_id_for_predicate, ) from spinneret.datasets import get_example_eml_dir @@ -138,3 +141,43 @@ def test_load_prefixmaps(): """Test that the prefixmaps are loaded""" prefixmaps = load_prefixmaps() assert isinstance(prefixmaps, pd.DataFrame) + + +def test_get_elements_for_predicate(): + """Test that elements are retrieved for a given predicate""" + eml = load_eml(get_example_eml_dir() + "/" + "edi.3.9.xml") + + # Elements are retrieved for a given predicate + elements = get_elements_for_predicate(eml, "contains measurements of type") + for element in elements: + assert element.tag == "attribute" + assert isinstance(element, etree._Element) + + # Elements are not retrieved for a non-existent predicate + elements = get_elements_for_predicate(eml, "non-existent predicate") + assert elements == [] + + +def test_get_template_for_predicate(): + """Test that a template is retrieved for a given predicate""" + # The template is retrieved for a given predicate + template = get_template_for_predicate("contains measurements of type") + assert template == "contains_measurement_of_type" + + # None is returned for a non-existent predicate + template = 
get_template_for_predicate("non-existent predicate")
+    assert template is None
+
+
+def test_get_predicate_id_for_predicate():
+    """Test that a predicate ID is retrieved for a given predicate"""
+    # The predicate ID is retrieved for a given predicate
+    predicate_id = get_predicate_id_for_predicate("contains measurements of type")
+    assert predicate_id == (
+        "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#"
+        "containsMeasurementsOfType"
+    )
+
+    # None is returned for a non-existent predicate
+    predicate_id = get_predicate_id_for_predicate("non-existent predicate")
+    assert predicate_id is None

From 5a495842e17f8dad5db95453ad2a540382b52ce3 Mon Sep 17 00:00:00 2001
From: Colin Smith
Date: Sat, 28 Dec 2024 10:04:26 -0500
Subject: [PATCH 23/24] fix: correct return logic in
 `add_predicate_annotations_to_workbook`

Resolve an issue in the `add_predicate_annotations_to_workbook` function
that prevented it from returning the expected results.
---
 src/spinneret/annotator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spinneret/annotator.py b/src/spinneret/annotator.py
index 7bee5cd..ac5b0bc 100644
--- a/src/spinneret/annotator.py
+++ b/src/spinneret/annotator.py
@@ -647,9 +647,9 @@ def add_predicate_annotations_to_workbook(
             wb = pd.concat([wb, row], ignore_index=True)
 
     wb = delete_duplicate_annotations(wb)
-    if output_path:
-        write_workbook(wb, output_path)
-    return wb
+    if output_path:
+        write_workbook(wb, output_path)
+    return wb
 
 
 def get_annotation_from_workbook(

From ee474938e6c15baec225f1a303c842dfb731f78b Mon Sep 17 00:00:00 2001
From: Colin Smith
Date: Fri, 17 Jan 2025 12:43:38 -0800
Subject: [PATCH 24/24] build: configure Read the Docs for explicit path to
 conf.py

Update `.readthedocs.yml` to explicitly specify the path to Sphinx's
`conf.py`. This ensures proper documentation builds and avoids potential
issues with an upcoming deprecation of inferred configuration.
---
 .readthedocs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index 092d342..ebd1092 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -7,6 +7,7 @@ build:
 
 sphinx:
   fail_on_warning: false
+  configuration: docs/source/conf.py
 
 python:
   # Install our python package before building the docs
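
The tests in this series exercise a single predicate-driven entry point,
`add_predicate_annotations_to_workbook`, together with per-predicate helper
lookups. A minimal usage sketch follows. The file paths are placeholders, the
`spinneret.utilities` module path for the helpers is an assumption inferred
from `tests/test_utilities.py`, and only the function names, parameters, and
example return values come from the diffs above.

    from spinneret.annotator import add_predicate_annotations_to_workbook
    from spinneret.utilities import (  # assumed module path for these helpers
        get_predicate_id_for_predicate,
        get_template_for_predicate,
    )

    # Resolve the template name and OBOE predicate URI for a predicate label
    template = get_template_for_predicate("contains measurements of type")
    # -> "contains_measurement_of_type"
    predicate_id = get_predicate_id_for_predicate("contains measurements of type")
    # -> "http://ecoinformatics.org/oboe/oboe.1.2/oboe-core.owl#containsMeasurementsOfType"

    # Annotate a workbook for one predicate; repeat per predicate of interest
    wb = add_predicate_annotations_to_workbook(
        predicate="env_broad_scale",
        workbook="edi.3.9_annotation_workbook.tsv",           # placeholder path
        eml="edi.3.9.xml",                                     # placeholder path
        output_path="edi.3.9_annotation_workbook_new.tsv",     # placeholder path
        local_model="llama3.2",   # optional: route OntoGPT through a local model
        return_ungrounded=True,   # also return ungrounded concepts
        overwrite=True,           # replace existing annotations for this predicate
    )

Because each predicate is handled by one generic call, the earlier
per-predicate test functions could be collapsed into the single
`test_add_predicate_annotations_to_workbook` shown above.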