Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial version of micronutrient extraction template (and other new templates) #469

Merged
merged 1 commit into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 316 additions & 0 deletions src/ontogpt/templates/ecosim_methods.py

Large diffs are not rendered by default.

99 changes: 99 additions & 0 deletions src/ontogpt/templates/ecosim_methods.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# LinkML schema for the EcoSIM methods extraction template.
# The tree root (TermSet) collects locations, methods, variables, equipment,
# and equipment-to-variable relationships extracted from the input text.
id: http://w3id.org/ontogpt/ecosim_methods
name: ecosim_methods
title: EcoSIM Methods Extraction Template
description: >-
  EcoSIM Methods Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  # Declares the namespace that default_prefix (below) refers to.
  # The trailing slash keeps expanded names separate from the namespace.
  ecosim_methods: http://w3id.org/ontogpt/ecosim_methods/
  # Retained for backward compatibility with the original prefix map.
  ecosim_simple: http://w3id.org/ontogpt/ecosim_simple
  ecosim: http://purl.obolibrary.org/obo/ecosim

default_prefix: ecosim_methods
default_range: string

imports:
  - linkml:types
  - core

classes:
  TermSet:
    tree_root: true
    is_a: NamedEntity
    attributes:
      locations:
        range: Location
        multivalued: true
        description: >-
          A semicolon-separated list of research locations.
          Examples include: Vermont, New York City,
          Ethiopia
      methods:
        range: Method
        multivalued: true
        description: >-
          A semicolon-separated list of methods used in
          environmental and earth science research. Examples
          include: sampling, spectroscopy
      variables:
        range: Variable
        # The description asks for a list, so the slot must be multivalued
        # (consistent with locations and methods above).
        multivalued: true
        description: >-
          A semicolon-separated list of variables measured in
          environmental and earth science research. Examples
          include: root shape, biomass, water turbidity
      equipments:
        range: Equipment
        # The description asks for a list, so the slot must be multivalued
        # (consistent with locations and methods above).
        multivalued: true
        description: >-
          A semicolon-separated list of equipment used in
          environmental and earth science research.
      equipment_to_variable_relationships:
        range: EquipmentMeasuresVariable
        description: >-
          A semicolon separated list of relationships
          between specific equipment and variables
          they are used to measure as described in the input.
          Example: NMR spectrometer was used to measure
          chemical content
        multivalued: true
        inlined: true

  # Named entity without an annotator: no ontology is configured for locations.
  Location:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a location used in research.

  Method:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a method used in environment and
        earth science research.

  Variable:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a variable measured in environment and
        earth science research.

  # Named entity without an annotator: no ontology is configured for equipment.
  Equipment:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a piece of equipment used in
        environment and earth science research.

  # Pairs one piece of equipment with one variable it is used to measure.
  EquipmentMeasuresVariable:
    is_a: CompoundExpression
    attributes:
      equipment:
        range: Equipment
        description: Name of the equipment used to measure a variable.
      variable:
        range: Variable
        description: Name of the variable being measured.

243 changes: 243 additions & 0 deletions src/ontogpt/templates/ecosim_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
from __future__ import annotations
from datetime import (
datetime,
date,
time
)
from decimal import Decimal
from enum import Enum
import re
import sys
from typing import (
Any,
ClassVar,
List,
Literal,
Dict,
Optional,
Union
)
from pydantic import (
BaseModel,
ConfigDict,
Field,
RootModel,
field_validator
)
# Module-level version metadata. Both values are the literal string "None"
# (not the None object) in this file - presumably because no version was
# available when the module was generated; TODO confirm against the generator.
metamodel_version = "None"
version = "None"


class ConfiguredBaseModel(BaseModel):
    # Common base class for the models declared in this module; it only
    # carries the shared pydantic configuration below and declares no fields.
    model_config = ConfigDict(
        validate_assignment=True,
        validate_default=True,
        extra="forbid",
        arbitrary_types_allowed=True,
        use_enum_values=True,
        strict=False,
    )




class LinkMLMeta(RootModel):
    # Frozen root model wrapping a plain ``Dict[str, Any]`` of metadata.
    # The dunder methods below forward to that wrapped dict (``self.root``).
    model_config = ConfigDict(frozen=True)
    root: Dict[str, Any] = {}

    def __contains__(self, key: str) -> bool:
        """Return whether ``key`` is present in the wrapped dict."""
        return key in self.root

    def __getitem__(self, key: str):
        """Return the value stored under ``key`` in the wrapped dict."""
        return self.root[key]

    def __setitem__(self, key: str, value):
        """Store ``value`` under ``key`` in the wrapped dict."""
        self.root[key] = value

    def __getattr__(self, key: str):
        """Look up ``key`` as an attribute of the wrapped dict object."""
        return getattr(self.root, key)


# Schema-level metadata for the ecosim_simple template.
# Each prefix entry has the shape
# {'prefix_prefix': <name>, 'prefix_reference': <uri>}; the comprehension
# builds those entries in the listed order.
linkml_meta = LinkMLMeta(
    {
        'default_prefix': 'ecosim_simple',
        'default_range': 'string',
        'description': 'Simple EcoSIM Extraction Template',
        'id': 'http://w3id.org/ontogpt/ecosim_simple',
        'imports': ['linkml:types', 'core'],
        'license': 'https://creativecommons.org/publicdomain/zero/1.0/',
        'name': 'ecosim_simple',
        'prefixes': {
            prefix: {'prefix_prefix': prefix, 'prefix_reference': uri}
            for prefix, uri in (
                ('ecosim', 'http://purl.obolibrary.org/obo/ecosim'),
                ('ecosim_simple', 'http://w3id.org/ontogpt/ecosim_simple'),
                ('linkml', 'https://w3id.org/linkml/'),
                ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
            )
        },
        'source_file': '/home/harry/ontogpt/src/ontogpt/templates/ecosim_simple.yaml',
        'title': 'Simple EcoSIM Extraction Template',
    }
)

class NullDataOptions(str, Enum):
    # String-valued enumeration: every member's value equals its own name.
    UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION"
    NOT_APPLICABLE = "NOT_APPLICABLE"
    NOT_MENTIONED = "NOT_MENTIONED"



class ExtractionResult(ConfiguredBaseModel):
    """
    A result of extracting knowledge on text
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    # Every field is optional and defaults to None.
    input_id: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'input_id', 'domain_of': ['ExtractionResult']}
        },
    )
    input_title: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'input_title', 'domain_of': ['ExtractionResult']}
        },
    )
    input_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'input_text', 'domain_of': ['ExtractionResult']}
        },
    )
    raw_completion_output: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                'alias': 'raw_completion_output',
                'domain_of': ['ExtractionResult'],
            }
        },
    )
    prompt: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'prompt', 'domain_of': ['ExtractionResult']}
        },
    )
    extracted_object: Optional[Any] = Field(
        default=None,
        description="The complex objects extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'extracted_object',
                'domain_of': ['ExtractionResult'],
            }
        },
    )
    named_entities: Optional[List[Any]] = Field(
        default=None,
        description="Named entities extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'named_entities',
                'domain_of': ['ExtractionResult'],
            }
        },
    )


class NamedEntity(ConfiguredBaseModel):
    # Marked abstract in its LinkML metadata; subclasses in this module
    # redeclare the same ``id`` and ``label`` fields.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'abstract': True, 'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    # Required: declared with ``...`` so there is no default value.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'comments': [
                    'this is populated during the grounding and normalization step'
                ],
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'label',
                'aliases': ['name'],
                'annotations': {
                    'owl': {
                        'tag': 'owl',
                        'value': 'AnnotationProperty, AnnotationAssertion',
                    }
                },
                'domain_of': ['NamedEntity'],
                'slot_uri': 'rdfs:label',
            }
        },
    )


class CompoundExpression(ConfiguredBaseModel):
    # Field-less base class, marked abstract in its LinkML metadata;
    # Triple derives from it in this module.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'abstract': True, 'from_schema': 'http://w3id.org/ontogpt/core'}
    )


class Triple(CompoundExpression):
    """
    Abstract parent for Relation Extraction tasks
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'abstract': True, 'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    # All six fields are optional strings that default to None.
    subject: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'subject', 'domain_of': ['Triple']}
        },
    )
    predicate: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'predicate', 'domain_of': ['Triple']}
        },
    )
    object: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'object', 'domain_of': ['Triple']}
        },
    )
    qualifier: Optional[str] = Field(
        default=None,
        description='A qualifier for the statements, e.g. "NOT" for negation',
        json_schema_extra={
            "linkml_meta": {'alias': 'qualifier', 'domain_of': ['Triple']}
        },
    )
    subject_qualifier: Optional[str] = Field(
        default=None,
        description=(
            'An optional qualifier or modifier for the subject of the '
            'statement, e.g. "high dose" or "intravenously administered"'
        ),
        json_schema_extra={
            "linkml_meta": {'alias': 'subject_qualifier', 'domain_of': ['Triple']}
        },
    )
    object_qualifier: Optional[str] = Field(
        default=None,
        description=(
            'An optional qualifier or modifier for the object of the '
            'statement, e.g. "severe" or "with additional complications"'
        ),
        json_schema_extra={
            "linkml_meta": {'alias': 'object_qualifier', 'domain_of': ['Triple']}
        },
    )


class TextWithTriples(ConfiguredBaseModel):
    """
    A text containing one or more relations of the Triple type.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    # ``Publication`` is defined further down in the module, so this
    # annotation is a forward reference.
    publication: Optional[Publication] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                'alias': 'publication',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'domain_of': ['TextWithTriples', 'TextWithEntity'],
            }
        },
    )
    triples: Optional[List[Triple]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'triples', 'domain_of': ['TextWithTriples']}
        },
    )


class TextWithEntity(ConfiguredBaseModel):
    """
    A text containing one or more instances of a single type of entity.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    # ``Publication`` is defined further down in the module, so this
    # annotation is a forward reference.
    publication: Optional[Publication] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                'alias': 'publication',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'domain_of': ['TextWithTriples', 'TextWithEntity'],
            }
        },
    )
    entities: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'entities', 'domain_of': ['TextWithEntity']}
        },
    )


class RelationshipType(NamedEntity):
    # NamedEntity subclass whose LinkML metadata lists the identifier
    # prefixes 'RO' and 'biolink'; it redeclares ``id`` and ``label``.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            'from_schema': 'http://w3id.org/ontogpt/core',
            'id_prefixes': ['RO', 'biolink'],
        }
    )

    # Required: declared with ``...`` so there is no default value.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'comments': [
                    'this is populated during the grounding and normalization step'
                ],
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'label',
                'aliases': ['name'],
                'annotations': {
                    'owl': {
                        'tag': 'owl',
                        'value': 'AnnotationProperty, AnnotationAssertion',
                    }
                },
                'domain_of': ['NamedEntity'],
                'slot_uri': 'rdfs:label',
            }
        },
    )


class Publication(ConfiguredBaseModel):
    # Publication record; every field is an optional string defaulting to None.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    id: Optional[str] = Field(
        default=None,
        description="The publication identifier",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )
    title: Optional[str] = Field(
        default=None,
        description="The title of the publication",
        json_schema_extra={
            "linkml_meta": {'alias': 'title', 'domain_of': ['Publication']}
        },
    )
    abstract: Optional[str] = Field(
        default=None,
        description="The abstract of the publication",
        json_schema_extra={
            "linkml_meta": {'alias': 'abstract', 'domain_of': ['Publication']}
        },
    )
    combined_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'combined_text', 'domain_of': ['Publication']}
        },
    )
    full_text: Optional[str] = Field(
        default=None,
        description="The full text of the publication",
        json_schema_extra={
            "linkml_meta": {'alias': 'full_text', 'domain_of': ['Publication']}
        },
    )


class AnnotatorResult(ConfiguredBaseModel):
    # Three optional string fields, each defaulting to None.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {'from_schema': 'http://w3id.org/ontogpt/core'}
    )

    subject_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                'alias': 'subject_text',
                'domain_of': ['AnnotatorResult'],
            }
        },
    )
    object_id: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {'alias': 'object_id', 'domain_of': ['AnnotatorResult']}
        },
    )
    object_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                'alias': 'object_text',
                'domain_of': ['AnnotatorResult'],
            }
        },
    )


class TermSet(NamedEntity):
    # Tree root of the ecosim_simple schema (``'tree_root': True`` below).
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            'from_schema': 'http://w3id.org/ontogpt/ecosim_simple',
            'tree_root': True,
        }
    )

    # Typed as a list of strings here; the description doubles as the
    # extraction instruction text.
    terms: Optional[List[str]] = Field(
        default=None,
        description=(
            'A semicolon-separated list of variables for earth system '
            'simulation. Do not include abbreviations in parentheses, e.g., '
            '"Carbon (C)" should be represented as "carbon". '
            'Examples include: carboxylation, sodium, underground irrigation.'
        ),
        json_schema_extra={
            "linkml_meta": {'alias': 'terms', 'domain_of': ['TermSet']}
        },
    )
    # Required: declared with ``...`` so there is no default value.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'comments': [
                    'this is populated during the grounding and normalization step'
                ],
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'label',
                'aliases': ['name'],
                'annotations': {
                    'owl': {
                        'tag': 'owl',
                        'value': 'AnnotationProperty, AnnotationAssertion',
                    }
                },
                'domain_of': ['NamedEntity'],
                'slot_uri': 'rdfs:label',
            }
        },
    )


class Term(NamedEntity):
    # NamedEntity subclass whose LinkML metadata carries two annotations:
    # 'annotators' (bioportal:ECOSIM) and the 'prompt' text for this entity.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            'annotations': {
                'annotators': {
                    'tag': 'annotators',
                    'value': 'bioportal:ECOSIM',
                },
                'prompt': {
                    'tag': 'prompt',
                    'value': 'The name of a variable for earth system simulation.',
                },
            },
            'from_schema': 'http://w3id.org/ontogpt/ecosim_simple',
        }
    )

    # Required: declared with ``...`` so there is no default value.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'id',
                'annotations': {
                    'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}
                },
                'comments': [
                    'this is populated during the grounding and normalization step'
                ],
                'domain_of': ['NamedEntity', 'Publication'],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                'alias': 'label',
                'aliases': ['name'],
                'annotations': {
                    'owl': {
                        'tag': 'owl',
                        'value': 'AnnotationProperty, AnnotationAssertion',
                    }
                },
                'domain_of': ['NamedEntity'],
                'slot_uri': 'rdfs:label',
            }
        },
    )


# Model rebuild
# Each model class defined in this module is rebuilt here, after all of them
# exist. The module uses `from __future__ import annotations`, and some
# annotations (e.g. `Optional[Publication]` on TextWithTriples and
# TextWithEntity) name a class that is only defined later in the file.
# NOTE(review): the calls are kept as plain module-level statements in
# definition order - presumably so forward references resolve against the
# module namespace; confirm before wrapping them in a helper or loop.
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
ExtractionResult.model_rebuild()
NamedEntity.model_rebuild()
CompoundExpression.model_rebuild()
Triple.model_rebuild()
TextWithTriples.model_rebuild()
TextWithEntity.model_rebuild()
RelationshipType.model_rebuild()
Publication.model_rebuild()
AnnotatorResult.model_rebuild()
TermSet.model_rebuild()
Term.model_rebuild()
39 changes: 39 additions & 0 deletions src/ontogpt/templates/ecosim_simple.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# LinkML schema for the simple EcoSIM extraction template.
# The tree root (TermSet) holds a single multivalued slot of Term entities.
id: http://w3id.org/ontogpt/ecosim_simple
name: ecosim_simple
title: Simple EcoSIM Extraction Template
description: >-
  Simple EcoSIM Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  ecosim_simple: http://w3id.org/ontogpt/ecosim_simple
  ecosim: http://purl.obolibrary.org/obo/ecosim

default_prefix: ecosim_simple
default_range: string

imports:
  - linkml:types
  - core

classes:
  # Root class: the container for everything extracted from one input.
  TermSet:
    tree_root: true
    is_a: NamedEntity
    attributes:
      terms:
        range: Term
        multivalued: true
        description: >-
          A semicolon-separated list of variables
          for earth system simulation. Do not include
          abbreviations in parentheses, e.g., "Carbon (C)"
          should be represented as "carbon".
          Examples include: carboxylation, sodium,
          underground irrigation.

  # A single extracted term, annotated against bioportal:ECOSIM.
  Term:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a variable for earth system simulation.
Loading
Loading