-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial version of micronutrient extraction template (and other new templates) (#469)
- Loading branch information
Showing
10 changed files
with
2,171 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# LinkML schema describing what to extract from environmental and earth
# science text: locations, methods, variables, equipment, and which
# equipment was used to measure which variable.
id: http://w3id.org/ontogpt/ecosim_methods
name: ecosim_methods
title: EcoSIM Methods Extraction Template
description: >-
  EcoSIM Methods Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  # The default_prefix below must be declared here. This entry previously
  # declared ecosim_simple (copied from the sibling template), which left
  # ecosim_methods undefined.
  ecosim_methods: http://w3id.org/ontogpt/ecosim_methods/
  # NOTE(review): OBO namespaces usually end in "/" or "_"; confirm this IRI.
  ecosim: http://purl.obolibrary.org/obo/ecosim

default_prefix: ecosim_methods
default_range: string

imports:
  - linkml:types
  - core

classes:
  # Root class: one instance collects everything extracted from one input.
  TermSet:
    tree_root: true
    is_a: NamedEntity
    attributes:
      locations:
        range: Location
        multivalued: true
        description: >-
          A semicolon-separated list of research locations.
          Examples include: Vermont, New York City,
          Ethiopia
      methods:
        range: Method
        multivalued: true
        description: >-
          A semicolon-separated list of methods used in
          environmental and earth science research. Examples
          include: sampling, spectroscopy
      variables:
        range: Variable
        # Described as a list, so it must be multivalued like the
        # sibling slots above.
        multivalued: true
        description: >-
          A semicolon-separated list of variables measured in
          environmental and earth science research. Examples
          include: root shape, biomass, water turbidity
      equipments:
        range: Equipment
        # Described as a list, so it must be multivalued like the
        # sibling slots above.
        multivalued: true
        description: >-
          A semicolon-separated list of equipment used in
          environmental and earth science research.
      equipment_to_variable_relationships:
        range: EquipmentMeasuresVariable
        description: >-
          A semicolon separated list of relationships
          between specific equipment and variables
          they are used to measure as described in the input.
          Example: NMR spectrometer was used to measure
          chemical content
        multivalued: true
        inlined: true

  # Entity classes. Method and Variable carry an "annotators" annotation
  # naming bioportal:ECOSIM; Location and Equipment have a prompt only.
  Location:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a location used in research.
  Method:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a method used in environment and
        earth science research.
  Variable:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a variable measured in environment and
        earth science research.
  Equipment:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a piece of equipment used in
        environment and earth science research.

  # Relationship pairing one piece of equipment with one variable.
  EquipmentMeasuresVariable:
    is_a: CompoundExpression
    attributes:
      equipment:
        range: Equipment
        description: Name of the equipment used to measure a variable.
      variable:
        range: Variable
        description: Name of the variable being measured.
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
from __future__ import annotations | ||
from datetime import ( | ||
datetime, | ||
date, | ||
time | ||
) | ||
from decimal import Decimal | ||
from enum import Enum | ||
import re | ||
import sys | ||
from typing import ( | ||
Any, | ||
ClassVar, | ||
List, | ||
Literal, | ||
Dict, | ||
Optional, | ||
Union | ||
) | ||
from pydantic import ( | ||
BaseModel, | ||
ConfigDict, | ||
Field, | ||
RootModel, | ||
field_validator | ||
) | ||
# Version markers for this module; both hold the literal string "None"
# (a string, not the None singleton).
metamodel_version = "None"
version = "None"
|
||
|
||
class ConfiguredBaseModel(BaseModel):
    # Base class for the models in this module. The configuration below
    # turns on validation for assignments and defaults, forbids extra
    # fields, allows arbitrary types, uses enum values, and keeps strict
    # mode off.
    model_config = ConfigDict(
        validate_assignment=True,
        validate_default=True,
        extra="forbid",
        arbitrary_types_allowed=True,
        use_enum_values=True,
        strict=False,
    )
|
||
|
||
|
||
|
||
class LinkMLMeta(RootModel):
    # Frozen root model wrapping a plain dict of metadata. Item access,
    # membership tests and attribute access are delegated to that dict.
    root: Dict[str, Any] = {}
    model_config = ConfigDict(frozen=True)

    def __getattr__(self, key: str):
        """Delegate attribute access to the wrapped dict."""
        wrapped = self.root
        return getattr(wrapped, key)

    def __getitem__(self, key: str):
        """Return the value stored under ``key`` in the wrapped dict."""
        wrapped = self.root
        return wrapped[key]

    def __setitem__(self, key: str, value):
        """Store ``value`` under ``key`` in the wrapped dict."""
        wrapped = self.root
        wrapped[key] = value

    def __contains__(self, key: str) -> bool:
        """Report whether ``key`` is a key of the wrapped dict."""
        wrapped = self.root
        return key in wrapped
|
||
|
||
# Schema-level metadata for the ecosim_simple template: identity, license,
# imports, prefix map and the path of the YAML file it was generated from.
linkml_meta = LinkMLMeta(
    {
        "default_prefix": "ecosim_simple",
        "default_range": "string",
        "description": "Simple EcoSIM Extraction Template",
        "id": "http://w3id.org/ontogpt/ecosim_simple",
        "imports": ["linkml:types", "core"],
        "license": "https://creativecommons.org/publicdomain/zero/1.0/",
        "name": "ecosim_simple",
        "prefixes": {
            "ecosim": {
                "prefix_prefix": "ecosim",
                "prefix_reference": "http://purl.obolibrary.org/obo/ecosim",
            },
            "ecosim_simple": {
                "prefix_prefix": "ecosim_simple",
                "prefix_reference": "http://w3id.org/ontogpt/ecosim_simple",
            },
            "linkml": {
                "prefix_prefix": "linkml",
                "prefix_reference": "https://w3id.org/linkml/",
            },
            "rdf": {
                "prefix_prefix": "rdf",
                "prefix_reference": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            },
        },
        "source_file": "/home/harry/ontogpt/src/ontogpt/templates/ecosim_simple.yaml",
        "title": "Simple EcoSIM Extraction Template",
    }
)
|
||
class NullDataOptions(str, Enum):
    # String-valued enum: each member's value equals its own name.
    UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION"
    NOT_APPLICABLE = "NOT_APPLICABLE"
    NOT_MENTIONED = "NOT_MENTIONED"
|
||
|
||
|
||
class ExtractionResult(ConfiguredBaseModel):
    """
    A result of extracting knowledge on text
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Every field below is optional and defaults to None.
    input_id: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_id", "domain_of": ["ExtractionResult"]}
        },
    )
    input_title: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_title", "domain_of": ["ExtractionResult"]}
        },
    )
    input_text: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_text", "domain_of": ["ExtractionResult"]}
        },
    )
    raw_completion_output: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "raw_completion_output",
                "domain_of": ["ExtractionResult"],
            }
        },
    )
    prompt: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "prompt", "domain_of": ["ExtractionResult"]}
        },
    )
    extracted_object: Optional[Any] = Field(
        None,
        description="The complex objects extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                "alias": "extracted_object",
                "domain_of": ["ExtractionResult"],
            }
        },
    )
    named_entities: Optional[List[Any]] = Field(
        None,
        description="Named entities extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                "alias": "named_entities",
                "domain_of": ["ExtractionResult"],
            }
        },
    )
|
||
|
||
class NamedEntity(ConfiguredBaseModel):
    # Marked abstract in its LinkML metadata; carries a required ``id`` and
    # an optional ``label``.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )

    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )
|
||
|
||
class CompoundExpression(ConfiguredBaseModel):
    # Marked abstract in its LinkML metadata; declares no fields of its own.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )
|
||
|
||
class Triple(CompoundExpression):
    """
    Abstract parent for Relation Extraction tasks
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Core statement parts, all optional strings.
    subject: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "subject", "domain_of": ["Triple"]}
        },
    )
    predicate: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "predicate", "domain_of": ["Triple"]}
        },
    )
    object: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "object", "domain_of": ["Triple"]}
        },
    )

    # Optional qualifiers on the statement, its subject and its object.
    qualifier: Optional[str] = Field(
        None,
        description='A qualifier for the statements, e.g. "NOT" for negation',
        json_schema_extra={
            "linkml_meta": {"alias": "qualifier", "domain_of": ["Triple"]}
        },
    )
    subject_qualifier: Optional[str] = Field(
        None,
        description=(
            "An optional qualifier or modifier for the subject of the "
            'statement, e.g. "high dose" or "intravenously administered"'
        ),
        json_schema_extra={
            "linkml_meta": {"alias": "subject_qualifier", "domain_of": ["Triple"]}
        },
    )
    object_qualifier: Optional[str] = Field(
        None,
        description=(
            "An optional qualifier or modifier for the object of the "
            'statement, e.g. "severe" or "with additional complications"'
        ),
        json_schema_extra={
            "linkml_meta": {"alias": "object_qualifier", "domain_of": ["Triple"]}
        },
    )
|
||
|
||
class TextWithTriples(ConfiguredBaseModel):
    """
    A text containing one or more relations of the Triple type.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # ``Publication`` is declared later in the module, so this annotation is
    # a forward reference.
    publication: Optional[Publication] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "publication",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "domain_of": ["TextWithTriples", "TextWithEntity"],
            }
        },
    )
    triples: Optional[List[Triple]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "triples", "domain_of": ["TextWithTriples"]}
        },
    )
|
||
|
||
class TextWithEntity(ConfiguredBaseModel):
    """
    A text containing one or more instances of a single type of entity.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # ``Publication`` is declared later in the module, so this annotation is
    # a forward reference.
    publication: Optional[Publication] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "publication",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "domain_of": ["TextWithTriples", "TextWithEntity"],
            }
        },
    )
    entities: Optional[List[str]] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "entities", "domain_of": ["TextWithEntity"]}
        },
    )
|
||
|
||
class RelationshipType(NamedEntity):
    # Named entity whose metadata lists the id prefixes RO and biolink.
    # ``id`` and ``label`` are redeclared here with the same definitions as
    # on NamedEntity.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "from_schema": "http://w3id.org/ontogpt/core",
            "id_prefixes": ["RO", "biolink"],
        }
    )

    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )
|
||
|
||
class Publication(ConfiguredBaseModel):
    # Publication record; every field is an optional string defaulting to
    # None.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    id: Optional[str] = Field(
        None,
        description="The publication identifier",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    title: Optional[str] = Field(
        None,
        description="The title of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "title", "domain_of": ["Publication"]}
        },
    )
    abstract: Optional[str] = Field(
        None,
        description="The abstract of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "abstract", "domain_of": ["Publication"]}
        },
    )
    combined_text: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "combined_text", "domain_of": ["Publication"]}
        },
    )
    full_text: Optional[str] = Field(
        None,
        description="The full text of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "full_text", "domain_of": ["Publication"]}
        },
    )
|
||
|
||
class AnnotatorResult(ConfiguredBaseModel):
    # Three optional string fields: the subject text plus an object id and
    # object text.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    subject_text: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "subject_text", "domain_of": ["AnnotatorResult"]}
        },
    )
    object_id: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "object_id", "domain_of": ["AnnotatorResult"]}
        },
    )
    object_text: Optional[str] = Field(
        None,
        json_schema_extra={
            "linkml_meta": {"alias": "object_text", "domain_of": ["AnnotatorResult"]}
        },
    )
|
||
|
||
class TermSet(NamedEntity):
    # Tree-root class of the ecosim_simple schema. ``terms`` is typed as a
    # list of strings; ``id`` and ``label`` are redeclared with the same
    # definitions as on NamedEntity.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/ecosim_simple", "tree_root": True}
    )

    terms: Optional[List[str]] = Field(
        None,
        description=(
            "A semicolon-separated list of variables for earth system "
            "simulation. Do not include abbreviations in parentheses, "
            'e.g., "Carbon (C)" should be represented as "carbon". '
            "Examples include: carboxylation, sodium, underground irrigation."
        ),
        json_schema_extra={
            "linkml_meta": {"alias": "terms", "domain_of": ["TermSet"]}
        },
    )
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )
|
||
|
||
class Term(NamedEntity):
    # Named entity whose metadata carries an ``annotators`` annotation
    # (bioportal:ECOSIM) and a ``prompt`` annotation. ``id`` and ``label``
    # are redeclared with the same definitions as on NamedEntity.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "annotations": {
                "annotators": {"tag": "annotators", "value": "bioportal:ECOSIM"},
                "prompt": {
                    "tag": "prompt",
                    "value": "The name of a variable for earth system simulation.",
                },
            },
            "from_schema": "http://w3id.org/ontogpt/ecosim_simple",
        }
    )

    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )
|
||
|
||
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
# Every model is rebuilt once, in declaration order, after all classes exist;
# some annotations (e.g. Publication) refer to classes defined further down.
for _model in (
    ExtractionResult,
    NamedEntity,
    CompoundExpression,
    Triple,
    TextWithTriples,
    TextWithEntity,
    RelationshipType,
    Publication,
    AnnotatorResult,
    TermSet,
    Term,
):
    _model.model_rebuild()
del _model  # keep the loop variable out of the module namespace
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# LinkML schema for a minimal EcoSIM extraction: a single list of
# earth-system-simulation variable names.
id: http://w3id.org/ontogpt/ecosim_simple
name: ecosim_simple
title: Simple EcoSIM Extraction Template
description: Simple EcoSIM Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/

prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  ecosim_simple: http://w3id.org/ontogpt/ecosim_simple
  ecosim: http://purl.obolibrary.org/obo/ecosim

default_prefix: ecosim_simple
default_range: string

imports:
  - linkml:types
  - core

classes:
  # Root class: holds the extracted list of terms.
  TermSet:
    is_a: NamedEntity
    tree_root: true
    attributes:
      terms:
        range: Term
        multivalued: true
        description: >-
          A semicolon-separated list of variables for earth system
          simulation. Do not include abbreviations in parentheses,
          e.g., "Carbon (C)" should be represented as "carbon".
          Examples include: carboxylation, sodium, underground irrigation.

  # A single variable name; its annotators annotation names
  # bioportal:ECOSIM.
  Term:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: The name of a variable for earth system simulation.
Oops, something went wrong.