-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Template for Alzheimer's Disease extractions from literature; expose the `max-text-length` option for pubmed_annotate (#392)
- Loading branch information
Showing 4 changed files with 275 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
from __future__ import annotations | ||
from datetime import ( | ||
datetime, | ||
date | ||
) | ||
from decimal import Decimal | ||
from enum import Enum | ||
import re | ||
import sys | ||
from typing import ( | ||
Any, | ||
List, | ||
Literal, | ||
Dict, | ||
Optional, | ||
Union | ||
) | ||
from pydantic.version import VERSION as PYDANTIC_VERSION | ||
if int(PYDANTIC_VERSION[0])>=2: | ||
from pydantic import ( | ||
BaseModel, | ||
ConfigDict, | ||
Field, | ||
field_validator | ||
) | ||
else: | ||
from pydantic import ( | ||
BaseModel, | ||
Field, | ||
validator | ||
) | ||
|
||
# Both version markers hold the literal string "None" (not the None singleton).
metamodel_version = version = "None"
|
||
|
||
class ConfiguredBaseModel(BaseModel):
    # Shared base class: every model in this module inherits this configuration.
    #
    # The guard below deliberately repeats the expression used by the import
    # block at the top of the file, so this class only touches ``ConfigDict``
    # when that name was actually imported (pydantic >= 2). Using it
    # unconditionally raises NameError on pydantic 1.x.
    if int(PYDANTIC_VERSION[0]) >= 2:
        model_config = ConfigDict(
            validate_assignment=True,
            validate_default=True,
            extra="forbid",
            arbitrary_types_allowed=True,
            use_enum_values=True,
            strict=False,
        )
    else:
        class Config:
            # pydantic 1.x spelling of the same options; ``validate_all`` is
            # the 1.x name of ``validate_default``. ``strict`` is not set here
            # because 1.x has no such config key.
            validate_assignment = True
            validate_all = True
            extra = "forbid"
            arbitrary_types_allowed = True
            use_enum_values = True
|
||
|
||
class NullDataOptions(str, Enum):
    # String-valued enumeration (str mixin): every member's value is the
    # same text as its name.
    UNSPECIFIED_METHOD_OF_ADMINISTRATION = 'UNSPECIFIED_METHOD_OF_ADMINISTRATION'
    NOT_APPLICABLE = 'NOT_APPLICABLE'
    NOT_MENTIONED = 'NOT_MENTIONED'
|
||
|
||
class ExtractionResult(ConfiguredBaseModel):
    """
    A result of extracting knowledge on text
    """

    # Every field is optional. The scalar fields default to None.
    input_id: Optional[str] = Field(default=None)
    input_title: Optional[str] = Field(default=None)
    input_text: Optional[str] = Field(default=None)
    raw_completion_output: Optional[str] = Field(default=None)
    prompt: Optional[str] = Field(default=None)
    extracted_object: Optional[Any] = Field(
        default=None,
        description="The complex objects extracted from the text",
    )
    # default_factory gives each instance its own fresh empty list.
    named_entities: Optional[List[Any]] = Field(
        default_factory=list,
        description="Named entities extracted from the text",
    )
|
||
|
||
class NamedEntity(ConfiguredBaseModel):
    # ``id`` is required (Ellipsis default); ``label`` is optional.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
class CompoundExpression(ConfiguredBaseModel):
    # Declares no fields of its own; serves as a base for Triple and
    # DocumentSection in this module.
    ...
|
||
|
||
class Triple(CompoundExpression):
    """
    Abstract parent for Relation Extraction tasks
    """

    # All six fields are optional strings defaulting to None.
    subject: Optional[str] = Field(default=None)
    predicate: Optional[str] = Field(default=None)
    object: Optional[str] = Field(default=None)
    qualifier: Optional[str] = Field(
        default=None,
        description='A qualifier for the statements, e.g. "NOT" for negation',
    )
    subject_qualifier: Optional[str] = Field(
        default=None,
        description=(
            'An optional qualifier or modifier for the subject of the '
            'statement, e.g. "high dose" or "intravenously administered"'
        ),
    )
    object_qualifier: Optional[str] = Field(
        default=None,
        description=(
            'An optional qualifier or modifier for the object of the '
            'statement, e.g. "severe" or "with additional complications"'
        ),
    )
|
||
|
||
class TextWithTriples(ConfiguredBaseModel):
    """
    A text containing one or more relations of the Triple type.
    """

    # ``Publication`` is declared further down the module; the annotation is
    # a forward reference.
    publication: Optional[Publication] = Field(default=None)
    # Starts as a fresh empty list on each instance.
    triples: Optional[List[Triple]] = Field(default_factory=list)
|
||
|
||
class TextWithEntity(ConfiguredBaseModel):
    """
    A text containing one or more instances of a single type of entity.
    """

    # ``Publication`` is declared further down the module; the annotation is
    # a forward reference.
    publication: Optional[Publication] = Field(default=None)
    # Entities are held as plain strings; starts as a fresh empty list.
    entities: Optional[List[str]] = Field(default_factory=list)
|
||
|
||
class RelationshipType(NamedEntity):
    # Redeclares the two NamedEntity fields with the same types and
    # descriptions; adds nothing else.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
class Publication(ConfiguredBaseModel):
    # All fields are optional strings defaulting to None.
    id: Optional[str] = Field(
        default=None,
        description="The publication identifier",
    )
    title: Optional[str] = Field(
        default=None,
        description="The title of the publication",
    )
    abstract: Optional[str] = Field(
        default=None,
        description="The abstract of the publication",
    )
    combined_text: Optional[str] = Field(default=None)
    full_text: Optional[str] = Field(
        default=None,
        description="The full text of the publication",
    )
|
||
|
||
class AnnotatorResult(ConfiguredBaseModel):
    # Three optional string fields, each defaulting to None.
    subject_text: Optional[str] = Field(default=None)
    object_id: Optional[str] = Field(default=None)
    object_text: Optional[str] = Field(default=None)
|
||
|
||
class Document(NamedEntity):
    # ``DocumentSection`` is declared after this class; the annotation is a
    # forward reference. The list starts empty on each instance.
    sections: Optional[List[DocumentSection]] = Field(
        default_factory=list,
        description=(
            "A semicolon-separated list of full sections of the document. "
            "If semicolons are present in the section text, they should be "
            "replaced with (SEMICOLON) to avoid parsing errors. "
            "A section is a major division of the document, such as an abstract, "
            "introduction, methods, results, discussion, or conclusion, "
            "or a subsection of one of these. "
            "The text should include the section title. "
            "A single phrase or ID is not a section. "
            "Do not format in Markdown."
        ),
    )
    # The two NamedEntity fields are redeclared with the same types and
    # descriptions.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
class DocumentSection(CompoundExpression):
    # One optional summary string plus three lists of strings; each list
    # starts empty on every instance.
    summary: Optional[str] = Field(
        default=None,
        description=(
            "A brief summary of the section, suitable for display in a table of "
            "contents or search results. "
            "This should be a single sentence or phrase, not a full paragraph. "
            "Do not format in Markdown."
        ),
    )
    symptoms: Optional[List[str]] = Field(
        default_factory=list,
        description="A semicolon-separated list of symptoms mentioned in the section.",
    )
    diagnostics: Optional[List[str]] = Field(
        default_factory=list,
        description=(
            "A semicolon-separated list of diagnostic procedures "
            "mentioned in the section."
        ),
    )
    treatments: Optional[List[str]] = Field(
        default_factory=list,
        description=(
            "A semicolon-separated list of treatments mentioned in the section. "
            "These may be drugs or other therapeutic procedures."
        ),
    )
|
||
|
||
class Symptom(NamedEntity):
    # Redeclares the two NamedEntity fields with the same types and
    # descriptions; adds nothing else.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
class Diagnostic(NamedEntity):
    # Redeclares the two NamedEntity fields with the same types and
    # descriptions; adds nothing else.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
class Treatment(NamedEntity):
    # Redeclares the two NamedEntity fields with the same types and
    # descriptions; adds nothing else.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
    )
|
||
|
||
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
#
# Several models annotate fields with classes declared later in the module
# (e.g. TextWithTriples -> Publication, Document -> DocumentSection), so the
# forward references are resolved here, once every class exists.
# pydantic 2.x provides ``model_rebuild()``; on 1.x that attribute is absent
# and ``update_forward_refs()`` is the equivalent call.
for _model in (
    ExtractionResult,
    NamedEntity,
    CompoundExpression,
    Triple,
    TextWithTriples,
    TextWithEntity,
    RelationshipType,
    Publication,
    AnnotatorResult,
    Document,
    DocumentSection,
    Symptom,
    Diagnostic,
    Treatment,
):
    if hasattr(_model, "model_rebuild"):
        _model.model_rebuild()
    else:
        _model.update_forward_refs()
# Do not leave the loop variable behind as a module attribute.
del _model
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# LinkML schema for the alz_treat extraction template.
# It declares a Document root class made of DocumentSection objects, each of
# which lists the symptoms, diagnostics and treatments it mentions.
id: http://w3id.org/ontogpt/alz_treat
name: alz_treat
title: Template for extracting Alzheimer's Disease Treatments
description: >-
  Template for extracting Alzheimer's disease treatments and related concepts.
  Assumes a large input text, on the order of a full scientific article or
  review. Try with this review - PMID:33302541
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  # NOTE(review): unlike the rdf and linkml prefixes, this one has no trailing
  # "/" or "#" separator - confirm the expanded URIs are as intended.
  alz_treat: http://w3id.org/ontogpt/alz_treat
  linkml: https://w3id.org/linkml/

default_prefix: alz_treat
default_range: string

# NamedEntity and CompoundExpression are not defined in this file; presumably
# they come from the imported "core" schema - verify.
imports:
  - linkml:types
  - core

classes:
  # Root of the extraction (tree_root: true).
  Document:
    tree_root: true
    is_a: NamedEntity
    attributes:
      sections:
        range: DocumentSection
        multivalued: true
        description: >-
          A semicolon-separated list of full sections of the document.
          If semicolons are present in the section text, they should
          be replaced with (SEMICOLON) to avoid parsing errors.
          A section is a major division of the document, such as an abstract,
          introduction, methods, results, discussion, or conclusion,
          or a subsection of one of these. The text should include the section
          title. A single phrase or ID is not a section.
          Do not format in Markdown.
  # One section of the document: a summary plus three multivalued entity lists.
  DocumentSection:
    is_a: CompoundExpression
    attributes:
      summary:
        range: string
        description: >-
          A brief summary of the section, suitable for display in a table of
          contents or search results. This should be a single sentence or
          phrase, not a full paragraph. Do not format in Markdown.
      symptoms:
        range: Symptom
        multivalued: true
        description: >-
          A semicolon-separated list of symptoms mentioned in the section.
      diagnostics:
        range: Diagnostic
        multivalued: true
        description: >-
          A semicolon-separated list of diagnostic procedures mentioned in the
          section.
      treatments:
        range: Treatment
        multivalued: true
        description: >-
          A semicolon-separated list of treatments mentioned in the section.
          These may be drugs or other therapeutic procedures.
  # Named-entity classes. Each lists its accepted identifier prefixes
  # (id_prefixes) and a comma-separated list of annotator sources.
  Symptom:
    is_a: NamedEntity
    id_prefixes:
      - HP
    annotations:
      annotators: sqlite:obo:hp, sqlite:obo:mondo, sqlite:obo:mesh, sqlite:obo:ncit
      prompt: >-
        the name of a human phenotype or symptom.
        Examples are ascites, fever, pain, seizure, increased intracranial
        pressure, lactic acidosis.
  Diagnostic:
    is_a: NamedEntity
    id_prefixes:
      - MAXO
    annotations:
      annotators: sqlite:obo:maxo, sqlite:obo:mesh, bioportal:SNOMEDCT, sqlite:obo:ncit
      prompt: >-
        the name of a diagnostic procedure or test.
        Examples are MRI, PET scan, lumbar puncture, blood test, biopsy.
  Treatment:
    is_a: NamedEntity
    id_prefixes:
      - DRUGBANK
      - MAXO
    annotations:
      annotators: sqlite:obo:drugbank, sqlite:obo:maxo, sqlite:obo:mesh, sqlite:obo:ncit
      prompt: >-
        the name of a drug or therapeutic procedure.
        Examples are aspirin, chemotherapy, radiation therapy, surgery.