Skip to content

Commit

Permalink
Prep for v1.0.5 (#460)
Browse files (browse the repository at this point in the history)
  • Loading branch information
caufieldjh authored Sep 24, 2024
2 parents 79bac92 + 8c4fab8 commit 75e87ee
Show file tree
Hide file tree
Showing 13 changed files with 597 additions and 469 deletions.
924 changes: 471 additions & 453 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ontogpt"
version = "1.0.4"
version = "1.0.5"
description = "OntoGPT"
authors = ["Chris Mungall <[email protected]>", "J. Harry Caufield <[email protected]>"]
license = "BSD-3"
Expand All @@ -19,7 +19,7 @@ fastapi = {version = ">=0.88.0", optional = true}
gilda = ">=1.0.0"
inflect = ">=6.0.2"
inflection = ">=0.5.1"
linkml = "^1.8.0"
linkml = "^1.8.3"
linkml-owl = "^0.3.0"
myst-parser = {version = ">=0.18.1", extras = ["docs"], optional = true}
oaklib = ">=0.5.28"
Expand Down
1 change: 1 addition & 0 deletions src/ontogpt/clients/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# Necessary to avoid repeated debug messages
litellm.suppress_debug_info = True


@dataclass
class LLMClient:

Expand Down
22 changes: 20 additions & 2 deletions src/ontogpt/engines/embedding_similarity_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
"""Similarity engine."""
"""
Embedding similarity engine.
This module provides functionality for calculating the similarity between entities
using embeddings and cosine similarity. It includes classes and methods for
searching for similar entities and retrieving textual representations of entities.
Classes:
EmbeddingSimilarity: A dataclass representing the similarity between two entities.
SimilarityEngine: A class for calculating similarity between entities.
Methods:
SimilarityEngine.similarity(entity1: str, entity2: str) -> EmbeddingSimilarity:
Get similarity between two entities.
SimilarityEngine.search(entity1: str, entities: List[str]) -> Iterable[EmbeddingSimilarity]:
Search for similar entities.
SimilarityEngine.entity_text(entity: str) -> str:
Get text representation for an entity.
"""

import logging
from dataclasses import dataclass
Expand All @@ -25,7 +43,7 @@ class EmbeddingSimilarity:

@dataclass
class SimilarityEngine(KnowledgeEngine):
"""Engine for generating synonyms."""
"""Engine for finding embedding similarity."""

adapter: BasicOntologyInterface = None
autolabel: bool = True
Expand Down
2 changes: 2 additions & 0 deletions src/ontogpt/engines/knowledge_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ def chunk_text_by_sentence(text: str, window_size=3) -> Iterator[str]:
left_index = max(0, right_index - window_size)
yield ". ".join(sentences[left_index:right_index])


def chunk_text_by_char(text: str, window_size=1000) -> Iterator[str]:
    """Yield consecutive slices of ``text``, each at most ``window_size`` characters long.

    Slices are produced left to right without overlap; the final slice
    holds whatever characters remain and may be shorter than the others.
    An empty ``text`` yields nothing.
    """
    starts = range(0, len(text), window_size)
    for begin in starts:
        end = begin + window_size
        yield text[begin:end]


@dataclass
class KnowledgeEngine(ABC):
"""
Expand Down
32 changes: 31 additions & 1 deletion src/ontogpt/engines/mapping_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,34 @@
"""Synonym engine."""
"""
Mapping engine.
This module provides the `MappingEngine` class,
which is responsible for generating and resolving
mappings between concepts in different ontologies.
It utilizes various classes and enums to represent
mapping tasks, categorized mappings, and confidence levels.
Classes:
MappingPredicate: Enum representing different types of mapping predicates.
Confidence: Enum representing different levels of confidence.
CategorizedMapping: Model representing a categorized mapping with various attributes.
MappingTask: Model representing a mapping task with various attributes.
MappingTaskCollection: Model representing a collection of mapping tasks.
Relationship: Model representing a relationship between concepts.
Concept: Model representing a concept with various attributes.
MappingEngine: Engine class for generating and resolving mappings.
Functions:
categorize_mapping: Categorizes a mapping between a subject
and an object using a template.
_parse: Parses the payload from the categorization process into a `CategorizedMapping`.
categorize_mappings: Categorizes multiple mappings between subjects and object sources.
run_tasks: Runs a collection of mapping tasks and categorizes the results.
generate_tasks: Generates mapping tasks from a collection or
specified subjects and object sources.
from_sssom: Generates tasks from an SSSOM file.
categorize_sssom_mapping: Categorizes a single SSSOM mapping.
_concept: Retrieves a concept from an ontology adapter.
"""

import logging
from copy import deepcopy
Expand Down
29 changes: 28 additions & 1 deletion src/ontogpt/engines/reasoner_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,31 @@
"""Reasoner engine."""
"""
Reasoner engine.
This module provides the implementation of the
ReasonerEngine class, which is used to perform
reasoning tasks using a Language Model (LLM).
The engine takes an ontology and a query task as input,
translates it into an LLM prompt, and performs
reasoning over the ontology.
Classes:
ReasonerResult: Represents the result of a reason query.
ReasonerResultSet: Represents a set of reasoner results.
ReasonerEngine: Engine for performing reasoning using an LLM.
Functions:
flatten_list(lst): Flattens a nested list into a single list.
reason(task, template_path=None, strict=False, evaluate=None): Performs
reasoning over axioms and query entailments.
reason_multiple(task_collection, **kwargs): Performs reasoning over multiple tasks.
_parse_single_answer(payload, task): Parses a single answer from the payload.
evaluate(result, task): Evaluates the result against the task.
Usage:
The ReasonerEngine can be used to perform reasoning tasks
such as finding superclasses of a given class.
It can also provide explanations for the reasoning process.
"""

import logging
import re
Expand Down
8 changes: 5 additions & 3 deletions src/ontogpt/engines/spires_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,19 +504,21 @@ def _parse_response_to_dict(
# First split the text into sections, denoted by presence of multiple newlines
# Each section may still have multiple fields, but we don't know how
# they will be formatted
sections = results.replace("*","").split("\n\n")
sections = results.replace("*", "").split("\n\n")
ann = {}
for section in sections:
lines = section.splitlines()
continued_line = ""
for line in lines:
line = line.replace("*","").strip()
line = line.replace("*", "").strip()
# The line may be split into multiple lines,
# and we can only tell if there's a delimiter at the end of this one
# (though it may just be a misplaced delimiter)
# TODO: this could be a different delimiter, globally defined
if line.endswith(";"):
logging.info(f"This line ends in a delimiter, assuming continuation: {line}")
logging.info(
f"This line ends in a delimiter, assuming continuation: {line}"
)
continued_line = line
continue
# If there's nothing after the colon,
Expand Down
18 changes: 17 additions & 1 deletion src/ontogpt/engines/synonym_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
"""Synonym engine."""
"""
Synonym engine.
This module defines the SynonymEngine class,
which is responsible for generating synonyms
for given named entities within a specified domain.
Classes:
SynonymEngine: Inherits from KnowledgeEngine and
provides a method to retrieve synonyms.
Methods:
synonyms(named_entity: str, domain: str) -> List[str]:
Retrieves synonyms for the given named entity
within the specified domain.
"""

from dataclasses import dataclass
from typing import List

Expand Down
16 changes: 15 additions & 1 deletion src/ontogpt/engines/topic_classifier_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
"""Topic classifier engine."""
"""
Topic classifier engine.
This module contains the `TopicClassifierEngine` class, which inherits from `KnowledgeEngine`.
The engine is designed to classify input text based on a given topic.
Classes:
TopicClassifierEngine: Engine for classifying input text based on its topic.
Methods:
binary_classify(topic: str, text: str) -> bool:
Given a topic description, indicate whether it applies to the input text.
Returns True if the text matches the topic, otherwise returns False.
"""

import logging

Expand All @@ -8,6 +21,7 @@

logger = logging.getLogger(__name__)


@dataclass
class TopicClassifierEngine(KnowledgeEngine):
"""Engine for classifying input text based on its topic."""
Expand Down
4 changes: 3 additions & 1 deletion src/ontogpt/evaluation/maxo/eval_maxo.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ def eval(self) -> EvaluationObjectSetRE:
if extraction.extracted_object is not None:
# Process all multi-object triples to 1 to 1 triples
# so they may be more directly compared
for extracted_triple in extraction.extracted_object.action_annotation_relationships:
for (
extracted_triple
) in extraction.extracted_object.action_annotation_relationships:
new_triple = extracted_triple
for object in extracted_triple.object:
new_triple.object = [object]
Expand Down
2 changes: 1 addition & 1 deletion src/ontogpt/utils/clinical_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ def get_kanjee_prompt() -> str:
" the diagnoses. Again, the goal is to be as specific as possible with each of the "
"diagnoses. Do you have any questions, Dr. GPT-4?\n\nHere is the case:"
)
return prompt
return prompt
4 changes: 1 addition & 3 deletions src/ontogpt/webapp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ class Query(BaseModel):
def get_engine(datamodel: str, llm_model: str):
if datamodel not in engines:
template_details = get_template_details(template=datamodel)
engines[datamodel] = SPIRESEngine(
model=llm_model, template_details=template_details
)
engines[datamodel] = SPIRESEngine(model=llm_model, template_details=template_details)
return engines[datamodel]


Expand Down

0 comments on commit 75e87ee

Please sign in to comment.