monarch-initiative · caufieldjh · Sep 13, 2023 · Sep 13, 2023 · Sep 13, 2023 · Sep 13, 2023
diff --git a/src/ontogpt/clients/openai_client.py b/src/ontogpt/clients/openai_client.py
@@ -29,7 +29,7 @@ def __post_init__(self):
             self.api_key = get_apikey_value("openai")
         openai.api_key = self.api_key
 
-    def complete(self, prompt, show_prompt: bool = False, max_tokens=3000, **kwargs) -> str:
+    def complete(self, prompt, max_tokens=3000, show_prompt: bool = False, **kwargs) -> str:
         engine = self.model
         logger.info(f"Complete: engine={engine}, prompt[{len(prompt)}]={prompt[0:100]}...")
         if show_prompt:

diff --git a/src/ontogpt/engines/gpt4all_engine.py b/src/ontogpt/engines/gpt4all_engine.py
@@ -80,7 +80,7 @@ def extract_from_text(
             chunks = chunk_text(text, self.sentences_per_window)
             extracted_object = None
             for chunk in chunks:
-                raw_text = self._raw_extract(chunk, cls, object=object)
+                raw_text = self._raw_extract(chunk, cls=cls, object=object)
                 logging.info(f"RAW TEXT: {raw_text}")
                 if show_prompt:
                     logging.info(f" PROVIDED PROMPT:\n{self.last_prompt}")
@@ -97,11 +97,11 @@ def extract_from_text(
                             else:
                                 extracted_object[k] = v
         else:
-            raw_text = self._raw_extract(text, cls, object=object)
+            raw_text = self._raw_extract(text=text, cls=cls, object=object)
             logging.info(f"RAW TEXT: {raw_text}")
             if show_prompt:
                 logging.info(f" PROVIDED PROMPT:\n{self.last_prompt}")
-            extracted_object = self.parse_completion_payload(raw_text, cls, object=object)
+            extracted_object = self.parse_completion_payload(raw_text, cls=cls, object=object)
         return ExtractionResult(
             input_text=text,
             raw_completion_output=raw_text,
@@ -111,7 +111,7 @@ def extract_from_text(
         )
 
     def _extract_from_text_to_dict(self, text: str, cls: ClassDefinition = None) -> RESPONSE_DICT:
-        raw_text = self._raw_extract(text, cls)
+        raw_text = self._raw_extract(text=text, cls=cls)
         return self._parse_response_to_dict(raw_text, cls)
 
     def iteratively_generate_and_extract(

diff --git a/src/ontogpt/engines/spires_engine.py b/src/ontogpt/engines/spires_engine.py
@@ -59,9 +59,9 @@ class SPIRESEngine(KnowledgeEngine):
     def extract_from_text(
         self,
         text: str,
-        show_prompt: bool = False,
         cls: ClassDefinition = None,
         object: OBJECT = None,
+        show_prompt: bool = False,
     ) -> ExtractionResult:
         """
         Extract annotations from the given text.
@@ -75,7 +75,7 @@ def extract_from_text(
             chunks = chunk_text(text, self.sentences_per_window)
             extracted_object = None
             for chunk in chunks:
-                raw_text = self._raw_extract(chunk, cls, show_prompt=show_prompt, object=object)
+                raw_text = self._raw_extract(chunk, cls=cls, object=object, show_prompt=show_prompt)
                 logging.info(f"RAW TEXT: {raw_text}")
                 next_object = self.parse_completion_payload(raw_text, cls, object=object)
                 if extracted_object is None:
@@ -90,7 +90,7 @@ def extract_from_text(
                             else:
                                 extracted_object[k] = v
         else:
-            raw_text = self._raw_extract(text=text, cls=cls, show_prompt=show_prompt, object=object)
+            raw_text = self._raw_extract(text=text, cls=cls, object=object, show_prompt=show_prompt)
             logging.info(f"RAW TEXT: {raw_text}")
             extracted_object = self.parse_completion_payload(raw_text, cls, object=object)
         return ExtractionResult(
@@ -102,11 +102,11 @@ def extract_from_text(
         )
 
     def _extract_from_text_to_dict(self, text: str, cls: ClassDefinition = None) -> RESPONSE_DICT:
-        raw_text = self._raw_extract(text, cls, )
+        raw_text = self._raw_extract(text=text, cls=cls)
         return self._parse_response_to_dict(raw_text, cls)
 
     def generate_and_extract(
-        self, entity: str, show_prompt: bool = False, prompt_template: str = None, **kwargs
+        self, entity: str, prompt_template: str = None, show_prompt: bool = False, **kwargs
     ) -> ExtractionResult:
         """
         Generate a description using GPT and then extract from it using SPIRES.
@@ -166,7 +166,7 @@ def _remove_parenthetical_context(s: str):
             else:
                 curie = None
             result = self.generate_and_extract(
-                next_entity, show_prompt=show_prompt, prompt_template=prompt_template, **kwargs
+                next_entity, prompt_template=prompt_template, show_prompt=show_prompt, **kwargs
             )
             if curie:
                 if result.extracted_object:
@@ -357,17 +357,17 @@ def _serialize_value(self, val: Any, slot: SlotDefinition) -> str:
         return val
 
     def _raw_extract(
-        self, text, show_prompt: bool = False, cls: ClassDefinition = None, object: OBJECT = None
+        self, text, cls: ClassDefinition = None, object: OBJECT = None, show_prompt: bool = False,
     ) -> str:
         """
         Extract annotations from the given text.
 
         :param text:
         :return:
         """
-        prompt = self.get_completion_prompt(cls, text, object=object)
+        prompt = self.get_completion_prompt(cls=cls, text=text, object=object)
         self.last_prompt = prompt
-        payload = self.client.complete(prompt, show_prompt)
+        payload = self.client.complete(prompt=prompt, show_prompt=show_prompt)
         return payload
 
     def get_completion_prompt(
@@ -465,11 +465,13 @@ def _parse_line_to_dict(
         # The LLML may mutate the output format somewhat,
         # randomly pluralizing or replacing spaces with underscores
         field = field.lower().replace(" ", "_")
+        logging.debug(f"  FIELD: {field}")
         cls_slots = sv.class_slots(cls.name)
         slot = None
         if field in cls_slots:
             slot = sv.induced_slot(field, cls.name)
         else:
+            # TODO: check this -- naive singularization strips any trailing "s" (e.g. "status" -> "statu")
             if field.endswith("s"):
                 field = field[:-1]
             if field in cls_slots:
@@ -501,6 +503,7 @@ def _parse_line_to_dict(
             transformed = False
             slots_of_range = sv.class_slots(slot_range.name)
             if self.recurse or len(slots_of_range) > 2:
+                logging.debug(f"  RECURSING ON SLOT: {slot.name}, range={slot_range.name}")
                 vals = [self._extract_from_text_to_dict(v, slot_range) for v in vals]
             else:
                 for sep in [" - ", ":", "/", "*", "-"]:
@@ -591,9 +594,11 @@ def ground_annotation_object(
                 if slot.range in self.schemaview.all_enums():
                     enum_def = self.schemaview.get_enum(slot.range)
             new_ann[field] = []
+            logging.debug(f"FIELD: {field} SLOT: {slot.name}")
             for val in vals:
                 if not val:
                     continue
+                logging.debug(f"   VAL: {val}")
                 if isinstance(val, tuple):
                     # special case for pairs
                     sub_slots = sv.class_induced_slots(rng_cls.name)