Merge pull request #121 from monarch-initiative/reasoner

Adding a MappingEngine
monarch-initiative · May 30, 2023 · d7d1cda · d7d1cda
2 parents 9c63a8c + f6308ac
commit d7d1cda
Show file tree

Hide file tree

Showing 12 changed files with 1,090 additions and 24 deletions.
diff --git a/Makefile b/Makefile
@@ -141,6 +141,9 @@ analysis/enrichment/yeast/%-results-$(N).yaml: tests/input/genesets/yeast/%.yaml
 	$(RUN) ontogpt -vv eval-enrichment -n $(N) -U $< -A tests/input/sgd.gaf -o $@.tmp && mv $@.tmp $@
 
 
+analysis/enrichment/gpt4/%-results-$(N).yaml: tests/input/genesets/%.yaml analysis/enrichment/TRIGGER-REANALYSIS
+	$(RUN) ontogpt  -v eval-enrichment  --model gpt-4 -n $(N) -U $< -o $@.tmp && mv $@.tmp $@
+
 analysis/enrichment/%-results-$(N).yaml: tests/input/genesets/%.yaml analysis/enrichment/TRIGGER-REANALYSIS
 	$(RUN) ontogpt -v eval-enrichment -n $(N) -U $< -o $@.tmp && mv $@.tmp $@
 
@@ -164,3 +167,4 @@ analysis/gpt4-enrichment-summary-$(N).yaml:
 all_enrich: $(patsubst %, analysis/enrichment/%-results-$(N).yaml, $(GENE_SETS))
 all_zfin_enrich: $(patsubst %, analysis/enrichment/zebrafish/%-results-$(N).yaml, $(ZFIN_GENE_SETS))
 all_sgd_enrich: $(patsubst %, analysis/enrichment/yeast/%-results-$(N).yaml, $(SGD_GENE_SETS))
+all_gpt4_enrich: $(patsubst %, analysis/enrichment/gpt4/%-results-$(N).yaml, $(GENE_SETS))
diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py
@@ -29,7 +29,7 @@
 from ontogpt.engines.enrichment import EnrichmentEngine
 from ontogpt.engines.halo_engine import HALOEngine
 from ontogpt.engines.knowledge_engine import KnowledgeEngine
-from ontogpt.engines.models import MODELS
+from ontogpt.engines.mapping_engine import MappingEngine, MappingTaskCollection
 from ontogpt.engines.reasoner_engine import ReasonerEngine
 from ontogpt.engines.spires_engine import SPIRESEngine
 from ontogpt.engines.synonym_engine import SynonymEngine
@@ -839,7 +839,7 @@ def reason(
     """Reason."""
     reasoner = ReasonerEngine(model=model)
     if task_file:
-        tc = extractor.TaskCollection.load(task_file)
+        tc = MappingTaskCollection.load(task_file)
     else:
         adapter = get_adapter(inputfile)
         if not isinstance(adapter, OboGraphInterface):
@@ -869,6 +869,40 @@ def reason(
         write_obj_as_csv(resultset.results, tsv_output)
 
 
+@main.command()
+@inputfile_option
+@output_option_txt
+@model_option
+@click.option("--task-file")
+@click.option("--task-type")
+@click.option("--tsv-output")
+@click.option("--all-methods/--no-all-methods", default=False)
+@click.option("--explain/--no-explain", default=False)
+@click.option("--evaluate/--no-evaluate", default=False)
+@click.argument("terms", nargs=-1)
+def categorize_mappings(
+    terms,
+    inputfile,
+    model,
+    task_file,
+    explain,
+    task_type,
+    output,
+    tsv_output,
+    all_methods,
+    evaluate,
+    **kwargs,
+):
+    """Categorize a collection of SSSOM mappings."""
+    mapper = MappingEngine(model=model)
+    results = []  # gathered for the TSV export; assumes the writer takes a list, as in `reason`
+    for cm in mapper.run_tasks(mapper.from_sssom(inputfile), evaluate=evaluate):
+        print(dump_minimal_yaml(cm.dict()))  # each categorized mapping goes to stdout as YAML
+        results.append(cm)
+    if tsv_output:  # was `resultset.results`, an undefined name here (NameError)
+        write_obj_as_csv(results, tsv_output)
+
+
 @main.command()
 @output_option_txt
 @click.option(
@@ -916,8 +950,9 @@ def reason(
     "-A",
     help="Path to annotations",
 )
+@model_option
 @click.argument("genes", nargs=-1)
-def eval_enrichment(genes, input_file, number_to_drop, annotations_path, output, **kwargs):
+def eval_enrichment(genes, input_file, number_to_drop, annotations_path, model, output, **kwargs):
     """Run enrichment using multiple methods."""
     if not genes and not input_file:
         raise ValueError("Either genes or input file must be passed")
@@ -934,7 +969,7 @@ def eval_enrichment(genes, input_file, number_to_drop, annotations_path, output,
         if not _is_human(gene_set):
             raise ValueError("No annotations path passed")
         annotations_path = "tests/input/genes2go.tsv.gz"
-    eval_engine = EvalEnrichment()
+    eval_engine = EvalEnrichment(model=model)
     eval_engine.load_annotations(annotations_path)
     comps = eval_engine.evaluate_methods_on_gene_set(gene_set, n=number_to_drop, **kwargs)
     output.write(dump_minimal_yaml(comps))

diff --git a/src/ontogpt/clients/openai_client.py b/src/ontogpt/clients/openai_client.py
@@ -31,7 +31,7 @@ def __post_init__(self):
 
     def complete(self, prompt, max_tokens=3000, **kwargs) -> str:
         engine = self.model
-        logging.info(f"Complete: engine={engine}, prompt={prompt[0:100]}...")
+        logger.info(f"Complete: engine={engine}, prompt[{len(prompt)}]={prompt[0:100]}...")
         cur = self.db_connection()
         res = cur.execute("SELECT payload FROM cache WHERE prompt=? AND engine=?", (prompt, engine))
         payload = res.fetchone()
@@ -43,7 +43,7 @@ def complete(self, prompt, max_tokens=3000, **kwargs) -> str:
         i = 0
         while not response:
             i += 1
-            logging.debug(f"Calling OpenAI API (attempt {i})...")
+            logger.debug(f"Calling OpenAI API (attempt {i})...")
             try:
                 if self.interactive:
                     response = self._interactive_completion(prompt, engine, max_tokens, **kwargs)