Skip to content

Commit

Permalink
Add JSON writer (#423)
Browse files Browse the repository at this point in the history
  • Loading branch information
caufieldjh authored Aug 2, 2024
2 parents 371ba50 + e12e6de commit ba0e109
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/ontogpt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

from ontogpt.io.owl_exporter import OWLExporter
from ontogpt.io.rdf_exporter import RDFExporter
from ontogpt.io.json_wrapper import dump_minimal_json
from ontogpt.io.yaml_wrapper import dump_minimal_yaml
from ontogpt.templates.core import ExtractionResult

Expand Down Expand Up @@ -112,6 +113,8 @@ def write_extraction(
elif output_format == "owl":
exporter = OWLExporter()
exporter.export(results, output, knowledge_engine.schemaview)
elif output_format == "json":
output.write(dump_minimal_json(results)) # type: ignore
elif output_format == "kgx":
# TODO: enable passing name without extension,
# since there will be multiple output files
Expand Down Expand Up @@ -343,7 +346,7 @@ def extract(

text = textract.process(inputfile).decode("utf-8")
else:
text = open(inputfile, "rb").read().decode(encoding="utf-8",errors="ignore")
text = open(inputfile, "rb").read().decode(encoding="utf-8", errors="ignore")
logging.info(f"Input text: {text}")
inputlist.append(text)
elif inputfile and not Path(inputfile).exists():
Expand Down Expand Up @@ -386,6 +389,7 @@ def extract(
for slot_value in set_slot_value:
slot, value = slot_value.split("=")
setattr(results.extracted_object, slot, value)
logging.info(f"Output format: {output_format}")
write_extraction(results, output, output_format, ke, template, cut_input_text)


Expand Down
36 changes: 36 additions & 0 deletions src/ontogpt/io/json_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""JSON Wrapper."""

import io
import logging
import json
from typing import Any, Optional, TextIO

import pydantic

logger = logging.getLogger(__name__)


def _is_empty(value: Any) -> bool:
    """Return True for None and for empty strings/containers, never for numbers."""
    # bool is a subclass of int, so False and 0 are both kept: they are
    # real data values, not "missing" ones.
    if isinstance(value, (int, float)):
        return False
    return not value


def eliminate_empty(obj: Any, preserve=False) -> Any:
    """Recursively strip empty values from an object.

    Lists and dicts are rebuilt without members that are None or an empty
    string/container. Numeric zero and False are kept, because dropping them
    would silently lose data. Pydantic models are converted with
    ``model_dump()`` and then processed like dicts. Tuples are converted to
    lists with every position kept, so positional meaning is not shifted.

    :param obj: the object to clean
    :param preserve: if True, nothing is filtered out; the object is only
        normalised (models to dicts, tuples to lists)
    :return: the cleaned copy for containers and models, otherwise ``obj``
    """
    if isinstance(obj, list):
        return [eliminate_empty(x, preserve) for x in obj if preserve or not _is_empty(x)]
    if isinstance(obj, dict):
        return {
            k: eliminate_empty(v, preserve)
            for k, v in obj.items()
            if preserve or not _is_empty(v)
        }
    # Plain scalars need no processing; return them before the model check.
    if obj is None or isinstance(obj, (bool, int, float)):
        return obj
    if isinstance(obj, str):
        # str() turns str subclasses into plain strings
        return str(obj)
    if isinstance(obj, tuple):
        return [eliminate_empty(x, preserve) for x in obj]
    if isinstance(obj, pydantic.BaseModel):
        return eliminate_empty(obj.model_dump(), preserve)
    return obj

def dump_minimal_json(obj: Any, minimize=True, file: Optional[TextIO] = None) -> str:
    """Serialize an object to JSON with two-space indentation.

    The object is first passed through ``eliminate_empty``; when ``minimize``
    is True that step strips empty values, otherwise it preserves them.

    :param obj: the object to serialize
    :param minimize: if True, strip empty values before serializing
    :param file: optional text stream; when given, the JSON is written to it
    :return: the JSON text when ``file`` is None, otherwise an empty string
    """
    cleaned = eliminate_empty(obj, not minimize)
    # Compare against None explicitly rather than relying on the stream's
    # truthiness.
    if file is None:
        return json.dumps(cleaned, indent=2)
    json.dump(cleaned, file, indent=2)
    return ""

0 comments on commit ba0e109

Please sign in to comment.