Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an option to pass the results of CLI commands through a custom or pre-defined Jinja2 template, allowing the result output to be customized. #286

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
from oaklib.io.streaming_axiom_writer import StreamingAxiomWriter
from oaklib.io.streaming_csv_writer import StreamingCsvWriter
from oaklib.io.streaming_info_writer import StreamingInfoWriter
from oaklib.io.streaming_jinja_writer import StreamingJinjaWriter
from oaklib.io.streaming_json_writer import StreamingJsonWriter
from oaklib.io.streaming_kgcl_writer import StreamingKGCLWriter
from oaklib.io.streaming_markdown_writer import StreamingMarkdownWriter
Expand Down Expand Up @@ -143,6 +144,7 @@
OWLFUN_FORMAT = "ofn"
NL_FORMAT = "nl"
KGCL_FORMAT = "kgcl"
JINJA_FORMAT = "jinja"
HEATMAP_FORMAT = "heatmap"

ONT_FORMATS = [
Expand Down Expand Up @@ -171,6 +173,7 @@
NL_FORMAT: StreamingNaturalLanguageWriter,
KGCL_FORMAT: StreamingKGCLWriter,
HEATMAP_FORMAT: HeatmapWriter,
JINJA_FORMAT: StreamingJinjaWriter,
}


Expand Down Expand Up @@ -351,14 +354,21 @@ def _get_writer(
default_type: Type[StreamingWriter] = StreamingInfoWriter,
datamodel: ModuleType = None,
) -> StreamingWriter:
params = {}
if output_type is None:
typ = default_type
else:
if "//" in output_type:
output_type, param_str = output_type.split("//")
params = {}
for kv in param_str.split(","):
[k, v] = kv.split("=", 1)
params[k] = v
if output_type in WRITERS:
typ = WRITERS[output_type]
else:
raise ValueError(f"Unrecognized output type: {output_type}")
w = typ(ontology_interface=impl)
w = typ(ontology_interface=impl, **params)
if isinstance(w, StreamingRdfWriter) and datamodel is not None:
w.schemaview = package_schemaview(datamodel.__name__)
return w
Expand Down Expand Up @@ -883,8 +893,14 @@ def ontology_metadata(ontologies, output_type: str, output: str, all: bool):
show_default=True,
help="if true then fetch axiom triples with annotations",
)
@click.option(
"--tuples/--no-tuples",
default=False,
show_default=True,
help="if true then show results as simple tuples",
)
@click.argument("terms", nargs=-1)
def term_metadata(terms, reification: bool, output_type: str, output: str):
def term_metadata(terms, reification: bool, tuples: bool, output_type: str, output: str):
"""
Shows term metadata
"""
Expand All @@ -904,8 +920,12 @@ def term_metadata(terms, reification: bool, output_type: str, output: str):
else:
raise NotImplementedError
else:
metadata = impl.entity_metadata_map(curie)
writer.emit(metadata)
if tuples:
for s, p, o, typ in impl.entities_metadata([curie]):
writer.emit(dict(subject=s, predicate=p, object=o, datatype=typ))
else:
metadata = impl.entity_metadata_map(curie)
writer.emit(metadata)
else:
raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")

Expand Down Expand Up @@ -2224,7 +2244,10 @@ def terms(output: str, owl_type, filter_obsoletes: bool):
@main.command()
@output_option
@predicates_option
def roots(output: str, predicates: str):
@click.option("--with-prefix",
multiple=True,
help="Restrict to terms with this prefix(es)")
def roots(output: str, predicates: str, with_prefix):
"""
List all root nodes in the ontology

Expand All @@ -2237,6 +2260,19 @@ def roots(output: str, predicates: str):

runoak -i db/cob.db roots

Many ontologies have native roots as subclasses of terms from other ontologies. To
see native roots or to filter to an ID prefix, use --with-prefix.

Example:

runoak -i sqlite:obo:obi roots -p i --with-prefix OBI

Note this returns a lot of terms, as OBI heavily subclasses other ontologies

Another example:

runoak -i sqlite:obo:ms roots --with-prefix MS

This command is a wrapper onto the "roots" command in the BasicOntologyInterface.

- https://incatools.github.io/ontology-access-kit/interfaces/basic.html#
Expand All @@ -2246,7 +2282,7 @@ def roots(output: str, predicates: str):
impl = settings.impl
if isinstance(impl, OboGraphInterface):
actual_predicates = _process_predicates_arg(predicates)
for curie in impl.roots(actual_predicates):
for curie in impl.roots(actual_predicates, id_prefixes=list(with_prefix) if with_prefix else None):
print(f"{curie} ! {impl.label(curie)}")
else:
raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
Expand Down
39 changes: 36 additions & 3 deletions src/oaklib/implementations/sqldb/sql_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def definition(self, curie: CURIE) -> Optional[str]:
return row.value

def entity_metadata_map(self, curie: CURIE) -> METADATA_MAP:
m = {"id": curie}
m = {"id": [curie]}
# subquery = self.session.query(AnnotationPropertyNode.id)
subquery = self.session.query(RdfTypeStatement.subject).filter(
RdfTypeStatement.object == "owl:AnnotationProperty"
Expand All @@ -351,7 +351,7 @@ def entity_metadata_map(self, curie: CURIE) -> METADATA_MAP:
m[row.predicate] = [m[row.predicate]]
m[row.predicate].append(v)
else:
m[row.predicate] = v
m[row.predicate] = [v]
return m

def ontologies(self) -> Iterable[CURIE]:
Expand Down Expand Up @@ -490,12 +490,45 @@ def outgoing_relationships(
def outgoing_relationship_map(self, *args, **kwargs) -> RELATIONSHIP_MAP:
return pairs_as_dict(self.outgoing_relationships(*args, **kwargs))

def incoming_relationships(
    self, curie: CURIE, predicates: List[PRED_CURIE] = None, entailed=False
) -> Iterator[Tuple[PRED_CURIE, CURIE]]:
    """
    Yield (predicate, subject) pairs for statements whose object is ``curie``.

    Three sources are consulted, in this order:

    1. rows of ``Edge`` (or ``EntailedEdge`` when ``entailed`` is true) whose
       object equals ``curie``;
    2. ``RdfTypeStatement`` rows whose object equals ``curie`` and whose subject
       is among the ``ClassNode`` ids, reported with predicate ``RDF_TYPE``;
    3. only when ``entailed`` is false: ``OwlEquivalentClassStatement`` rows whose
       object equals ``curie`` and whose subject is among the ``ClassNode`` ids,
       reported with predicate ``EQUIVALENT_CLASS``.

    :param curie: the object of the relationships to look up
    :param predicates: if given and non-empty, only these predicates are reported;
        sources 2 and 3 are skipped unless their predicate is in the list
    :param entailed: if true, query ``EntailedEdge`` instead of ``Edge``
    :return: iterator of (predicate, subject) tuples
    """
    tbl = EntailedEdge if entailed else Edge
    q = self.session.query(tbl).filter(tbl.object == curie)
    if predicates:
        q = q.filter(tbl.predicate.in_(predicates))
    # Lazy %-style arguments: the query is only rendered to text when DEBUG
    # logging is actually enabled.
    logging.debug("Querying incoming, curie=%s, predicates=%s, q=%s", curie, predicates, q)
    for row in q:
        yield row.predicate, row.subject
    if not predicates or RDF_TYPE in predicates:
        type_q = self.session.query(RdfTypeStatement.subject).filter(
            RdfTypeStatement.object == curie
        )
        # restrict to subjects that are themselves classes
        cls_subq = self.session.query(ClassNode.id)
        type_q = type_q.filter(RdfTypeStatement.subject.in_(cls_subq))
        for row in type_q:
            yield RDF_TYPE, row.subject
    # equivalence statements are only consulted for the asserted (non-entailed) edge table
    if not entailed and (not predicates or EQUIVALENT_CLASS in predicates):
        equiv_q = self.session.query(OwlEquivalentClassStatement.subject).filter(
            OwlEquivalentClassStatement.object == curie
        )
        cls_subq = self.session.query(ClassNode.id)
        equiv_q = equiv_q.filter(OwlEquivalentClassStatement.subject.in_(cls_subq))
        for row in equiv_q:
            yield EQUIVALENT_CLASS, row.subject

def incoming_relationship_map(self, *args, **kwargs) -> RELATIONSHIP_MAP:
    """
    Collect the pairs produced by :meth:`incoming_relationships` into a map.

    All positional and keyword arguments are forwarded unchanged to
    :meth:`incoming_relationships`; the resulting pairs are handed to
    ``pairs_as_dict``.
    """
    incoming_pairs = self.incoming_relationships(*args, **kwargs)
    return pairs_as_dict(incoming_pairs)

def entailed_outgoing_relationships(
    self, curie: CURIE, predicates: List[PRED_CURIE] = None
) -> Iterable[Tuple[PRED_CURIE, CURIE]]:
    """
    Shorthand for :meth:`outgoing_relationships` with ``entailed=True``.

    :param curie: passed through as the first argument
    :param predicates: passed through as the second argument
    :return: whatever :meth:`outgoing_relationships` returns for the entailed case
    """
    entailed_pairs = self.outgoing_relationships(curie, predicates, entailed=True)
    return entailed_pairs

def incoming_relationship_map(self, curie: CURIE) -> RELATIONSHIP_MAP:
def __OLDincoming_relationship_map(self, curie: CURIE) -> RELATIONSHIP_MAP:
rmap = defaultdict(list)
for row in self.session.query(Edge).filter(Edge.object == curie):
rmap[row.predicate].append(row.subject)
Expand Down
16 changes: 15 additions & 1 deletion src/oaklib/interfaces/basic_ontology_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
PREFIX_MAP = Mapping[NC_NAME, URI]
RELATIONSHIP_MAP = Dict[PRED_CURIE, List[CURIE]]
ALIAS_MAP = Dict[PRED_CURIE, List[str]]
METADATA_MAP = Dict[PRED_CURIE, List[str]]
METADATA_MAP = Dict[PRED_CURIE, List[Any]]
# ANNOTATED_METADATA_MAP = Dict[PRED_CURIE, List[Tuple[str, METADATA_MAP]]]
RELATIONSHIP = Tuple[CURIE, PRED_CURIE, CURIE]

Expand Down Expand Up @@ -404,6 +404,9 @@ def subset_members(self, subset: SUBSET_CURIE) -> Iterable[CURIE]:
"""
raise NotImplementedError

def entities_subsets(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, SUBSET_CURIE]]:
    """
    Entity-named alias for :meth:`terms_subsets`.

    :param curies: forwarded unchanged to :meth:`terms_subsets`
    :return: the iterable produced by :meth:`terms_subsets`
    """
    subset_pairs = self.terms_subsets(curies)
    return subset_pairs

def terms_subsets(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, SUBSET_CURIE]]:
"""
returns iterator over all subsets a term belongs to
Expand All @@ -418,6 +421,9 @@ def terms_subsets(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, SUBSET
if t in curies:
yield t, s

def entities_categories(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, CATEGORY_CURIE]]:
    """
    Entity-named alias for :meth:`terms_categories`.

    The return annotation matches :meth:`terms_categories` (pairs of entity and
    category), since the result is passed through untouched.

    :param curies: forwarded unchanged to :meth:`terms_categories`
    :return: the iterable produced by :meth:`terms_categories`
    """
    return self.terms_categories(curies)

def terms_categories(self, curies: Iterable[CURIE]) -> Iterable[Tuple[CURIE, CATEGORY_CURIE]]:
"""
returns iterator over all categories a term or terms belongs to
Expand Down Expand Up @@ -708,6 +714,14 @@ def entity_alias_map(self, curie: CURIE) -> ALIAS_MAP:
def alias_map_by_curie(self, curie: CURIE) -> ALIAS_MAP:
return self.entity_alias_map(curie)

def entities_metadata(self, curies: List[CURIE]) -> Iterator[Tuple[CURIE, PRED_CURIE, Any, str]]:
    """
    Flatten the metadata maps of several entities into a stream of tuples.

    For each curie, :meth:`entity_metadata_map` is called and every value of
    every predicate is reported separately.

    :param curies: entities to look up
    :return: iterator of (curie, predicate, value, datatype) tuples, where
        datatype is the name of the value's Python type (e.g. ``"str"``)
    """
    for curie in curies:
        metadata = self.entity_metadata_map(curie)
        for pred, values in metadata.items():
            for value in values:
                # Report the type by name: the signature promises a str, and a raw
                # ``type`` object cannot be serialized by text-based writers.
                yield curie, pred, value, type(value).__name__


def entity_metadata_map(self, curie: CURIE) -> METADATA_MAP:
"""
Returns a dictionary keyed by property predicate, with a list of zero or more values,
Expand Down
19 changes: 19 additions & 0 deletions src/oaklib/io/jinja2_templates/info_basic.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{#-
  Basic per-entity report.

  Variables read by this template:
    obj - the entity identifier being reported
    oi  - an object providing curie_to_uri, label, definition,
          entities_subsets, outgoing_relationships and incoming_relationships

  The closing delimiter of this comment trims the following newline, so the
  comment adds nothing to the rendered output.
-#}
ID: {{obj}}
URI: {{oi.curie_to_uri(obj)}}
Name: {{oi.label(obj)}}
Def: {{oi.definition(obj)}}
Subsets:
{% for _, s in oi.entities_subsets([obj]) -%}
- {{s}}
{% endfor %}

Parents:
{% for p,o in oi.outgoing_relationships(obj) -%}
- {{p}} {{o}} {{oi.label(o)}}
{% endfor %}

Children:
{% for p,o in oi.incoming_relationships(obj) -%}
- INV({{p}}) {{o}} {{oi.label(o)}}
{% endfor %}

9 changes: 5 additions & 4 deletions src/oaklib/io/streaming_csv_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class StreamingCsvWriter(StreamingWriter):
delimiter: str = "\t"
writer: csv.DictWriter = None
keys: List[str] = None
include_information_content: bool = None

def emit(self, obj: Union[YAMLRoot, Dict, CURIE], label_fields=None):
if isinstance(obj, dict):
Expand All @@ -55,22 +56,22 @@ def _get_dict(self, curie: CURIE):
definition=oi.definition(curie),
)
for k, vs in oi.entity_alias_map(curie).items():
d[k] = "|".join(vs)
if k != "id":
d[k] = "|".join([str(v) for v in vs])
for _, x in oi.simple_mappings_by_curie(curie):
d["mappings"] = x
for k, vs in oi.entity_metadata_map(curie).items():
if k not in [HAS_DBXREF, HAS_DEFINITION_CURIE]:
if k not in [HAS_DBXREF, HAS_DEFINITION_CURIE, "id"]:
d[k] = str(vs)
if isinstance(oi, OboGraphInterface):
for k, vs in oi.outgoing_relationship_map(curie).items():
d[k] = "|".join(vs)
d[f"{k}_label"] = "|".join([str(oi.label(v)) for v in vs])
if isinstance(oi, SemanticSimilarityInterface):
if self.include_information_content and isinstance(oi, SemanticSimilarityInterface):
d["information_content_via_is_a"] = oi.get_information_content(curie, predicates=[IS_A])
d["information_content_via_is_a_part_of"] = oi.get_information_content(
curie, predicates=[IS_A, PART_OF]
)

return d

def emit_curie(self, curie: CURIE, label=None):
Expand Down
79 changes: 79 additions & 0 deletions src/oaklib/io/streaming_jinja_writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Union

import pkg_resources
from linkml_runtime import CurieNamespace
from linkml_runtime.utils.yamlutils import YAMLRoot

from oaklib.interfaces.obograph_interface import OboGraphInterface
from oaklib.interfaces.semsim_interface import SemanticSimilarityInterface
from oaklib.io.streaming_writer import StreamingWriter
from oaklib.utilities.obograph_utils import DEFAULT_PREDICATE_CODE_MAP
from jinja2 import Template, Environment, FileSystemLoader

predicate_code_map = DEFAULT_PREDICATE_CODE_MAP


@dataclass
class StreamingJinjaWriter(StreamingWriter):
    """
    A writer that streams entries to a Jinja2 template

    The main way this is used is via the command line, passing a parameterized
    output object:

        runoak -i sqlite:obo:cl info .all -O jinja//template_path=test.j2

    This will pass through all CL terms to a template, which might look something
    like this:

    .. code::

        ID: {{obj}}
        Name: {{oi.label(obj)}}

        Parents:
        {% for p,o in oi.outgoing_relationships(obj) %}
        - {{p}} {{o}} {{oi.label(o)}}
        {% endfor %}

    Template variables:

    - ``obj``: the emitted object (the curie itself when emitted via ``emit_curie``)
    - ``oi``: the ontology interface of this writer
    - ``curie``, ``label``, ``interface``: additionally set by ``emit_curie``
    """

    template_folder: Union[Path, str] = None
    """Path to folder in which templates are stored. If omitted, this is inferred from template_path"""

    template_name: Union[Path, str] = None
    """Name of a template in the template folder. Do not specify if path is specified"""

    template_path: Union[Path, str] = None
    """Absolute path for a template. If this is specified, then do not specify name or folder"""

    template: Template = None
    """Jinja2 template object. This is instantiated from path/name"""

    def _template(self) -> Template:
        """
        Return the Jinja2 template, loading and caching it on first use.

        If ``template_path`` is set and no folder is given, folder and name are
        derived from the path. If no folder is known, the packaged
        ``jinja2_templates`` folder is used. A name without a "." gets the
        ``.j2`` suffix appended.

        :raises ValueError: if neither a template path nor a template name is available
        """
        if self.template is None:
            if self.template_path and not self.template_folder:
                p = Path(self.template_path)
                self.template_folder = p.parent
                self.template_name = p.name
            if self.template_name is None:
                # fail with a clear message rather than a TypeError on the "." check below
                raise ValueError(
                    "No template specified: set template_path, or template_name "
                    "(optionally with template_folder)"
                )
            if self.template_folder is None:
                self.template_folder = pkg_resources.resource_filename(__name__, "jinja2_templates")
            # normalize to str so that Path-valued names are handled as well
            name = str(self.template_name)
            if "." not in name:
                name = f"{name}.j2"
            self.template_name = name
            env = Environment(loader=FileSystemLoader(self.template_folder))
            self.template = env.get_template(name)
        return self.template

    def emit_curie(self, curie, label=None, **kwargs):
        """
        Render the template for a single curie and write the result to ``self.file``.

        The curie is exposed as both ``obj`` and ``curie``, and the ontology
        interface as both ``oi`` and ``interface``, so templates written for
        :meth:`emit` also work here.

        :param curie: identifier of the entity to render
        :param label: label to expose to the template; looked up via the
            ontology interface when None
        :param kwargs: accepted but not used
        """
        oi = self.ontology_interface
        if label is None:
            label = oi.label(curie)
        v = self._template().render(obj=curie, oi=oi, curie=curie, label=label, interface=oi)
        self.file.write(v)

    def emit(self, obj: Union[YAMLRoot, dict], label_fields=None):
        """
        Render the template for ``obj`` and write the result to ``self.file``.

        :param obj: object exposed to the template as ``obj``
        :param label_fields: accepted but not used
        """
        oi = self.ontology_interface
        v = self._template().render(obj=obj, oi=oi)
        self.file.write(v)