Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include instances as attributes of the relevant class #29

Merged
merged 6 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import defaultdict
import json
import os.path
import shutil
import subprocess
Expand All @@ -7,25 +8,41 @@
from jinja2 import Environment, select_autoescape, FileSystemLoader

from pipeline.translator import PythonBuilder
from pipeline.utils import clone_sources, SchemaLoader
from pipeline.utils import clone_sources, SchemaLoader, InstanceLoader

print("***************************************")
print("*********************************************************")
print(f"Triggering the generation of Python package for openMINDS")
print("***************************************")
print("*********************************************************")

# Step 0 - read code for additional methods
# The snippet is kept as plain text; it is handed to PythonBuilder below and
# rendered into the generated module through the "additional_methods" template variable.
additional_methods = {}
with open("pipeline/src/additional_methods/by_name.py.txt", encoding="utf-8") as fp:
    code = fp.read()
additional_methods["by_name"] = code

# Step 1 - clone central repository in main branch to get the latest sources
clone_sources()
schema_loader = SchemaLoader()
instance_loader = InstanceLoader()
# Remove the output of any previous build before generating a new one
if os.path.exists("target"):
    shutil.rmtree("target")

# Step 2 - load instances
# Resulting structure: instances[version][value of "@type"] -> list of instance dicts
instances = {}
for version in instance_loader.get_instance_versions():
    instances[version] = defaultdict(list)
    for instance_path in instance_loader.find_instances(version):
        # Explicit encoding so the result does not depend on the platform's locale default
        with open(instance_path, encoding="utf-8") as fp:
            instance_data = json.load(fp)
        instances[version][instance_data["@type"]].append(instance_data)

python_modules = defaultdict(list)
for schema_version in schema_loader.get_schema_versions():

# Step 2 - find all involved schemas for the current version
# Step 3 - find all involved schemas for the current version
schemas_file_paths = schema_loader.find_schemas(schema_version)

# Step 3a - figure out which schemas are embedded and which are linked
# Step 4a - figure out which schemas are embedded and which are linked
embedded = set()
linked = set()
for schema_file_path in schemas_file_paths:
Expand All @@ -42,17 +59,18 @@
for schema_identifier in conflicts:
linked.remove(schema_identifier)

# Step 3b - translate and build each openMINDS schema as a Python class
# Step 4b - translate and build each openMINDS schema as a Python class
for schema_file_path in schemas_file_paths:
module_path, class_name = PythonBuilder(
schema_file_path, schema_loader.schemas_sources
schema_file_path, schema_loader.schemas_sources, instances=instances.get(schema_version, None),
additional_methods=additional_methods
).build(embedded=embedded)

parts = module_path.split(".")
parent_path = ".".join(parts[:-1])
python_modules[parent_path].append((parts[-1], class_name))

# Step 4 - create additional files, e.g. __init__.py
# Step 5 - create additional files, e.g. __init__.py
openminds_modules = defaultdict(set)
for path, classes in python_modules.items():
dir_path = ["target", "openminds"] + path.split(".")
Expand Down Expand Up @@ -96,5 +114,5 @@
shutil.copy("pipeline/src/README.md", "target/README.md")
shutil.copy("./LICENSE", "target/LICENSE")

# Step 5 - run formatter
# Step 6 - run formatter
subprocess.call([sys.executable, "-m", "black", "--quiet", "target"])
14 changes: 14 additions & 0 deletions pipeline/src/additional_methods/by_name.py.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@classmethod
def instances(cls):
    """Return all instances of this class that are stored as attributes of the class itself."""
    return [value for value in cls.__dict__.values() if isinstance(value, cls)]

@classmethod
def by_name(cls, name):
    """
    Return the class-level instance whose name, or one of whose synonyms, is `name`.

    The lookup table is built on first use and then cached in `cls._instance_lookup`.

    Raises:
        KeyError: if no instance is registered under `name`.
    """
    if cls._instance_lookup is None:
        # Build into a local dict and publish it only once complete, so that an
        # error part-way through cannot leave a half-filled cache behind.
        lookup = {}
        for instance in cls.instances():
            # Read both attributes defensively: nothing here guarantees that every
            # class receiving this method defines "name" and "synonyms".
            instance_name = getattr(instance, "name", None)
            if instance_name is not None:
                lookup[instance_name] = instance
            synonyms = getattr(instance, "synonyms", None) or ()
            if isinstance(synonyms, str):
                # A lone string must be one key, not one key per character
                synonyms = (synonyms,)
            for synonym in synonyms:
                lookup[synonym] = instance
        cls._instance_lookup = lookup
    return cls._instance_lookup[name]
1 change: 1 addition & 0 deletions pipeline/src/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ class LinkedMetadata(Node):
"""
A Python representation of a metadata node that should have a unique identifier.
"""
_instance_lookup = None

def __init__(self, id=None, **properties):
self.id = id # todo: check this is a URI
Expand Down
7 changes: 7 additions & 0 deletions pipeline/src/module_template.py.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,10 @@ class {{ class_name }}({{ base_class }}):
return super().__init__({%- if base_class == "LinkedMetadata" %}id=id, {%- endif -%}{%- for property in properties -%}{{property.name}}={{property.name}}, {%- endfor -%})

{{ additional_methods }}

{% for instance_name, instance in instances.items() %}
{{ class_name }}.{{ instance_name }} = {{ class_name }}(
{% for key, value in instance.items() -%}
{% if value is string %}{{key}}="{{value}}",{% else %}{{key}}={{value}},{% endif %}
{% endfor -%}
){% endfor %}
15 changes: 9 additions & 6 deletions pipeline/src/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,41 @@
"""

from __future__ import annotations
from collections import defaultdict
from typing import TYPE_CHECKING, Union, List, Optional

if TYPE_CHECKING:
from .base import ContainsMetadata

# Global registry of classes.
#   "names": dotted name (e.g. "openminds.latest.core.Dataset") -> class
#   "types": version -> {type identifier -> class}
registry: dict = {"names": {}, "types": defaultdict(dict)}


def register_class(target_class: "ContainsMetadata"):
    """
    Add a class to the registry.

    Only classes whose module path contains "openminds" and which have a `type_`
    attribute are registered. The version is taken from the second component of
    the module path (e.g. "latest" in "openminds.latest.core...").
    """
    if "openminds" in target_class.__module__:
        parts = target_class.__module__.split(".")
        assert parts[0] == "openminds"
        version = parts[1]
        name = ".".join(parts[0:3] + [target_class.__name__])  # e.g. openminds.latest.core.Dataset

        if hasattr(target_class, "type_"):
            registry["names"][name] = target_class
            type_ = target_class.type_
            registry["types"][version][type_] = target_class


def lookup(class_name: str) -> "ContainsMetadata":
    """
    Return the class whose name is given.

    Raises:
        KeyError: if no class was registered under `class_name`.
    """
    return registry["names"][class_name]


def lookup_type(class_type: str, version: str = "latest") -> "ContainsMetadata":
    """
    Return the class whose global type identifier (a URI) is given.

    Args:
        class_type: the type identifier to look up.
        version: the version whose classes should be searched.

    Raises:
        TypeError: if `class_type` is not a string.
        ValueError: if the type is not registered for the requested version.
    """
    if not isinstance(class_type, str):
        raise TypeError("class type must be a string")
    # Use .get() rather than indexing: "types" is a defaultdict, so indexing would
    # insert an empty entry into the registry for every unknown version looked up.
    types_for_version = registry["types"].get(version, {})
    if class_type in types_for_version:
        return types_for_version[class_type]
    raise ValueError(f"Type '{class_type}' was not found in the registry for version {version}.")

Expand Down
72 changes: 65 additions & 7 deletions pipeline/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,40 @@
from jinja2 import Environment, select_autoescape, FileSystemLoader


# Spelled-out digits, used to replace a leading digit in a generated name
number_names = {
    "0": "zero",
    "1": "one",
    "2": "two",
    "3": "three",
    "4": "four",
    "5": "five",
    "6": "six",
    "7": "seven",
    "8": "eight",
    "9": "nine",
}

# Characters that cannot appear in a Python identifier and what replaces them
# (None deletes the character). None of the replacement texts contains a
# character that is itself replaced, so one translate pass is sufficient.
_NAME_REPLACEMENTS = str.maketrans(
    {"-": "_", ".": "_", "+": "plus", "#": "sharp", ",": "comma", "(": None, ")": None}
)


def generate_python_name(json_name, allow_multiple=False):
    """
    Convert a JSON name into a snake_case Python identifier.

    Surrounding whitespace is stripped, camelCase boundaries become underscores,
    the result is lower-cased, a fixed set of punctuation characters is replaced
    or removed, and a leading digit is spelled out.

    Args:
        json_name: the name to convert.
        allow_multiple: accepted but not used by this function.

    Returns:
        The generated identifier.

    Raises:
        NameError: if the result is not a valid Python identifier
            (this includes empty or whitespace-only input).
    """
    python_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", json_name.strip())
    python_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", python_name).lower()
    python_name = python_name.translate(_NAME_REPLACEMENTS)
    # Slice rather than index so that an empty name falls through to the
    # NameError below instead of raising IndexError here.
    if python_name[:1] in number_names:  # Python variables can't start with a number
        python_name = number_names[python_name[0]] + python_name[1:]
    if not python_name.isidentifier():
        raise NameError(f"Cannot generate a valid Python name from '{json_name}'")
    return python_name


class PythonBuilder(object):
"""docstring"""

def __init__(self, schema_file_path: str, root_path: str):
def __init__(self, schema_file_path: str, root_path: str, instances: Optional[dict] = None,
additional_methods: Optional[dict] = None):
self.template_name = "src/module_template.py.txt"
self.env = Environment(
loader=FileSystemLoader(os.path.dirname(os.path.realpath(__file__))), autoescape=select_autoescape()
Expand All @@ -30,6 +53,8 @@ def __init__(self, schema_file_path: str, root_path: str):
]
with open(schema_file_path, "r") as schema_f:
self._schema_payload = json.load(schema_f)
self.instances = instances or {}
self.additional_methods = additional_methods

@property
def _version_module(self):
Expand Down Expand Up @@ -90,22 +115,47 @@ def get_type(property):
else:
raise NotImplementedError

if self._schema_payload["_type"] in embedded:
openminds_type = self._schema_payload["_type"]
if openminds_type in embedded:
base_class = "EmbeddedMetadata"
else:
base_class = "LinkedMetadata"

def filter_value(value):
    """
    Make a string safe to place between double quotes on one line of generated source.

    Backslashes are doubled, double quotes become single quotes, and line breaks
    become spaces. Non-string values are returned unchanged.
    """
    if isinstance(value, str):
        return (
            value.replace("\\", "\\\\")  # keep backslashes literal instead of starting an escape
            .replace('"', "'")
            .replace("\r\n", " ")
            .replace("\r", " ")  # a bare carriage return would also end the source line
            .replace("\n", " ")
        )
    return value

def filter_instance(instance):
    """
    Reduce a raw instance dict to the entries used to construct the Python object.

    Keys starting with "@" or "http" and entries whose value is None are dropped;
    remaining values are passed through filter_value. The instance's "@id" is
    stored under the key "id".

    Raises:
        KeyError: if the instance has no "@id".
    """
    filtered_instance = {
        k: filter_value(v)
        for k, v in instance.items()
        # startswith() also copes with an empty key, which k[0] would not
        if not k.startswith(("@", "http")) and v is not None
    }
    filtered_instance["id"] = instance["@id"]
    return filtered_instance

instances = {
generate_python_name(instance["@id"].split("/")[-1]) : filter_instance(instance)
for instance in self.instances.get(openminds_type, [])
}
instances = { # sort by key
name: instances[name] for name in sorted(instances)
}

properties = []
for iri, property in self._schema_payload["properties"].items():
allow_multiple = property.get("type", "") == "array"
if allow_multiple:
property_name = property['namePlural']
else:
property_name = property['name']
pythononic_name = generate_python_name(property_name)
properties.append(
{
"name": generate_python_name(property_name),
"name": pythononic_name,
"type": get_type(property), # compress using JSON-LD context
"iri": property['name'], # assumes IRI uses standard @vocab
"iri": property["name"], # assumes IRI uses standard @vocab
"allow_multiple": allow_multiple,
"required": iri in self._schema_payload.get("required", []),
"description": property.get("description", "no description available"),
Expand All @@ -118,16 +168,24 @@ def get_type(property):
}
)
# unused in property: "nameForReverseLink"
for instance in instances.values():
if property["name"] in instance:
instance[pythononic_name] = instance.pop(property['name'])
self.context = {
"docstring": self._schema_payload.get("description", "<description not available>"),
"base_class": base_class,
"preamble": "", # default value, may be updated below
"class_name": self._schema_payload["name"],
"openminds_type": self._schema_payload["_type"],
"openminds_type": openminds_type,
"schema_version": self.version,
"properties": properties,
"additional_methods": "",
"instances": instances
}

if len(instances) > 0:
self.context["additional_methods"] = self.additional_methods["by_name"]

import_map = {
"date": "from datetime import date",
"datetime": "from datetime import datetime",
Expand Down
47 changes: 37 additions & 10 deletions pipeline/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,49 @@

from git import Repo, GitCommandError

source_url = "https://github.com/openMetadataInitiative/openMINDS.git"


def clone_sources():
    """
    Clone the openMINDS schema and instance repositories into ./_sources.

    Any existing "_sources" directory is deleted first. Both repositories are
    cloned with depth 1, into "_sources/schemas" and "_sources/instances".
    """
    # Remove any previous checkout before cloning
    if os.path.exists("_sources"):
        shutil.rmtree("_sources")
    Repo.clone_from(
        "https://github.com/openMetadataInitiative/openMINDS.git",
        to_path="_sources/schemas",
        depth=1,
    )
    Repo.clone_from(
        "https://github.com/openMetadataInitiative/openMINDS_instances.git",
        to_path="_sources/instances",
        depth=1,
    )


class SchemaLoader:
    """Locate schema files below "_sources/schemas/schemas" in the current working directory."""

    def __init__(self):
        self._root_directory = os.path.realpath(".")
        self.schemas_sources = os.path.join(self._root_directory, "_sources/schemas", "schemas")

    def get_schema_versions(self) -> List[str]:
        """Return the names of the entries in the schema source directory, sorted."""
        # os.listdir() returns entries in arbitrary order; sort for reproducible builds
        return sorted(os.listdir(self.schemas_sources))

    def find_schemas(self, version: str) -> List[str]:
        """Return the sorted paths of all "*.schema.omi.json" files found recursively for `version`."""
        return sorted(
            glob.glob(
                os.path.join(self.schemas_sources, version, "**/*.schema.omi.json"),
                recursive=True,
            )
        )


class InstanceLoader:
    """Locate instance files below "_sources/instances/instances" in the current working directory."""

    def __init__(self):
        self._root_directory = os.path.realpath(".")
        self.instances_sources = os.path.join(
            self._root_directory, "_sources/instances", "instances"
        )

    def get_instance_versions(self) -> List[str]:
        """Return the names of the entries in the instance source directory, sorted."""
        # os.listdir() returns entries in arbitrary order; sort for reproducible builds
        return sorted(os.listdir(self.instances_sources))

    def find_instances(self, version: str) -> List[str]:
        """Return the sorted paths of all "*.jsonld" files found recursively for `version`."""
        return sorted(
            glob.glob(
                os.path.join(self.instances_sources, version, "**/*.jsonld"),
                recursive=True,
            )
        )
Loading