-
Notifications
You must be signed in to change notification settings - Fork 29
/
kgcl_utilities.py
123 lines (103 loc) · 3.27 KB
/
kgcl_utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import base64
import json
import re
import sys
import uuid
from io import TextIOWrapper
from pathlib import Path
from typing import Iterator, List, Optional, TextIO, Union
import kgcl_schema.datamodel.kgcl as kgcl
import kgcl_schema.grammar.parser as kgcl_parser
from kgcl_schema.grammar.render_operations import render
from linkml_runtime.dumpers import json_dumper, yaml_dumper
from oaklib.datamodels.vocabulary import IS_A
from oaklib.types import CURIE
# Matches a string that is wrapped in single quotes from start to end;
# group 1 captures the text between the outer quotes.
re_quoted = re.compile("^'(.*)'$")
def generate_change_id() -> CURIE:
    """
    Creates a fresh identifier for use on a change object.

    :return: a string made of the "uuid:" prefix followed by a random UUID4
    """
    random_uuid = uuid.uuid4()
    return f"uuid:{random_uuid}"
def assign_id(change: kgcl.Change):
    """
    Derives an identifier from a change object.

    The identifier is the base64 encoding of the string form of the change.
    Despite the function's name, the change object itself is not modified;
    the computed value is only returned.

    :param change: change object to derive the identifier from
    :return: base64-encoded bytes of ``str(change)``
    """
    # UTF-8 produces the same bytes as ASCII for pure-ASCII text, but does not
    # raise UnicodeEncodeError when the change contains non-ASCII characters.
    message_bytes = str(change).encode("utf-8")
    return base64.b64encode(message_bytes)
def parse_kgcl_files(
    files: List[Union[str, Path, TextIO]], changes_format="json"
) -> Iterator[kgcl.Change]:
    """
    Parses a list of KGCL files yielding Change objects.

    Each entry may be a path, which is opened for reading and closed again
    once it has been parsed, or an already-open text stream (any object with
    a ``read`` method), which is read but left open for the caller to close.

    :param files: paths or open text streams to parse
    :param changes_format: default is "json" (a JSON array of change objects);
        any other value is parsed as one KGCL statement per line, skipping
        blank lines and lines starting with "#"
    :return: change iterator over the changes of all files, in file order
    """
    changes = []
    for file in files:
        if hasattr(file, "read"):
            # Caller-owned stream: consume it but leave closing to the caller.
            changes.extend(_parse_kgcl_stream(file, changes_format))
        else:
            # We opened it, so we close it (also on parse errors).
            with open(str(file), "r") as stream:
                changes.extend(_parse_kgcl_stream(stream, changes_format))
    # NOTE: tidy_change_object() is currently not applied to parsed changes.
    yield from changes


def _parse_kgcl_stream(stream: TextIO, changes_format: str):
    """
    Parses all changes from a single open text stream.

    :param stream: open text stream positioned at the start of the content
    :param changes_format: "json" or anything else for line-based KGCL
    :return: the changes parsed from the stream
    """
    if changes_format == "json":
        import kgcl_schema.utils as kgcl_utilities

        objs = json.load(stream)
        for obj in objs:
            # The serialized form uses "@type"; from_dict expects "type".
            obj["type"] = obj.pop("@type")
        return kgcl_utilities.from_dict({"change_set": objs}).change_set

    parsed = []
    for line in stream:
        line = line.strip()
        # Skip blank lines and comment lines.
        if not line or line.startswith("#"):
            continue
        parsed.append(kgcl_parser.parse_statement(line))
    return parsed
def write_kgcl(
    changes: List[kgcl.Change], file: Optional[Union[str, Path, TextIO]], changes_format="json"
):
    """
    Writes a list of changes to a file.

    :param changes: change objects to serialize
    :param file: destination; None writes to standard output, an open text
        stream (any object with a ``write`` method) is written to and left
        open, and anything else is treated as a path that is opened for
        writing and closed once the output has been written
    :param changes_format: "json" (the default) or "yaml" to use the
        corresponding dumper; any other value renders each change with
        ``render`` and joins the results with newlines
    :return:
    """
    # Serialize before touching the destination so that a serialization
    # failure does not leave behind a truncated output file.
    if changes_format == "json":
        out = json_dumper.dumps(changes)
    elif changes_format == "yaml":
        out = yaml_dumper.dumps(changes)
    else:
        out = "\n".join([render(c) for c in changes])

    if file is None:
        sys.stdout.write(out)
    elif hasattr(file, "write"):
        # Caller-owned stream: write to it but leave closing to the caller.
        file.write(out)
    else:
        # We opened it, so we close it, which also flushes the output.
        with open(str(file), "w") as stream:
            stream.write(out)
def tidy_change_object(change: kgcl.Change):
    """
    Cleans up a Change object in place.

    The main KGCL parser sometimes leaves surrounding quotes on values or
    emits unexpanded predicates. Once those issues are fixed in the main KGCL
    repo, the corresponding fix-ups here can be removed.

    :param change: change object to clean up; it is modified in place
    :return:
    """

    def _strip_quotes(attr_name: str):
        # Replace a single-quoted attribute value with its unquoted content.
        current = getattr(change, attr_name)
        if not current:
            return
        quoted = re_quoted.match(current)
        if quoted is not None:
            setattr(change, attr_name, quoted.group(1))

    # Which attribute may carry stray quotes, per change type.
    quoted_attrs = (
        (kgcl.NodeCreation, "name"),
        (kgcl.NodeRename, "new_value"),
    )
    for change_type, attr_name in quoted_attrs:
        if isinstance(change, change_type):
            _strip_quotes(attr_name)

    if isinstance(change, kgcl.EdgeCreation) and change.predicate == "is_a":
        change.predicate = IS_A