-
Notifications
You must be signed in to change notification settings - Fork 559
/
defined_namespace_creator.py
223 lines (183 loc) · 6.58 KB
/
defined_namespace_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
This rdflib Python script creates a DefinedNamespace Python file from a given RDF file
It is a very simple script: it finds all things defined in the RDF file within a given
namespace:
<thing> a ?x
where ?x is anything and <thing> starts with the given namespace
Nicholas J. Car, Dec, 2021
"""
from __future__ import annotations
import argparse
import datetime
import keyword
from collections.abc import Iterable
from pathlib import Path
from typing import TYPE_CHECKING
from rdflib.graph import Graph
from rdflib.namespace import DCTERMS, OWL, RDFS, SKOS
from rdflib.util import guess_format
if TYPE_CHECKING:
from rdflib.query import ResultRow
def validate_namespace(namespace: str) -> None:
if not namespace.endswith(("/", "#")):
raise ValueError("The supplied namespace must end with '/' or '#'")
def validate_object_id(object_id: str) -> None:
for c in object_id:
if not c.isupper():
raise ValueError("The supplied object_id must be an all-capitals string")
# This function is not used: it was originally written to get classes and to be used
# alongside a method to get properties, but then it was decided that a single function
# to get everything in the namespace, get_target_namespace_elements(), was both simper
# and better covered all namespace elements, so that function is used instead.
#
# def get_classes(g, target_namespace):
# namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
# q = """
# SELECT DISTINCT ?x ?def
# WHERE {
# # anything that is an instance of owl:Class or rdfs:Class
# # or any subclass of them
# VALUES ?c { owl:Class rdfs:Class }
# ?x rdfs:subClassOf*/a ?c .
#
# # get any definitions, if they have one
# OPTIONAL {
# ?x rdfs:comment|dcterms:description|skos:definition ?def
# }
#
# # only get results for the targetted namespace (supplied by user)
# FILTER STRSTARTS(STR(?x), "xxx")
# }
# """.replace("xxx", target_namespace)
# classes = []
# for r in g.query(q, initNs=namespaces):
# classes.append((str(r[0]), str(r[1])))
#
# classes.sort(key=lambda tup: tup[1])
#
# return classes
def get_target_namespace_elements(
g: Graph, target_namespace: str
) -> tuple[list[tuple[str, str]], list[str], list[str]]:
namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
q = """
SELECT ?s (GROUP_CONCAT(DISTINCT STR(?def)) AS ?defs)
WHERE {
# all things in the RDF data (anything RDF.type...)
?s a ?o .
# get any definitions, if they have one
OPTIONAL {
?s dcterms:description|rdfs:comment|skos:definition ?def
}
# only get results for the target namespace (supplied by user)
FILTER STRSTARTS(STR(?s), "xxx")
FILTER (STR(?s) != "xxx")
}
GROUP BY ?s
""".replace(
"xxx", target_namespace
)
elements: list[tuple[str, str]] = []
for r in g.query(q, initNs=namespaces):
if TYPE_CHECKING:
assert isinstance(r, ResultRow)
elements.append((str(r[0]), str(r[1])))
elements.sort(key=lambda tup: tup[0])
elements_strs: list[str] = []
non_python_elements_strs: list[str] = []
for e in elements:
name = e[0].replace(target_namespace, "")
desc = e[1].replace("\n", " ")
if name.isidentifier() and not keyword.iskeyword(name):
elements_strs.append(f" {name}: URIRef # {desc}\n")
else:
non_python_elements_strs.append(f""" "{name}", # {desc}\n""")
return elements, elements_strs, non_python_elements_strs
def make_dn_file(
output_file_name: Path,
target_namespace: str,
elements_strs: Iterable[str],
non_python_elements_strs: list[str],
object_id: str,
fail: bool,
) -> None:
header = f'''from rdflib.namespace import DefinedNamespace, Namespace
from rdflib.term import URIRef
class {object_id}(DefinedNamespace):
"""
DESCRIPTION_EDIT_ME_!
Generated from: SOURCE_RDF_FILE_EDIT_ME_!
Date: {datetime.datetime.utcnow()}
"""
'''
with open(output_file_name, "w") as f:
f.write(header)
f.write("\n")
f.write(f' _NS = Namespace("{target_namespace}")')
f.write("\n\n")
if fail:
f.write(" _fail = True")
f.write("\n\n")
f.writelines(elements_strs)
if len(non_python_elements_strs) > 0:
f.write("\n")
f.write(" # Valid non-python identifiers")
f.write("\n")
f.write(" _extras = [")
f.write("\n")
f.writelines(non_python_elements_strs)
f.write(" ]")
f.write("\n")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"ontology_file",
type=str,
help="Path to the RDF ontology to extract a DefinedNamespace from.",
)
parser.add_argument(
"target_namespace",
type=str,
help="The namespace within the ontology that you want to create a "
"DefinedNamespace for.",
)
parser.add_argument(
"object_id",
type=str,
help="The RDFlib object ID of the DefinedNamespace, e.g. GEO for GeoSPARQL.",
)
parser.add_argument(
"-f",
"--fail",
dest="fail",
action="store_true",
help="Whether (true) or not (false) to mimic ClosedNamespace and fail on "
"non-element use",
)
parser.add_argument("--no-fail", dest="fail", action="store_false")
parser.set_defaults(feature=False)
args = parser.parse_args()
fmt = guess_format(args.ontology_file)
if fmt is None:
print("The format of the file you've supplied is unknown.")
exit(1)
g = Graph().parse(args.ontology_file, format=fmt)
validate_namespace(args.target_namespace)
validate_object_id(args.object_id)
print(
f"Creating DefinedNamespace file {args.object_id} "
f"for {args.target_namespace}..."
)
print(f"Ontology with {len(g)} triples loaded...")
print("Getting all namespace elements...")
elements = get_target_namespace_elements(g, args.target_namespace)
output_file_name = Path().cwd() / f"_{args.object_id}.py"
print(f"Creating DefinedNamespace Python file {output_file_name}")
make_dn_file(
output_file_name,
args.target_namespace,
elements[1],
elements[2],
args.object_id,
args.fail,
)