-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils_cv.py
140 lines (130 loc) · 6.2 KB
/
utils_cv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 9 16:46:36 2022
@author: Yann Miquel
"""
import xml.etree.ElementTree as ET
import collections
def get_description(root: ET.Element):
    """Collect the default description of every described column.

    The ``measure``, ``attribute`` and ``calculatedAttribute`` descendants of
    *root* are scanned in that order. For each of them, every ``descriptions``
    descendant contributes its ``defaultDescription`` attribute.

    Args:
        root: Root element of the parsed XML document.

    Returns:
        A dict mapping the ``id`` attribute of each described element to its
        ``defaultDescription``. When an id is met more than once, the last
        value read wins.
    """
    # Order matters: a later path overwrites an id already set by an earlier one.
    column_paths = ('.//measure/.', ".//attribute/.", ".//calculatedAttribute/.")
    descriptions = {}
    for column_path in column_paths:
        for column in root.findall(column_path):
            column_id = column.get("id")
            for label in column.findall(".//descriptions/."):
                descriptions[column_id] = label.get("defaultDescription")
    return descriptions
def get_elements_in_nodes(root: ET.Element):
    """Collect formula, datatype and owning node of each calculated view attribute.

    Every ``calculationView`` descendant of *root* is scanned for
    ``calculatedViewAttribute`` descendants.

    Args:
        root: Root element of the parsed XML document.

    Returns:
        A ``defaultdict(dict)`` keyed by the attribute ``id``. Each value holds
        ``"formule"`` (text of the first ``formula`` descendant),
        ``"node_definition"`` (``id`` of the enclosing ``calculationView``) and
        ``"datatype"``. ``DECIMAL`` is rendered as ``DECIMAL(length,scale)``,
        ``NVARCHAR`` as ``NVARCHAR(length)``; any other datatype is kept as is.
        An id found in several nodes keeps the values of the last one visited.

    Raises:
        AttributeError: If a ``calculatedViewAttribute`` has no ``formula``
            descendant.
    """
    columns = collections.defaultdict(dict)
    for view in root.findall('.//calculationView'):
        view_id = view.get("id")
        for column in view.findall('.//calculatedViewAttribute/.'):
            # Read the formula first so a missing <formula> fails before any
            # entry is created for this column.
            formula_text = column.find(".//formula").text
            base_type = column.get("datatype")
            if base_type == "DECIMAL":
                type_label = f'{base_type}({column.get("length")},{column.get("scale")})'
            elif base_type == "NVARCHAR":
                type_label = f'{base_type}({column.get("length")})'
            else:
                type_label = base_type
            entry = columns[column.get("id")]
            entry["formule"] = formula_text
            entry["node_definition"] = view_id
            entry["datatype"] = type_label
    return columns
def _format_datatype(typed: ET.Element):
    """Render the datatype described by the attributes of *typed*.

    ``DECIMAL`` becomes ``DECIMAL(length,scale)``, ``NVARCHAR`` becomes
    ``NVARCHAR(length)``; any other value of the ``datatype`` attribute
    (including a missing one, i.e. ``None``) is returned unchanged.
    """
    datatype = typed.get("datatype")
    if datatype == "DECIMAL":
        return f'{datatype}({typed.get("length")},{typed.get("scale")})'
    if datatype == "NVARCHAR":
        return f'{datatype}({typed.get("length")})'
    return datatype


def get_elements_in_final(root: ET.Element):
    """Collect formula and datatype of the columns defined in the logical model.

    For every ``logicalModel`` descendant of *root*, two kinds of columns are
    recorded:

    * each ``calculatedAttribute`` descendant, whose formula and datatype are
      read from its direct ``keyCalculation`` child;
    * each ``measure`` descendant that has at least one ``formula``
      descendant, whose datatype is read from the measure itself. Measures
      without a formula are not recorded.

    Args:
        root: Root element of the parsed XML document.

    Returns:
        A ``defaultdict(dict)`` keyed by the column ``id``. Each value holds
        ``"formule"``, ``"datatype"`` and ``"node_definition"`` (always the
        string ``"Final node"``).

    Raises:
        AttributeError: If a ``calculatedAttribute`` has no ``keyCalculation``
            child, or that child has no ``formula`` child.
    """
    columns = collections.defaultdict(dict)
    for model in root.findall('.//logicalModel'):
        for attribute in model.findall(".//calculatedAttribute"):
            # Look the <keyCalculation> child up once instead of once per
            # attribute read.
            key_calculation = attribute.find("keyCalculation")
            formula_text = key_calculation.find("formula").text
            entry = columns[attribute.get("id")]
            entry["formule"] = formula_text
            entry["datatype"] = _format_datatype(key_calculation)
            entry["node_definition"] = "Final node"
        for measure in model.findall('.//measure/.'):
            # Iterating over the formulas doubles as a filter: a measure with
            # no <formula> never enters the loop. With several formulas the
            # last one wins.
            for formula in measure.findall(".//formula/."):
                entry = columns[measure.get("id")]
                entry["formule"] = formula.text
                entry["datatype"] = _format_datatype(measure)
                entry["node_definition"] = "Final node"
    return columns
def get_elements(root: ET.Element):
    """Merge the columns of the intermediate nodes and of the final node.

    Args:
        root: Root element of the parsed XML document.

    Returns:
        A ``(elements, cv_name)`` tuple. ``elements`` is the union of the
        mappings returned by ``get_elements_in_nodes`` and
        ``get_elements_in_final``; ``cv_name`` is the ``id`` attribute of
        *root* (``None`` when absent).
    """
    cv_name = root.get("id")
    # Right-hand operand wins on duplicate keys: a column id present in both
    # mappings keeps the entry coming from the final node.
    elements = get_elements_in_nodes(root) | get_elements_in_final(root)
    return elements, cv_name
# Labels used in the generated Markdown, per supported language code.
# The trailing space in the French labels puts a space before the colon.
_MARKDOWN_LABELS = {
    "FR": {
        "column_name": "Nom du champ ",
        "no_semantics": "Champs absents de la sémantique",
        "dtype": "Type de champ ",
        "where": "Node de définition ",
    },
    "EN": {
        "column_name": "Name of the column",
        "no_semantics": "Column missing in the semantics",
        "dtype": "Datatype",
        "where": "Node of definition",
    },
}


def _render_field(champ, details, labels):
    """Return the bullet list and SQL code fence describing one column."""
    return (
        f"* {labels['column_name']}: `{champ}`\n\n"
        f"* {labels['dtype']}: {details.get('datatype')}\n\n"
        f"* {labels['where']}: {details.get('node_definition')}\n\n"
        f"```sql\n{details.get('formule')}\n```"
        "\n\n"
    )


def generate_markdown(elements:dict, cv_name:str, desc:dict, lang="FR"):
    """Generate the Markdown documentation of a calculation view.

    The output starts with a ``## Calculation view: <cv_name>`` title. Columns
    that have a non-empty description in *desc* come first, each under a
    ``###`` heading carrying that description. The remaining columns are
    listed under a single "missing in the semantics" heading, which is always
    emitted. Columns are sorted by id within each group.

    Args:
        elements: Mapping of column id to a dict with the keys ``"datatype"``,
            ``"node_definition"`` and ``"formule"``.
        cv_name: Name printed in the title.
        desc: Mapping of column id to its description.
        lang: Language of the labels, ``"FR"`` or ``"EN"`` (case-insensitive).

    Returns:
        The Markdown text.

    Raises:
        ValueError: If *lang* is not a supported language code.
    """
    labels = _MARKDOWN_LABELS.get(str(lang).upper())
    if labels is None:
        # Fail early with a clear message instead of hitting an unbound label
        # variable further down.
        supported = ", ".join(sorted(_MARKDOWN_LABELS))
        raise ValueError(
            f"Unsupported language {lang!r}; expected one of: {supported}")

    names = sorted(elements)
    # Collect the fragments and join once rather than growing a string.
    parts = [f"## Calculation view: {cv_name}\n\n"]
    for champ in names:
        title = desc.get(champ)
        if title:
            parts.append(f"### {title}\n\n")
            parts.append(_render_field(champ, elements[champ], labels))
    parts.append(f"### {labels['no_semantics']}\n\n")
    for champ in names:
        if not desc.get(champ):
            parts.append(_render_field(champ, elements[champ], labels))
    return "".join(parts)
def cv_to_markdown(file, lang="FR"):
    """Parse an XML file and return the Markdown generated from it.

    Args:
        file: Filename or file object, passed as is to ``ET.parse``.
        lang: Language code forwarded to ``generate_markdown``.

    Returns:
        The Markdown text, or an empty string when ``get_elements`` finds no
        column in the document.
    """
    root = ET.parse(file).getroot()
    columns, view_name = get_elements(root)
    if columns:
        # Descriptions are only looked up when there is something to document.
        return generate_markdown(columns,
                                 view_name,
                                 get_description(root),
                                 lang=lang)
    return ""