-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
XML macros: add named yields, tokenized macros, tokens for attributes, and fix replacement of toplevel yield #13152
Changes from 13 commits
0e730aa
44855fe
c3d3727
c47bd55
8ba6795
0dfcf48
be7a9a9
bd64bfd
c1604f5
7c64721
068f015
372439c
180ed75
5198c82
72141b3
a216858
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,13 +17,30 @@ def load_with_references(path): | |
|
||
macro_paths = _import_macros(root, path) | ||
|
||
# temporarily remove the children of the macros node | ||
# and create a copy. this is done because this allows | ||
# to iterate over all expand nodes of the tree | ||
# that are not included in the macros node | ||
macros_el = _macros_el(root) | ||
if macros_el is not None: | ||
macros_copy = deepcopy(macros_el) | ||
macros_el.clear() | ||
else: | ||
macros_copy = None | ||
|
||
# Collect tokens | ||
tokens = _macros_of_type(root, 'token', lambda el: el.text or '') | ||
tokens = _macros_of_type(macros_copy, 'token', lambda el: el.text or '') | ||
tokens = expand_nested_tokens(tokens) | ||
|
||
# Expand xml macros | ||
macro_dict = _macros_of_type(root, 'xml', lambda el: XmlMacroDef(el)) | ||
macro_dict = _macros_of_type(macros_copy, 'xml', lambda el: XmlMacroDef(el)) | ||
_expand_macros([root], macro_dict, tokens) | ||
|
||
# readd the stashed children of the macros node | ||
# TODO is this this really necesary? Since macro nodes are removed anyway just below. | ||
if macros_copy: | ||
_xml_set_children(macros_el, list(macros_copy)) | ||
|
||
for el in root.xpath('//macro'): | ||
if el.get('type') != 'template': | ||
# Only keep template macros | ||
|
@@ -43,7 +60,8 @@ def template_macro_params(root): | |
with these. | ||
""" | ||
param_dict = {} | ||
macro_dict = _macros_of_type(root, 'template', lambda el: el.text) | ||
macros_el = _macros_el(root) | ||
macro_dict = _macros_of_type(macros_el, 'template', lambda el: el.text) | ||
for key, value in macro_dict.items(): | ||
param_dict[key] = value | ||
return param_dict | ||
|
@@ -63,6 +81,10 @@ def imported_macro_paths(root): | |
|
||
|
||
def _import_macros(root, path): | ||
""" | ||
root the parsed XML tree | ||
path the path to the main xml document | ||
""" | ||
xml_base_dir = os.path.dirname(path) | ||
macros_el = _macros_el(root) | ||
if macros_el is not None: | ||
|
@@ -75,16 +97,14 @@ def _macros_el(root): | |
return root.find('macros') | ||
|
||
|
||
def _macros_of_type(root, type, el_func): | ||
macros_el = root.find('macros') | ||
macro_dict = {} | ||
if macros_el is not None: | ||
macro_els = macros_el.findall('macro') | ||
filtered_els = [(macro_el.get("name"), el_func(macro_el)) | ||
for macro_el in macro_els | ||
if macro_el.get('type') == type] | ||
macro_dict = dict(filtered_els) | ||
return macro_dict | ||
def _macros_of_type(macros_el, type, el_func): | ||
if macros_el is None: | ||
return {} | ||
macro_els = macros_el.findall('macro') | ||
filtered_els = [(macro_el.get("name"), el_func(macro_el)) | ||
for macro_el in macro_els | ||
if macro_el.get('type') == type] | ||
return dict(filtered_els) | ||
|
||
|
||
def expand_nested_tokens(tokens): | ||
|
@@ -106,6 +126,11 @@ def _expand_tokens(elements, tokens): | |
|
||
|
||
def _expand_tokens_for_el(element, tokens): | ||
""" | ||
expand tokens in element and (recursively) in its children | ||
replacements of text attributes and attribute values are | ||
possible | ||
""" | ||
value = element.text | ||
if value: | ||
new_value = _expand_tokens_str(element.text, tokens) | ||
|
@@ -115,6 +140,11 @@ def _expand_tokens_for_el(element, tokens): | |
new_value = _expand_tokens_str(value, tokens) | ||
if not (new_value is value): | ||
element.attrib[key] = new_value | ||
new_key = _expand_tokens_str(key, tokens) | ||
if not (new_key is key): | ||
element.attrib[new_key] = element.attrib[key] | ||
del element.attrib[key] | ||
# recursively expand in childrens | ||
_expand_tokens(list(element), tokens) | ||
|
||
|
||
|
@@ -125,7 +155,7 @@ def _expand_tokens_str(s, tokens): | |
return s | ||
|
||
|
||
def _expand_macros(elements, macros, tokens): | ||
def _expand_macros(elements, macros, tokens, visited=None): | ||
if not macros and not tokens: | ||
return | ||
|
||
|
@@ -134,48 +164,60 @@ def _expand_macros(elements, macros, tokens): | |
expand_el = element.find('.//expand') | ||
if expand_el is None: | ||
break | ||
_expand_macro(element, expand_el, macros, tokens) | ||
if visited is None: | ||
v = list() | ||
bernt-matthias marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
v = visited | ||
_expand_macro(expand_el, macros, tokens, v) | ||
|
||
|
||
def _expand_macro(element, expand_el, macros, tokens): | ||
def _expand_macro(expand_el, macros, tokens, visited): | ||
macro_name = expand_el.get('macro') | ||
assert macro_name is not None, "Attempted to expand macro with no 'macro' attribute defined." | ||
|
||
# check for cycles in the nested macro expansion | ||
assert macro_name not in visited, f"Cycle in nested macros: already expanded {visited} can't expand '{macro_name}' again" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure this must be a cycle ? Also list lookups can be expensive, maybe make visited a set or an ordered set? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Quite. Essentially this is a DFS of the expansions and if a previously expanded macro is expanded again we have a cycle. Note that visisted is kind of a stack (i.e. the macro names are removed at the end of the function.)
Then I will go for ordered set, since otherwise the test can not check for the message (or I just test for the prefix |
||
visited.append(macro_name) | ||
|
||
assert macro_name in macros, f"No macro named {macro_name} found, known macros are {', '.join(macros.keys())}." | ||
macro_def = macros[macro_name] | ||
expanded_elements = deepcopy(macro_def.elements) | ||
|
||
expanded_elements = deepcopy(macro_def.element) | ||
_expand_yield_statements(expanded_elements, expand_el) | ||
|
||
# Recursively expand contained macros. | ||
_expand_macros(expanded_elements, macros, tokens) | ||
macro_tokens = macro_def.macro_tokens(expand_el) | ||
if macro_tokens: | ||
_expand_tokens(expanded_elements, macro_tokens) | ||
|
||
# Recursively expand contained macros. | ||
_expand_macros(expanded_elements, macros, tokens, visited) | ||
_xml_replace(expand_el, expanded_elements) | ||
del visited[-1] | ||
|
||
|
||
def _expand_yield_statements(macro_def, expand_el): | ||
yield_els = [yield_el for macro_def_el in macro_def for yield_el in macro_def_el.findall('.//yield')] | ||
|
||
expand_el_children = list(expand_el) | ||
|
||
""" | ||
Modifies the macro_def element by replacing | ||
1. all named yield tags by the content of the corresponding token tags | ||
- token tags need to be direct children of the expand | ||
- processed in order of definition of the token tags | ||
2. all unnamed yield tags by the non-token children of the expand tag | ||
""" | ||
# replace named yields | ||
for token_el in expand_el.findall('./token'): | ||
name = token_el.attrib.get("name", None) | ||
assert name is not None, "Found unnamed token" + str(token_el.attrib) | ||
yield_els = [yield_el for yield_el in macro_def.findall(f".//yield[@name='{name}']")] | ||
assert len(yield_els) > 0, f"No named yield found for named token {name}" | ||
token_el_children = list(token_el) | ||
for yield_el in yield_els: | ||
_xml_replace(yield_el, token_el_children) | ||
|
||
# replace unnamed yields | ||
yield_els = [yield_el for yield_el in macro_def.findall('.//yield')] | ||
expand_el_children = [c for c in expand_el if c.tag != "token"] | ||
for yield_el in yield_els: | ||
_xml_replace(yield_el, expand_el_children) | ||
|
||
# Replace yields at the top level of a macro, seems hacky approach | ||
replace_yield = True | ||
while replace_yield: | ||
for i, macro_def_el in enumerate(macro_def): | ||
if macro_def_el.tag == "yield": | ||
for target in expand_el_children: | ||
i += 1 | ||
macro_def.insert(i, target) | ||
macro_def.remove(macro_def_el) | ||
continue | ||
|
||
replace_yield = False | ||
|
||
|
||
def _load_macros(macros_el, xml_base_dir): | ||
macros = [] | ||
|
@@ -225,7 +267,6 @@ def _load_imported_macros(macros_el, xml_base_dir): | |
file_macros, current_macro_paths = _load_macro_file(import_path, xml_base_dir) | ||
macros.extend(file_macros) | ||
macro_paths.extend(current_macro_paths) | ||
|
||
return macros, macro_paths | ||
|
||
|
||
|
@@ -270,9 +311,25 @@ def _xml_replace(query, targets): | |
|
||
|
||
class XmlMacroDef: | ||
""" | ||
representation of a (Galaxy) XML macro | ||
|
||
stores the root element of the macro and the parameters. | ||
each parameter is represented as pair containing | ||
- the quote character, default '@' | ||
- parameter name | ||
|
||
parameter names can be given as comma separated list using the | ||
`token` attribute or as attributes `token_XXX` (where `XXX` is the name). | ||
The former option should be used to specify required attributes of the | ||
macro and the latter for optional attributes if the macro (the value of | ||
`token_XXX is used as default value). | ||
|
||
TODO: `token_quote` forbids `"quote"` as character name of optional | ||
parameters | ||
""" | ||
def __init__(self, el): | ||
self.elements = list(el) | ||
self.element = el | ||
parameters = {} | ||
tokens = [] | ||
token_quote = "@" | ||
|
@@ -290,12 +347,17 @@ def __init__(self, el): | |
self.parameters = parameters | ||
|
||
def macro_tokens(self, expand_el): | ||
""" | ||
get a dictionary mapping token names to values. The names are the | ||
parameter names surrounded by the quote character. Values are taken | ||
from the expand_el if absent default values of optional parameters are | ||
used. | ||
""" | ||
tokens = {} | ||
for key, (wrap_char, default_val) in self.parameters.items(): | ||
token_value = expand_el.attrib.get(key, default_val) | ||
if token_value is REQUIRED_PARAMETER: | ||
message = "Failed to expand macro - missing required parameter [%s]." | ||
raise ValueError(message % key) | ||
raise ValueError(f"Failed to expand macro - missing required parameter [{key}].") | ||
token_name = f"{wrap_char}{key.upper()}{wrap_char}" | ||
tokens[token_name] = token_value | ||
return tokens | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
deepcopy essentially pickles and unpickles the object, this could be detrimental to startup speed. Would a
.copy()
be sufficient ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True. I remember that if I do not deepcopy then the following clear also removes the copied elements. Maybe calling
remove(subelement)
for each child helps.In the end this is only a workaround: I was not able to find a way to iterate only the
expand
elements that are not included in themacros
subtree. With lxml it might be possible, but it appears to me that the code needs to be compatible with xml from the stdlib and lxml, or am I wrong?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could import
LXML_AVAILABLE
and make that conditional on this (if it works).