Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XML macros: add named yields, tokenized macros, tokens for attributes, and fix replacement of toplevel yield #13152

Merged
merged 16 commits into from
Jan 19, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 104 additions & 42 deletions lib/galaxy/util/xml_macros.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from copy import deepcopy
from copy import copy, deepcopy

from galaxy.util import parse_xml

Expand All @@ -17,13 +17,30 @@ def load_with_references(path):

macro_paths = _import_macros(root, path)

# temporarily remove the children of the macros node
# and create a copy. this is done because this allows
# to iterate over all expand nodes of the tree
# that are not included in the macros node
macros_el = _macros_el(root)
if macros_el is not None:
macros_copy = copy(macros_el)
macros_el.clear()
else:
macros_copy = None

# Collect tokens
tokens = _macros_of_type(root, 'token', lambda el: el.text or '')
tokens = _macros_of_type(macros_copy, 'token', lambda el: el.text or '')
tokens = expand_nested_tokens(tokens)

# Expand xml macros
macro_dict = _macros_of_type(root, 'xml', lambda el: XmlMacroDef(el))
macro_dict = _macros_of_type(macros_copy, 'xml', lambda el: XmlMacroDef(el))
_expand_macros([root], macro_dict, tokens)

# readd the stashed children of the macros node
# TODO is this this really necesary? Since macro nodes are removed anyway just below.
if macros_copy:
_xml_set_children(macros_el, list(macros_copy))

for el in root.xpath('//macro'):
if el.get('type') != 'template':
# Only keep template macros
Expand All @@ -43,7 +60,8 @@ def template_macro_params(root):
with these.
"""
param_dict = {}
macro_dict = _macros_of_type(root, 'template', lambda el: el.text)
macros_el = _macros_el(root)
macro_dict = _macros_of_type(macros_el, 'template', lambda el: el.text)
for key, value in macro_dict.items():
param_dict[key] = value
return param_dict
Expand All @@ -63,6 +81,10 @@ def imported_macro_paths(root):


def _import_macros(root, path):
"""
root the parsed XML tree
path the path to the main xml document
"""
xml_base_dir = os.path.dirname(path)
macros_el = _macros_el(root)
if macros_el is not None:
Expand All @@ -75,16 +97,14 @@ def _macros_el(root):
return root.find('macros')


def _macros_of_type(root, type, el_func):
macros_el = root.find('macros')
macro_dict = {}
if macros_el is not None:
macro_els = macros_el.findall('macro')
filtered_els = [(macro_el.get("name"), el_func(macro_el))
for macro_el in macro_els
if macro_el.get('type') == type]
macro_dict = dict(filtered_els)
return macro_dict
def _macros_of_type(macros_el, type, el_func):
if macros_el is None:
return {}
macro_els = macros_el.findall('macro')
filtered_els = [(macro_el.get("name"), el_func(macro_el))
for macro_el in macro_els
if macro_el.get('type') == type]
return dict(filtered_els)


def expand_nested_tokens(tokens):
Expand All @@ -106,6 +126,11 @@ def _expand_tokens(elements, tokens):


def _expand_tokens_for_el(element, tokens):
"""
expand tokens in element and (recursively) in its children
replacements of text attributes and attribute values are
possible
"""
value = element.text
if value:
new_value = _expand_tokens_str(element.text, tokens)
Expand All @@ -115,6 +140,11 @@ def _expand_tokens_for_el(element, tokens):
new_value = _expand_tokens_str(value, tokens)
if not (new_value is value):
element.attrib[key] = new_value
new_key = _expand_tokens_str(key, tokens)
if not (new_key is key):
element.attrib[new_key] = element.attrib[key]
del element.attrib[key]
# recursively expand in childrens
_expand_tokens(list(element), tokens)


Expand All @@ -125,7 +155,7 @@ def _expand_tokens_str(s, tokens):
return s


def _expand_macros(elements, macros, tokens):
def _expand_macros(elements, macros, tokens, visited=None):
if not macros and not tokens:
return

Expand All @@ -134,48 +164,60 @@ def _expand_macros(elements, macros, tokens):
expand_el = element.find('.//expand')
if expand_el is None:
break
_expand_macro(element, expand_el, macros, tokens)
if visited is None:
v = []
else:
v = visited
_expand_macro(expand_el, macros, tokens, v)


def _expand_macro(element, expand_el, macros, tokens):
def _expand_macro(expand_el, macros, tokens, visited):
macro_name = expand_el.get('macro')
assert macro_name is not None, "Attempted to expand macro with no 'macro' attribute defined."

# check for cycles in the nested macro expansion
assert macro_name not in visited, f"Cycle in nested macros: already expanded {visited} can't expand '{macro_name}' again"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure this must be a cycle ? Also list lookups can be expensive, maybe make visited a set or an ordered set?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure this must be a cycle ?

Quite. Essentially this is a DFS of the expansions and if a previously expanded macro is expanded again we have a cycle. Note that visisted is kind of a stack (i.e. the macro names are removed at the end of the function.)

Also list lookups can be expensive, maybe make visited a set or an ordered set?

Then I will go for ordered set, since otherwise the test can not check for the message (or I just test for the prefix "Cycle in nested macros".

visited.append(macro_name)

assert macro_name in macros, f"No macro named {macro_name} found, known macros are {', '.join(macros.keys())}."
macro_def = macros[macro_name]
expanded_elements = deepcopy(macro_def.elements)

expanded_elements = deepcopy(macro_def.element)
_expand_yield_statements(expanded_elements, expand_el)

# Recursively expand contained macros.
_expand_macros(expanded_elements, macros, tokens)
macro_tokens = macro_def.macro_tokens(expand_el)
if macro_tokens:
_expand_tokens(expanded_elements, macro_tokens)

# Recursively expand contained macros.
_expand_macros(expanded_elements, macros, tokens, visited)
_xml_replace(expand_el, expanded_elements)
del visited[-1]


def _expand_yield_statements(macro_def, expand_el):
yield_els = [yield_el for macro_def_el in macro_def for yield_el in macro_def_el.findall('.//yield')]

expand_el_children = list(expand_el)

"""
Modifies the macro_def element by replacing
1. all named yield tags by the content of the corresponding token tags
- token tags need to be direct children of the expand
- processed in order of definition of the token tags
2. all unnamed yield tags by the non-token children of the expand tag
"""
# replace named yields
for token_el in expand_el.findall('./token'):
name = token_el.attrib.get("name", None)
assert name is not None, "Found unnamed token" + str(token_el.attrib)
yield_els = [yield_el for yield_el in macro_def.findall(f".//yield[@name='{name}']")]
assert len(yield_els) > 0, f"No named yield found for named token {name}"
token_el_children = list(token_el)
for yield_el in yield_els:
_xml_replace(yield_el, token_el_children)

# replace unnamed yields
yield_els = [yield_el for yield_el in macro_def.findall('.//yield')]
expand_el_children = [c for c in expand_el if c.tag != "token"]
for yield_el in yield_els:
_xml_replace(yield_el, expand_el_children)

# Replace yields at the top level of a macro, seems hacky approach
replace_yield = True
while replace_yield:
for i, macro_def_el in enumerate(macro_def):
if macro_def_el.tag == "yield":
for target in expand_el_children:
i += 1
macro_def.insert(i, target)
macro_def.remove(macro_def_el)
continue

replace_yield = False


def _load_macros(macros_el, xml_base_dir):
macros = []
Expand Down Expand Up @@ -225,7 +267,6 @@ def _load_imported_macros(macros_el, xml_base_dir):
file_macros, current_macro_paths = _load_macro_file(import_path, xml_base_dir)
macros.extend(file_macros)
macro_paths.extend(current_macro_paths)

return macros, macro_paths


Expand Down Expand Up @@ -270,9 +311,25 @@ def _xml_replace(query, targets):


class XmlMacroDef:
"""
representation of a (Galaxy) XML macro

stores the root element of the macro and the parameters.
each parameter is represented as pair containing
- the quote character, default '@'
- parameter name

parameter names can be given as comma separated list using the
`token` attribute or as attributes `token_XXX` (where `XXX` is the name).
The former option should be used to specify required attributes of the
macro and the latter for optional attributes if the macro (the value of
`token_XXX is used as default value).

TODO: `token_quote` forbids `"quote"` as character name of optional
parameters
"""
def __init__(self, el):
self.elements = list(el)
self.element = el
parameters = {}
tokens = []
token_quote = "@"
Expand All @@ -290,12 +347,17 @@ def __init__(self, el):
self.parameters = parameters

def macro_tokens(self, expand_el):
"""
get a dictionary mapping token names to values. The names are the
parameter names surrounded by the quote character. Values are taken
from the expand_el if absent default values of optional parameters are
used.
"""
tokens = {}
for key, (wrap_char, default_val) in self.parameters.items():
token_value = expand_el.attrib.get(key, default_val)
if token_value is REQUIRED_PARAMETER:
message = "Failed to expand macro - missing required parameter [%s]."
raise ValueError(message % key)
raise ValueError(f"Failed to expand macro - missing required parameter [{key}].")
token_name = f"{wrap_char}{key.upper()}{wrap_char}"
tokens[token_name] = token_value
return tokens
Expand Down
Loading