Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature ancestry #598

Merged
merged 7 commits into from
Nov 23, 2017
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/extensions/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ must have the following methods:
Accepts a match object and returns an ElementTree element of a plain
Unicode string.

* **`getExcludes()`**:

Returns a list of tag names that are undesirable ancestors. The pattern
should not match if it would cause the content to be a descendant of one
of the tag names in the list.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Returns an array... tag names in the list." Let's be consistent here. And in Python we call them lists, not arrays.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I work in many languages, so things blur together at times and I make mistakes like this.


Note that any regular expression returned by `getCompiledRegExp` must capture
the whole block. Therefore, they should all start with `r'^(.*?)'` and end
with `r'(.*?)!'`. When using the default `getCompiledRegExp()` method
Expand Down
4 changes: 4 additions & 0 deletions markdown/inlinepatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ def __init__(self, pattern, markdown_instance=None):
if markdown_instance:
self.markdown = markdown_instance

def getExcludes(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason why this is a method? The end user of the API (the extension developer) is defining a list, not "getting" the list. The fact that the list is being retrieved is not relevant to the user. Rather, the user is interested in defining what to exclude. Why not a property exclude = [...]? Does it even need to be specific to an instance of the class?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something like exclude is fine (maybe something a bit more unique in case existing extensions happen to use that name for something else). We can make the default a tuple to be safe, so no one will accidentally modify the base list and have the change ripple through all instances that use the base. That will force people to override the default as opposed to appending to it.

To be safe, we can probably also wrap the treeprocessor access in a try/except.

"""Get tag to exclude."""
return []

def getCompiledRegExp(self):
    """ Return the compiled regular expression held in `self.compiled_re`. """
    compiled = self.compiled_re
    return compiled
Expand Down
56 changes: 45 additions & 11 deletions markdown/treeprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, md):
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
self.markdown = md
self.inlinePatterns = md.inlinePatterns
self.ancestors = []

def __makePlaceholder(self, type):
""" Generate a placeholder """
Expand Down Expand Up @@ -138,7 +139,7 @@ def __processElementText(self, node, subnode, isText=True):

childResult.reverse()
for newChild in childResult:
node.insert(pos, newChild)
node.insert(pos, newChild[0])

def __processPlaceholders(self, data, parent, isText=True):
"""
Expand All @@ -155,10 +156,10 @@ def __processPlaceholders(self, data, parent, isText=True):
def linkText(text):
if text:
if result:
if result[-1].tail:
result[-1].tail += text
if result[-1][0].tail:
result[-1][0].tail += text
else:
result[-1].tail = text
result[-1][0].tail = text
elif not isText:
if parent.tail:
parent.tail += text
Expand Down Expand Up @@ -199,7 +200,7 @@ def linkText(text):
continue

strartIndex = phEndIndex
result.append(node)
result.append((node, self.ancestors[:]))

else: # wrong placeholder
end = index + len(self.__placeholder_prefix)
Expand Down Expand Up @@ -230,6 +231,10 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
Returns: String with placeholders instead of ElementTree elements.

"""
for exclude in pattern.getExcludes():
if exclude.lower() in self.ancestors:
return data, False, 0

match = pattern.getCompiledRegExp().match(data[startIndex:])
leftData = data[:startIndex]

Expand All @@ -247,9 +252,11 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
for child in [node] + list(node):
if not isString(node):
if child.text:
self.ancestors.append(child.tag.lower())
child.text = self.__handleInline(
child.text, patternIndex + 1
)
self.ancestors.pop()
if child.tail:
child.tail = self.__handleInline(
child.tail, patternIndex
Expand All @@ -261,7 +268,17 @@ def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
match.group(1),
placeholder, match.groups()[-1]), True, 0

def run(self, tree):
def __build_ancestors(self, parent, parents):
    """ Append the tag names of `parent` and its ancestors to `parents`.

    Walks upward from `parent` through `self.parent_map` (child -> parent)
    until no further parent is found, then extends `parents` in place with
    the lowercased tag names, outermost first and `parent` itself last.

    Arguments:

    * parent: Element to start from, or None (nothing is added).
    * parents: List that is extended in place with the tag names.

    Returns: None.

    """
    ancestors = []
    # Compare against None explicitly: an ElementTree element without
    # children is falsy, so a plain truth test would skip leaf elements.
    while parent is not None:
        ancestors.append(parent.tag.lower())
        parent = self.parent_map.get(parent)
    # Collected innermost-first; reverse so the outermost tag comes first.
    ancestors.reverse()
    parents.extend(ancestors)

def run(self, tree, ancestors=None):
"""Apply inline patterns to a parsed Markdown tree.

Iterate over ElementTree, find elements with inline tag, apply inline
Expand All @@ -274,28 +291,42 @@ def run(self, tree):
Arguments:

* tree: ElementTree object, representing Markdown tree.
* ancestors: List of parent tag names that preceed the tree node (if needed).

Returns: ElementTree object with applied inline patterns.

"""
self.stashed_nodes = {}

stack = [tree]
# Ensure a valid parent list, but copy passed in lists
# to ensure we don't have the user accidentally change it on us.
tree_parents = [] if ancestors is None else ancestors[:]

self.parent_map = dict((c, p) for p in tree.getiterator() for c in p)
stack = [(tree, tree_parents)]

while stack:
currElement = stack.pop()
currElement, parents = stack.pop()

self.ancestors = parents
self.__build_ancestors(currElement, self.ancestors)

insertQueue = []
for child in currElement:
if child.text and not isinstance(
child.text, util.AtomicString
):
self.ancestors.append(child.tag.lower())
text = child.text
child.text = None
lst = self.__processPlaceholders(
self.__handleInline(text), child
)
for l in lst:
self.parent_map[l[0]] = child
stack += lst
insertQueue.append((child, lst))
self.ancestors.pop()
if child.tail:
tail = self.__handleInline(child.tail)
dumby = util.etree.Element('d')
Expand All @@ -306,9 +337,11 @@ def run(self, tree):
pos = list(currElement).index(child) + 1
tailResult.reverse()
for newChild in tailResult:
currElement.insert(pos, newChild)
self.parent_map[newChild[0]] = currElement
currElement.insert(pos, newChild[0])
if len(child):
stack.append(child)
self.parent_map[child] = currElement
stack.append((child, self.ancestors[:]))

for element, lst in insertQueue:
if self.markdown.enable_attributes:
Expand All @@ -317,7 +350,8 @@ def run(self, tree):
element.text, element
)
i = 0
for newChild in lst:
for obj in lst:
newChild = obj[0]
if self.markdown.enable_attributes:
# Processing attributes
if newChild.tail and isString(newChild.tail):
Expand Down
54 changes: 54 additions & 0 deletions tests/test_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,3 +770,57 @@ def testAppend(self):
self.assertEqual('|' in md.ESCAPED_CHARS, True)
md2 = markdown.Markdown()
self.assertEqual('|' not in md2.ESCAPED_CHARS, True)


# Exercises the ancestor-exclusion hook: an inline pattern lists tag names
# via getExcludes() and must not be applied to text nested inside those tags.
class TestAncestorExclusion(unittest.TestCase):
""" Tests exclusion of tags in ancestor list. """

# Inline pattern used by the tests below; it excludes `a` as an ancestor.
class AncestorExample(markdown.inlinepatterns.SimpleTagPattern):
""" Ancestor Test. """

def getExcludes(self):
""" Tags to exclude. """
# The pattern is skipped whenever 'a' is among the current ancestors.
return ['a']

def handleMatch(self, m):
""" Handle match. """
# Build an element with this pattern's tag and use capture group 3
# of the match as its text.
el = markdown.util.etree.Element(self.tag)
el.text = m.group(3)
return el

# Extension that registers AncestorExample under the key "ancestor-test".
class AncestorExtension(markdown.Extension):

def __init__(self, *args, **kwargs):
"""Initialize."""

# NOTE(review): the base Extension initializer is not called here and
# the received args/kwargs are ignored; only an empty config is set.
self.config = {}

def extendMarkdown(self, md, md_globals):
"""Modify inline patterns."""

# `+text+` syntax rendered with the 'strong' tag.
# NOTE(review): the `\2` back-reference and group(3) in handleMatch
# presumably rely on the base pattern class prepending one capture
# group to this expression - confirm against inlinepatterns.Pattern.
pattern = r'(\+)([^\+]+)\2'
md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong')

def setUp(self):
"""Setup markdown object."""
self.md = markdown.Markdown(extensions=[TestAncestorExclusion.AncestorExtension()])

def test_ancestors(self):
""" Test that an extension can exclude parent tags. """
# `+test+` outside the link is converted; `+link+` inside the <a>
# element must stay as literal text.
test = """
Some +test+ and a [+link+](http://test.com)
"""
result = """<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>"""

self.md.reset()
self.assertEqual(self.md.convert(test), result)

def test_ancestors_tail(self):
""" Test that an extension can exclude parent tags when dealing with a tail. """
# `+strong+` follows the <em> element inside the link (tail text) and
# must stay literal as well, just like `+em+` inside the <em>.
test = """
[***+em+*+strong+**](http://test.com)
"""
result = """<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>"""

self.md.reset()
self.assertEqual(self.md.convert(test), result)