From d015e31bba674923d9459b793c40a6c76c8d4fd6 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 10 Nov 2023 22:09:30 +0100 Subject: [PATCH 1/8] Allow attr_list quoted values to contain curly braces How it worked before: * Extract the content without allowing any `}` in it, and require that it ends with `}` - for block elements it's anchored to the end of the line, otherwise not. * Parse the content in more detail. No edge cases with `}` can arise. If parsing is interrupted by some unrecognized token, discard the rest of the string. How it works now: * Extract the content *and allow* `}` in it, and require that it ends with `}` - for block elements it's anchored to the end of the line, otherwise not. * Parse the content in more detail. Allow `}` only within the quoted parts, otherwise interrupt parsing like for any other unrecognized token. If parsing is interrupted, there is remaining unrecognized text. Ideally perhaps we would bail out at this point entirely (and not recognize it as an attr_list), but to preserve historic behavior, any extra text before `}` is just discarded. If there is an extra `}` in the remaining text: * For block elements: that must mean that the attr_list syntax did not in fact terminate at the end of the line but earlier. So, bail out and do not register any attributes and do not change the original text. * For inline elements: that must mean that we just overmatched a bit, but that's OK: we just assign attrs as normal and put the extra text back into the string. As mentioned, any extra text *before* `}` is just discarded. 
--- markdown/extensions/attr_list.py | 67 +++++++++++++++++++------------- tests/extensions/attr_list.html | 7 +++- tests/extensions/attr_list.txt | 10 +++++ 3 files changed, 57 insertions(+), 27 deletions(-) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 7ce3f9925..1ec852f4f 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -57,10 +57,10 @@ def _handle_word(s, t): _scanner = re.Scanner([ - (r'[^ =]+=".*?"', _handle_double_quote), - (r"[^ =]+='.*?'", _handle_single_quote), - (r'[^ =]+=[^ =]+', _handle_key_value), - (r'[^ =]+', _handle_word), + (r'[^ =}]+=".*?"', _handle_double_quote), + (r"[^ =}]+='.*?'", _handle_single_quote), + (r'[^ =}]+=[^ =}]+', _handle_key_value), + (r'[^ =}]+', _handle_word), (r' ', None) ]) @@ -76,7 +76,7 @@ def isheader(elem: Element) -> bool: class AttrListTreeprocessor(Treeprocessor): - BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}' + BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}' HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE)) BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE)) INLINE_RE = re.compile(r'^{}'.format(BASE_RE)) @@ -106,49 +106,62 @@ def run(self, doc: Element) -> None: # use tail of last child. no `ul` or `ol`. m = RE.search(elem[-1].tail) if m: - self.assign_attrs(elem, m.group(1)) - elem[-1].tail = elem[-1].tail[:m.start()] + if not self.assign_attrs(elem, m.group(1), strict=True): + elem[-1].tail = elem[-1].tail[:m.start()] elif pos is not None and pos > 0 and elem[pos-1].tail: # use tail of last child before `ul` or `ol` m = RE.search(elem[pos-1].tail) if m: - self.assign_attrs(elem, m.group(1)) - elem[pos-1].tail = elem[pos-1].tail[:m.start()] + if not self.assign_attrs(elem, m.group(1), strict=True): + elem[pos-1].tail = elem[pos-1].tail[:m.start()] elif elem.text: # use text. `ul` is first child. 
m = RE.search(elem.text) if m: - self.assign_attrs(elem, m.group(1)) - elem.text = elem.text[:m.start()] + if not self.assign_attrs(elem, m.group(1), strict=True): + elem.text = elem.text[:m.start()] elif len(elem) and elem[-1].tail: # has children. Get from tail of last child m = RE.search(elem[-1].tail) if m: - self.assign_attrs(elem, m.group(1)) - elem[-1].tail = elem[-1].tail[:m.start()] - if isheader(elem): - # clean up trailing #s - elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() + if not self.assign_attrs(elem, m.group(1), strict=True): + elem[-1].tail = elem[-1].tail[:m.start()] + if isheader(elem): + # clean up trailing #s + elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() elif elem.text: # no children. Get from text. m = RE.search(elem.text) if m: - self.assign_attrs(elem, m.group(1)) - elem.text = elem.text[:m.start()] - if isheader(elem): - # clean up trailing #s - elem.text = elem.text.rstrip('#').rstrip() + if not self.assign_attrs(elem, m.group(1), strict=True): + elem.text = elem.text[:m.start()] + if isheader(elem): + # clean up trailing #s + elem.text = elem.text.rstrip('#').rstrip() else: # inline: check for `attrs` at start of tail if elem.tail: m = self.INLINE_RE.match(elem.tail) if m: - self.assign_attrs(elem, m.group(1)) - elem.tail = elem.tail[m.end():] + remainder = self.assign_attrs(elem, m.group(1)) + elem.tail = elem.tail[m.end():] + remainder + + def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str: + """ Assign `attrs` to element. + + If the `attrs_string` has an extra closing curly brace, the remaining text is returned. + + The `strict` argument controls whether to still assign attrs if there is a remaining `}`. + """ + attrs, remainder = _scanner.scan(attrs_string) + # To keep historic behavior, discard all un-parseable text prior to '}'. 
+ index = remainder.find('}') + remainder = remainder[index:] if index != -1 else '' + + if strict and remainder: + return remainder - def assign_attrs(self, elem: Element, attrs: str) -> None: - """ Assign `attrs` to element. """ - for k, v in get_attrs(attrs): + for k, v in attrs: if k == '.': # add to class cls = elem.get('class') @@ -159,6 +172,8 @@ def assign_attrs(self, elem: Element, attrs: str) -> None: else: # assign attribute `k` with `v` elem.set(self.sanitize_name(k), v) + # The text that we initially over-matched will be put back. + return remainder def sanitize_name(self, name: str) -> str: """ diff --git a/tests/extensions/attr_list.html b/tests/extensions/attr_list.html index e7bfe0bfe..f30e3be60 100644 --- a/tests/extensions/attr_list.html +++ b/tests/extensions/attr_list.html @@ -66,4 +66,9 @@

Bad attributes

More weirdness

This should not cause a crash

Attr_lists do not contain newlines{ foo=bar -key=value }

\ No newline at end of file +key=value }

+

Attrs

+

attr_list values can have curly braces

+

attr_list curly needs to be at the end {.foo} hi

+

attr_list curly needs to be at the end {.foo test="{" } }

+

Multiple } items inline

\ No newline at end of file diff --git a/tests/extensions/attr_list.txt b/tests/extensions/attr_list.txt index 465ce4f52..f7b18215d 100644 --- a/tests/extensions/attr_list.txt +++ b/tests/extensions/attr_list.txt @@ -92,3 +92,13 @@ This should not cause a *crash*{ foo=a=b } Attr_lists do not contain *newlines*{ foo=bar key=value } + +# Attrs {data-test="{}"} + +attr_list values can have curly *braces*{ data-test='{hi{}' foo="bar" } + +## attr_list curly needs to be at the end {.foo} hi + +## attr_list curly needs to be at the end {.foo test="{" } } + +*Multiple*{.a} } *items*{.b} inline From c6105859892faa607dcb3e290389c25b0836c78e Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Mar 2024 14:37:29 -0500 Subject: [PATCH 2/8] update new tests to new framework --- tests/extensions/attr_list.html | 7 +---- tests/extensions/attr_list.txt | 12 +------ .../test_syntax/extensions/test_attr_list.py | 31 ++++++++++++++++--- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/tests/extensions/attr_list.html b/tests/extensions/attr_list.html index f30e3be60..e7bfe0bfe 100644 --- a/tests/extensions/attr_list.html +++ b/tests/extensions/attr_list.html @@ -66,9 +66,4 @@

Bad attributes

More weirdness

This should not cause a crash

Attr_lists do not contain newlines{ foo=bar -key=value }

-

Attrs

-

attr_list values can have curly braces

-

attr_list curly needs to be at the end {.foo} hi

-

attr_list curly needs to be at the end {.foo test="{" } }

-

Multiple } items inline

\ No newline at end of file +key=value }

\ No newline at end of file diff --git a/tests/extensions/attr_list.txt b/tests/extensions/attr_list.txt index f7b18215d..ad2b4ba95 100644 --- a/tests/extensions/attr_list.txt +++ b/tests/extensions/attr_list.txt @@ -91,14 +91,4 @@ No *key or value*{ = } This should not cause a *crash*{ foo=a=b } Attr_lists do not contain *newlines*{ foo=bar -key=value } - -# Attrs {data-test="{}"} - -attr_list values can have curly *braces*{ data-test='{hi{}' foo="bar" } - -## attr_list curly needs to be at the end {.foo} hi - -## attr_list curly needs to be at the end {.foo test="{" } } - -*Multiple*{.a} } *items*{.b} inline +key=value } \ No newline at end of file diff --git a/tests/test_syntax/extensions/test_attr_list.py b/tests/test_syntax/extensions/test_attr_list.py index ba8b23693..fd9d76e8d 100644 --- a/tests/test_syntax/extensions/test_attr_list.py +++ b/tests/test_syntax/extensions/test_attr_list.py @@ -23,16 +23,39 @@ class TestAttrList(TestCase): - maxDiff = None + default_kwargs = {'extensions': ['attr_list']} # TODO: Move the rest of the `attr_list` tests here. - def test_empty_list(self): + def test_empty_attr_list(self): self.assertMarkdownRenders( '*foo*{ }', - '

foo{ }

', - extensions=['attr_list'] + '

foo{ }

' + ) + + def test_curly_after_inline(self): + self.assertMarkdownRenders( + '*inline*{.a} } *text*{.a }}', + '

inline } text}

' + ) + + def test_curly_after_block(self): + self.assertMarkdownRenders( + '# Heading {.a} }', + '

Heading {.a} }

' + ) + + def test_curly_in_single_quote(self): + self.assertMarkdownRenders( + "# Heading {data-test='{}'}", + '

Heading

' + ) + + def test_curly_in_double_quote(self): + self.assertMarkdownRenders( + '# Heading {data-test="{}"}', + '

Heading

' ) def test_table_td(self): From 6201f6d4a6f21bf0505e41ebe54461f6d87b16b3 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Mar 2024 14:53:27 -0500 Subject: [PATCH 3/8] Add fenced_code test --- .../test_syntax/extensions/test_fenced_code.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py index e24a1778b..21448c785 100644 --- a/tests/test_syntax/extensions/test_fenced_code.py +++ b/tests/test_syntax/extensions/test_fenced_code.py @@ -394,6 +394,24 @@ def testFencedCodeEscapedAttrs(self): extensions=['fenced_code', 'attr_list'] ) + def testFencedCodeCurlyInAttrs(self): + self.assertMarkdownRenders( + self.dedent( + ''' + ``` { data-test="{}" } + # Some python code + ``` + ''' + ), + self.dedent( + ''' +
# Some python code
+                
+ ''' + ), + extensions=['fenced_code', 'attr_list'] + ) + class TestFencedCodeWithCodehilite(TestCase): From 8e1d621b11993fca25098b7496556141e6b58d8a Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 8 Mar 2024 21:54:35 +0100 Subject: [PATCH 4/8] Expand to cover attr lists in code fences --- markdown/extensions/attr_list.py | 23 ++++++++++++------ markdown/extensions/fenced_code.py | 15 ++++++++---- tests/extensions/attr_list.txt | 2 +- .../test_syntax/extensions/test_attr_list.py | 14 +++++++++++ .../extensions/test_fenced_code.py | 24 +++++++++++++++++++ 5 files changed, 66 insertions(+), 12 deletions(-) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 1ec852f4f..57789d96f 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -65,9 +65,22 @@ def _handle_word(s, t): ]) +def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]: + """ Parse attribute list and return a list of attribute tuples. + + Additionally, return any text that remained after a curly brace. In typical cases, its presence + should mean that the input does not match the intended attr_list syntax. + """ + attrs, remainder = _scanner.scan(attrs_string) + # To keep historic behavior, discard all un-parseable text prior to '}'. + index = remainder.find('}') + remainder = remainder[index:] if index != -1 else '' + return attrs, remainder + + def get_attrs(str: str) -> list[tuple[str, str]]: - """ Parse attribute list and return a list of attribute tuples. """ - return _scanner.scan(str)[0] + """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """ + return get_attrs_and_remainder(str)[0] def isheader(elem: Element) -> bool: @@ -153,11 +166,7 @@ def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False The `strict` argument controls whether to still assign attrs if there is a remaining `}`. 
""" - attrs, remainder = _scanner.scan(attrs_string) - # To keep historic behavior, discard all un-parseable text prior to '}'. - index = remainder.find('}') - remainder = remainder[index:] if index != -1 else '' - + attrs, remainder = get_attrs_and_remainder(attrs_string) if strict and remainder: return remainder diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index da1a9be1e..bae7330a3 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -25,7 +25,7 @@ from . import Extension from ..preprocessors import Preprocessor from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines -from .attr_list import get_attrs, AttrListExtension +from .attr_list import get_attrs_and_remainder, AttrListExtension from ..util import parseBoolValue from ..serializers import _escape_attrib_html import re @@ -56,7 +56,7 @@ class FencedBlockPreprocessor(Preprocessor): FENCED_BLOCK_RE = re.compile( dedent(r''' (?P^(?:~{3,}|`{3,}))[ ]* # opening fence - ((\{(?P[^\}\n]*)\})| # (optional {attrs} or + ((\{(?P[^\n]*)\})| # (optional {attrs} or (\.?(?P[\w#.+-]*)[ ]*)? # optional (.)lang (hl_lines=(?P"|')(?P.*?)(?P=quot)[ ]*)?) # optional hl_lines) \n # newline (end of opening fence) @@ -94,12 +94,17 @@ def run(self, lines: list[str]) -> list[str]: self.checked_for_deps = True text = "\n".join(lines) + index = 0 while 1: - m = self.FENCED_BLOCK_RE.search(text) + m = self.FENCED_BLOCK_RE.search(text, index) if m: lang, id, classes, config = None, '', [], {} if m.group('attrs'): - id, classes, config = self.handle_attrs(get_attrs(m.group('attrs'))) + attrs, remainder = get_attrs_and_remainder(m.group('attrs')) + if remainder: # Does not have correctly matching curly braces, so the syntax is invalid. + index = m.end('attrs') # Explicitly skip over this, to prevent an infinite loop. 
+ continue + id, classes, config = self.handle_attrs(attrs) if len(classes): lang = classes.pop(0) else: @@ -151,6 +156,8 @@ def run(self, lines: list[str]) -> list[str]: placeholder = self.md.htmlStash.store(code) text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}' + # Continue from after the replaced text in the next iteration. + index = m.start() + 1 + len(placeholder) else: break return text.split("\n") diff --git a/tests/extensions/attr_list.txt b/tests/extensions/attr_list.txt index ad2b4ba95..465ce4f52 100644 --- a/tests/extensions/attr_list.txt +++ b/tests/extensions/attr_list.txt @@ -91,4 +91,4 @@ No *key or value*{ = } This should not cause a *crash*{ foo=a=b } Attr_lists do not contain *newlines*{ foo=bar -key=value } \ No newline at end of file +key=value } diff --git a/tests/test_syntax/extensions/test_attr_list.py b/tests/test_syntax/extensions/test_attr_list.py index fd9d76e8d..e9a10960f 100644 --- a/tests/test_syntax/extensions/test_attr_list.py +++ b/tests/test_syntax/extensions/test_attr_list.py @@ -40,6 +40,13 @@ def test_curly_after_inline(self): '

inline } text}

' ) + def test_extra_eq_gets_ignored_inside_curly_inline(self): + # Undesired behavior but kept for historic compatibility. + self.assertMarkdownRenders( + '*inline*{data-test="x" =a} *text*', + '

inline text

' + ) + def test_curly_after_block(self): self.assertMarkdownRenders( '# Heading {.a} }', @@ -58,6 +65,13 @@ def test_curly_in_double_quote(self): '

Heading

' ) + def test_unclosed_quote_ignored(self): + # Undesired behavior but kept for historic compatibility. + self.assertMarkdownRenders( + '# Heading {foo="bar}', + '

Heading

' + ) + def test_table_td(self): self.assertMarkdownRenders( self.dedent( diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py index 21448c785..aa58bbfb4 100644 --- a/tests/test_syntax/extensions/test_fenced_code.py +++ b/tests/test_syntax/extensions/test_fenced_code.py @@ -412,6 +412,30 @@ def testFencedCodeCurlyInAttrs(self): extensions=['fenced_code', 'attr_list'] ) + def testFencedCodeMismatchedCurlyInAttrs(self): + self.assertMarkdownRenders( + self.dedent( + ''' + ``` { data-test="{}" } } + # Some python code + ``` + ``` + test + ``` + ''' + ), + self.dedent( + ''' +

``` { data-test="{}" } }

+

Some python code

+
+

test + ```

+ ''' + ), + extensions=['fenced_code', 'attr_list'] + ) + class TestFencedCodeWithCodehilite(TestCase): From b35a20049141992ddb0ff8499aba33806bd4a624 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 8 Mar 2024 22:07:06 +0100 Subject: [PATCH 5/8] Changelog --- docs/changelog.md | 4 +++- markdown/extensions/attr_list.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 9c2b302e6..f61c61982 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -34,8 +34,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Include `scripts/*.py` in the generated source tarballs (#1430). * Ensure lines after heading in loose list are properly detabbed (#1443). * Give smarty tree processor higher priority than toc (#1440). -* Permit carrots (`^`) and square brackets (`]`) but explicitly exclude +* Permit carets (`^`) and square brackets (`]`) but explicitly exclude backslashes (`\`) from abbreviations (#1444). +* In attribute lists (`attr_list`, `fenced_code`), quoted attribute values are + now allowed to contain curly braces (`}`) (#1414). ## [3.5.2] -- 2024-01-10 diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 57789d96f..4098dc7c2 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -72,7 +72,7 @@ def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], s should mean that the input does not match the intended attr_list syntax. """ attrs, remainder = _scanner.scan(attrs_string) - # To keep historic behavior, discard all un-parseable text prior to '}'. + # To keep historic behavior, discard all un-parsable text prior to '}'. 
index = remainder.find('}') remainder = remainder[index:] if index != -1 else '' return attrs, remainder From 4c6530ee58e469c05e2abdfd2e9e5c93954ea479 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Mar 2024 16:21:03 -0500 Subject: [PATCH 6/8] coverage --- markdown/extensions/attr_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 4098dc7c2..7a5e51f1b 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -78,7 +78,7 @@ def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], s return attrs, remainder -def get_attrs(str: str) -> list[tuple[str, str]]: +def get_attrs(str: str) -> list[tuple[str, str]]: # pragma: no cover """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """ return get_attrs_and_remainder(str)[0] From d8a7fb430e41a238c9b8ab8b1d10aa654327d198 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Fri, 8 Mar 2024 16:24:43 -0500 Subject: [PATCH 7/8] fix spelling --- .spell-dict | 1 + markdown/extensions/attr_list.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.spell-dict b/.spell-dict index ae1245426..9c1db010c 100644 --- a/.spell-dict +++ b/.spell-dict @@ -146,6 +146,7 @@ Treeprocessor Treeprocessors tuple tuples +unparsable unclosed unescape unescaping diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 7a5e51f1b..a43e0ba9e 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -69,10 +69,10 @@ def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], s """ Parse attribute list and return a list of attribute tuples. Additionally, return any text that remained after a curly brace. In typical cases, its presence - should mean that the input does not match the intended attr_list syntax. + should mean that the input does not match the intended attribute list syntax. 
""" attrs, remainder = _scanner.scan(attrs_string) - # To keep historic behavior, discard all un-parsable text prior to '}'. + # To keep historic behavior, discard all unparsable text prior to '}'. index = remainder.find('}') remainder = remainder[index:] if index != -1 else '' return attrs, remainder From cb1ddba46caa229e28e2b4ffd93a4e7cce660660 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Tue, 12 Mar 2024 11:42:30 -0400 Subject: [PATCH 8/8] Docs cleanup --- markdown/extensions/attr_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index a43e0ba9e..9206d11e6 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -164,7 +164,7 @@ def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False If the `attrs_string` has an extra closing curly brace, the remaining text is returned. - The `strict` argument controls whether to still assign attrs if there is a remaining `}`. + The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`. """ attrs, remainder = get_attrs_and_remainder(attrs_string) if strict and remainder: @@ -186,8 +186,8 @@ def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False def sanitize_name(self, name: str) -> str: """ - Sanitize name as 'an XML Name, minus the ":"'. - See https://www.w3.org/TR/REC-xml-names/#NT-NCName + Sanitize name as 'an XML Name, minus the `:`.' + See . """ return self.NAME_RE.sub('_', name)