Skip to content

Commit

Permalink
👌 IMPROVE: MyST role syntax parsing
Browse files Browse the repository at this point in the history
Match characters instead of using regex.
This now allows for an unlimited name length and new lines in the content.
  • Loading branch information
chrisjsewell committed Dec 2, 2021
1 parent fc130fa commit 2e312e1
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 14 deletions.
37 changes: 29 additions & 8 deletions mdit_py_plugins/myst_role/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from markdown_it.common.utils import escapeHtml
from markdown_it.rules_inline import StateInline

PATTERN = re.compile(r"^\{([a-zA-Z0-9\_\-\+\:]{1,36})\}(`+)(?!`)(.+?)(?<!`)\2(?!`)")
VALID_NAME_PATTERN = re.compile(r"^\{([a-zA-Z0-9\_\-\+\:]+)\}")


def myst_role_plugin(md: MarkdownIt):
Expand All @@ -14,22 +14,45 @@ def myst_role_plugin(md: MarkdownIt):


def myst_role(state: StateInline, silent: bool):
    """Parse a MyST role at the current inline position.

    A role is a name in braces (matched by ``VALID_NAME_PATTERN``), directly
    followed by a run of one or more backticks, at least one character of
    content, and a closing run of the same number of backticks.  Newlines in
    the content are replaced by spaces.

    :param state: the inline parsing state; on success ``state.pos`` is moved
        to just after the closing backticks.
    :param silent: when true, the role is only recognised (and the position
        advanced); no ``myst_role`` token is pushed.
    :return: ``True`` if a role was recognised, ``False`` otherwise.
    """
    src = state.src
    begin = state.pos

    # check name
    name_match = VALID_NAME_PATTERN.match(src[begin:])
    if not name_match:
        return False
    name = name_match.group(1)

    # check for starting backslash escape
    # The ``begin > 0`` guard matters: at position 0 the index ``-1`` would
    # not raise, it would silently read the *last* character of the source.
    if begin > 0 and src[begin - 1] == "\\":
        # escaped (this could be improved in the case of edge case '\\{')
        return False

    # scan opening tick length
    ticks_start = pos = begin + name_match.end()
    end = len(src)
    while pos < end and src[pos] == "`":
        pos += 1

    tick_length = pos - ticks_start
    if not tick_length or pos >= end:
        # no opening ticks, or nothing at all after them
        return False

    # search for closing ticks; starting at ``pos + 1`` requires at least
    # one character of content, so an empty role is not matched
    close = src.find("`" * tick_length, pos + 1)
    if close == -1:
        return False
    content = src[pos:close].replace("\n", " ")

    if not silent:
        token = state.push("myst_role", "", 0)
        token.meta = {"name": name}
        token.content = content

    state.pos = close + tick_length

    return True

Expand All @@ -38,7 +61,5 @@ def render_myst_role(self, tokens, idx, options, env):
token = tokens[idx]
name = token.meta.get("name", "unknown")
return (
'<code class="sphinx-role">'
f"{{{name}}}[{escapeHtml(token.content)}]"
"</code>"
'<code class="myst role">' f"{{{name}}}[{escapeHtml(token.content)}]" "</code>"
)
41 changes: 35 additions & 6 deletions tests/fixtures/myst_role.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,30 @@ Basic:
.
{abc}`xyz`
.
<p><code class="sphinx-role">{abc}[xyz]</code></p>
<p><code class="myst role">{abc}[xyz]</code></p>
.

Multiple:
.
{abc}`xyz`{def}`uvw`
.
<p><code class="myst role">{abc}[xyz]</code><code class="myst role">{def}[uvw]</code></p>
.

Surrounding Text:
.
a {abc}`xyz` b
.
<p>a <code class="sphinx-role">{abc}[xyz]</code> b</p>
<p>a <code class="myst role">{abc}[xyz]</code> b</p>
.

New lines:
.
{abc}`xy
z` `d
e`
.
<p><code class="myst role">{abc}[xy z]</code> <code>d e</code></p>
.

In Code:
Expand All @@ -20,20 +36,26 @@ In Code:
<p><code>{abc}`xyz`</code></p>
.

Empty content:
.
{name}`` a
.
<p>{name}`` a</p>
.

Surrounding Code:
.
`a` {abc}`xyz` `b`
.
<p><code>a</code> <code class="sphinx-role">{abc}[xyz]</code> <code>b</code></p>
<p><code>a</code> <code class="myst role">{abc}[xyz]</code> <code>b</code></p>
.

In list:
.
- {abc}`xyz`
.
<ul>
<li><code class="sphinx-role">{abc}[xyz]</code></li>
<li><code class="myst role">{abc}[xyz]</code></li>
</ul>
.

Expand All @@ -42,15 +64,22 @@ In Quote:
> {abc}`xyz` b
.
<blockquote>
<p><code class="sphinx-role">{abc}[xyz]</code> b</p>
<p><code class="myst role">{abc}[xyz]</code> b</p>
</blockquote>
.

Multiple ticks:
.
{abc}``xyz``
.
<p><code class="sphinx-role">{abc}[xyz]</code></p>
<p><code class="myst role">{abc}[xyz]</code></p>
.

Inner tick:
.
{abc}``x`yz``
.
<p><code class="myst role">{abc}[x`yz]</code></p>
.

Unbalanced ticks:
Expand Down

0 comments on commit 2e312e1

Please sign in to comment.