Skip to content

Commit

Permalink
🔧 More improvements for directive option parsing (#919)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisjsewell authored Apr 23, 2024
1 parent 8614eca commit 5ad2d6d
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 119 deletions.
8 changes: 3 additions & 5 deletions docs/syntax/roles-and-directives.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,16 @@ print(f'my {a}nd line')
```
:::

Comments, starting with `#`, are also allowed in between options or at the end of values, and are ignored.
The values can be enclosed in quotes (`"` or `'`) and span multiple lines.
Newline behaviour can be controlled by starting the value with `|` (preserve newlines) or `>` (collapse newlines):

:::{myst-example}
```{code-block} python
:lineno-start: 10 # this is a comment
: # this is also a comment
:lineno-start: 10
:emphasize-lines: "1, 3"
:caption: |
: This is my
: multi-line caption. It is *pretty nifty* ;-)
: This is my
: multi-line caption. It is *pretty nifty* ;-)
a = 2
print('my 1st line')
Expand Down
23 changes: 15 additions & 8 deletions myst_parser/parsers/directives.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,16 +188,13 @@ def _parse_directive_options(
yaml_block = content
content = ""
yaml_block = dedent(yaml_block)
elif content.lstrip().startswith(":"):
# TODO deprecate allowing initial whitespace (by lstripping)
# or at least make it that all have the same indent
# also look at mystjs implementation
elif content.startswith(":"):
content_lines = content.splitlines()
yaml_lines = []
while content_lines:
if not content_lines[0].lstrip().startswith(":"):
if not content_lines[0].startswith(":"):
break
yaml_lines.append(content_lines.pop(0).lstrip()[1:])
yaml_lines.append(content_lines.pop(0)[1:])
yaml_block = "\n".join(yaml_lines)
content = "\n".join(content_lines)

Expand Down Expand Up @@ -227,10 +224,13 @@ def _parse_directive_options(
)
return _DirectiveOptions(content, yaml_options, yaml_errors, has_options_block)

validation_errors: list[ParseWarnings] = []

options: dict[str, str] = {}
if yaml_block is not None:
try:
options = dict(options_to_items(yaml_block))
_options, state = options_to_items(yaml_block)
options = dict(_options)
except TokenizeError as err:
return _DirectiveOptions(
content,
Expand All @@ -244,6 +244,14 @@ def _parse_directive_options(
],
has_options_block,
)
if state.has_comments:
validation_errors.append(
ParseWarnings(
"Directive options has # comments, which may not be supported in future versions.",
line,
MystWarnings.DIRECTIVE_OPTION_COMMENTS,
)
)

if issubclass(directive_class, TestDirective):
# technically this directive spec only accepts one option ('option')
Expand All @@ -258,7 +266,6 @@ def _parse_directive_options(
options_spec: dict[str, Callable] = directive_class.option_spec
unknown_options: list[str] = []
new_options: dict[str, Any] = {}
validation_errors: list[ParseWarnings] = []
value: str | None
for name, value in options.items():
try:
Expand Down
55 changes: 37 additions & 18 deletions myst_parser/parsers/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,14 @@ def __str__(self) -> str:
return "\n".join(lines)


@dataclass
class State:
    """Mutable state threaded through the option-tokenizing helpers.

    ``has_comments`` is set to True when a ``#`` comment is encountered
    anywhere in the option block, so the caller can emit a deprecation
    warning about comment usage in directive options.
    """

    has_comments: bool = False


def to_items(
text: str, line_offset: int = 0, column_offset: int = 0
) -> Iterable[tuple[str, str]]:
) -> tuple[list[tuple[str, str]], State]:
"""Parse a directive option block into (key, value) tuples.
:param text: The directive option text.
Expand All @@ -174,12 +179,17 @@ def to_items(
:raises: `TokenizeError`
"""
for key_token, value_token in to_tokens(text, line_offset, column_offset):
yield key_token.value, value_token.value if value_token is not None else ""
output = []
state = State()
for key_token, value_token in _to_tokens(text, state, line_offset, column_offset):
output.append(
(key_token.value, value_token.value if value_token is not None else "")
)
return output, state


def to_tokens(
text: str, line_offset: int = 0, column_offset: int = 0
def _to_tokens(
text: str, state: State, line_offset: int = 0, column_offset: int = 0
) -> Iterable[tuple[KeyToken, ValueToken | None]]:
"""Parse a directive option, and yield key/value token pairs.
Expand All @@ -191,7 +201,7 @@ def to_tokens(
"""
key_token: KeyToken | None = None
try:
for token in tokenize(text):
for token in _tokenize(text, state):
if isinstance(token, KeyToken):
if key_token is not None:
yield key_token, None
Expand All @@ -207,12 +217,12 @@ def to_tokens(
raise


def tokenize(text: str) -> Iterable[Token]:
def _tokenize(text: str, state: State) -> Iterable[Token]:
"""Yield tokens from a directive option stream."""
stream = StreamBuffer(text)

while True:
_scan_to_next_token(stream)
_scan_to_next_token(stream, state)

if stream.peek() == _CHARS_END:
break
Expand All @@ -227,9 +237,9 @@ def tokenize(text: str) -> Iterable[Token]:
if ch in ("'", '"'):
yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=True)
else:
yield _scan_plain_scalar(stream, is_key=True)
yield _scan_plain_scalar(stream, state, is_key=True)

_scan_to_next_token(stream)
_scan_to_next_token(stream, state)

# check next char is colon + space
if stream.peek() != ":":
Expand All @@ -240,21 +250,21 @@ def tokenize(text: str) -> Iterable[Token]:
end_mark = stream.get_position()
yield ColonToken(start_mark, end_mark)

_scan_to_next_token(stream)
_scan_to_next_token(stream, state)

# now find value
ch = stream.peek()
if stream.column == 0:
pass
elif ch in ("|", ">"):
yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch))
yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch), state)
elif ch in ("'", '"'):
yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=False)
else:
yield _scan_plain_scalar(stream, is_key=False)
yield _scan_plain_scalar(stream, state, is_key=False)


def _scan_to_next_token(stream: StreamBuffer) -> None:
def _scan_to_next_token(stream: StreamBuffer, state: State) -> None:
"""Skip spaces, line breaks and comments.
The byte order mark is also stripped,
Expand All @@ -267,14 +277,15 @@ def _scan_to_next_token(stream: StreamBuffer) -> None:
while stream.peek() == " ":
stream.forward()
if stream.peek() == "#":
state.has_comments = True
while stream.peek() not in _CHARS_END_NEWLINE:
stream.forward()
if not _scan_line_break(stream):
found = True


def _scan_plain_scalar(
stream: StreamBuffer, is_key: bool = False
stream: StreamBuffer, state: State, is_key: bool = False
) -> KeyToken | ValueToken:
chunks = []
start_mark = stream.get_position()
Expand All @@ -284,6 +295,7 @@ def _scan_plain_scalar(
while True:
length = 0
if stream.peek() == "#":
state.has_comments = True
break
while True:
ch = stream.peek(length)
Expand All @@ -302,6 +314,8 @@ def _scan_plain_scalar(
end_mark = stream.get_position()
spaces = _scan_plain_spaces(stream, allow_newline=(not is_key))
if not spaces or stream.peek() == "#" or (stream.column < indent):
if stream.peek() == "#":
state.has_comments = True
break

return (
Expand Down Expand Up @@ -472,7 +486,9 @@ def _scan_flow_scalar_breaks(stream: StreamBuffer) -> list[str]:
return chunks


def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
def _scan_block_scalar(
stream: StreamBuffer, style: Literal["|", ">"], state: State
) -> ValueToken:
indent = 0
folded = style == ">"
chunks = []
Expand All @@ -481,7 +497,7 @@ def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueT
# Scan the header.
stream.forward()
chomping, increment = _scan_block_scalar_indicators(stream, start_mark)
_scan_block_scalar_ignored_line(stream, start_mark)
_scan_block_scalar_ignored_line(stream, start_mark, state)

# Determine the indentation level and go to the first non-empty line.
min_indent = indent + 1
Expand Down Expand Up @@ -575,10 +591,13 @@ def _scan_block_scalar_indicators(
return chomping, increment


def _scan_block_scalar_ignored_line(stream: StreamBuffer, start_mark: Position) -> None:
def _scan_block_scalar_ignored_line(
stream: StreamBuffer, start_mark: Position, state: State
) -> None:
while stream.peek() == " ":
stream.forward()
if stream.peek() == "#":
state.has_comments = True
while stream.peek() not in _CHARS_END_NEWLINE:
stream.forward()
ch = stream.peek()
Expand Down
2 changes: 2 additions & 0 deletions myst_parser/warnings_.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class MystWarnings(Enum):
"""Issue parsing directive."""
DIRECTIVE_OPTION = "directive_option"
"""Issue parsing directive options."""
DIRECTIVE_OPTION_COMMENTS = "directive_comments"
"""Directive options has # comments, which may not be supported in future versions."""
DIRECTIVE_BODY = "directive_body"
"""Issue parsing directive body."""
UNKNOWN_DIRECTIVE = "directive_unknown"
Expand Down
19 changes: 19 additions & 0 deletions tests/test_renderers/fixtures/directive_parsing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,25 @@ options:
warnings: []
.

note: comment in option
.
```{note}
:class: name # oops
a
```
.
arguments: []
body:
- a
content_offset: 1
options:
class:
- name
warnings:
- 'ParseWarnings(msg=''Directive options has # comments, which may not be supported
in future versions.'', lineno=0, type=<MystWarnings.DIRECTIVE_OPTION_COMMENTS: ''directive_comments''>)'
.

note: content after option with new line
.
```{note}
Expand Down
Loading

0 comments on commit 5ad2d6d

Please sign in to comment.