Skip to content

Commit

Permalink
Autodoc: recognize Markdown links in plain text
Browse files Browse the repository at this point in the history
This extension to the typical `<>` Markdown autolink syntax allows
HTTP(S) links to be recognized in normal text without being delimited by
`<>`. This is the most natural way to write links in text, so it makes
sense to support it and allow documentation comments to be written in a
more natural way.
  • Loading branch information
ianprime0509 committed Mar 23, 2024
1 parent d3ca9d5 commit ad34ed5
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 0 deletions.
25 changes: 25 additions & 0 deletions lib/docs/wasm/markdown.zig
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@
//! escapes). `target` is expected to be an absolute URI: an autolink will not
//! be recognized unless `target` starts with a URI scheme followed by a `:`.
//!
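//! For convenience, autolinks may also be recognized in plain text without
//! any `<>` delimiters. Such autolinks must start with `http://` or
//! `https://` followed by at least one other character, and are only
//! recognized at the start of the text or directly after whitespace, `*`,
//! `_`, or `(`. The link ends at the next whitespace or unbalanced `)`, and
//! any trailing punctuation (`?`, `!`, `.`, `,`, `:`, `*`, `_`) after the
//! link is not considered part of it.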
//!
//! - **Image** - a link directly preceded by a `!`. The link text is
//! interpreted as the alt text of the image.
//!
Expand Down Expand Up @@ -740,6 +745,26 @@ test "autolinks" {
);
}

test "text autolinks" {
    // Markdown input: one case per line, covering the accepted schemes, a
    // rejected scheme, trailing punctuation, emphasis around a link,
    // parentheses inside and around a link, and an escaped link.
    const markdown_input =
        \\Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: https://ziglang.org.
        \\Here is an important link: **http://example.com**
        \\(Links may be in parentheses: https://example.com/?q=(parens))
        \\Escaping a link so it's plain text: https\://example.com
        \\
    ;
    // Expected HTML: only the bare `http://`/`https://` links followed by
    // content are turned into `<a>` elements.
    const expected_html =
        \\<p>Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
        \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
        \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
        \\Escaping a link so it's plain text: https://example.com</p>
        \\
    ;
    try testRender(markdown_input, expected_html);
}

test "images" {
try testRender(
\\![Alt text](https://example.com/image.png)
Expand Down
112 changes: 112 additions & 0 deletions lib/docs/wasm/markdown/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,9 @@ const InlineParser = struct {
'<' => try ip.parseAutolink(),
'*', '_' => try ip.parseEmphasis(),
'`' => try ip.parseCodeSpan(),
'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
try ip.parseTextAutolink();
},
else => {},
}
}
Expand Down Expand Up @@ -1123,6 +1126,115 @@ const InlineParser = struct {
ip.pos = start;
}

/// Attempts to parse a plain text autolink (one without `<>` delimiters)
/// beginning at `ip.pos`, which is the link's leading `h`.
///
/// A link is `http://` or `https://` followed by at least one character of
/// content. Content runs until whitespace, an unbalanced `)`, or the end of
/// the input, and then has any trailing punctuation (as decided by
/// `isPostTextAutolink`) stripped off.
///
/// On success an `autolink` node is recorded in `ip.completed_inlines` and
/// `ip.pos` is left on the last character of the link. If no valid link is
/// found, `ip.pos` is left unchanged.
fn parseTextAutolink(ip: *InlineParser) !void {
    const link_start = ip.pos;
    var cursor = link_start;

    // Scheme: `http`, an optional `s`, then `://`. Any mismatch (or running
    // out of input) means there is no link here.
    for ("http") |scheme_char| {
        if (cursor >= ip.content.len or ip.content[cursor] != scheme_char) return;
        cursor += 1;
    }
    if (cursor < ip.content.len and ip.content[cursor] == 's') cursor += 1;
    for ("://") |scheme_char| {
        if (cursor >= ip.content.len or ip.content[cursor] != scheme_char) return;
        cursor += 1;
    }

    // Link content: scan forward until whitespace or a `)` that does not
    // close a `(` opened inside the link itself.
    const content_start = cursor;
    var open_parens: usize = 0;
    scan: while (cursor < ip.content.len) : (cursor += 1) {
        switch (ip.content[cursor]) {
            ' ', '\t', '\n' => break :scan,
            '(' => open_parens += 1,
            ')' => {
                if (open_parens == 0) break :scan;
                open_parens -= 1;
            },
            else => {},
        }
    }

    // Drop trailing punctuation, which belongs to the surrounding text.
    var link_end = cursor;
    while (link_end > content_start and isPostTextAutolink(ip.content[link_end - 1])) {
        link_end -= 1;
    }
    // A bare scheme with no content is not a link.
    if (link_end == content_start) return;

    // Move to one past the link before the fallible calls below, then step
    // back onto the link's last character once the node has been recorded.
    ip.pos = link_end;
    const target = try ip.parent.addString(ip.content[link_start..link_end]);
    const node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = target,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = node,
        .start = link_start,
        .len = link_end - link_start,
    });
    ip.pos -= 1;
}

/// Reports whether `c` is a character after which a text autolink may be
/// recognized: a space, tab, newline, `*`, `_`, or `(`.
fn isPreTextAutolink(c: u8) bool {
    for (" \t\n*_(") |allowed| {
        if (c == allowed) return true;
    }
    return false;
}

/// Reports whether `c` is trailing punctuation that may directly follow a
/// text autolink without being treated as part of the link: one of `?`,
/// `!`, `.`, `,`, `:`, `*`, or `_`.
fn isPostTextAutolink(c: u8) bool {
    return c == '?' or
        c == '!' or
        c == '.' or
        c == ',' or
        c == ':' or
        c == '*' or
        c == '_';
}

/// Parses emphasis, starting at the beginning of a run of `*` or `_`
/// characters. `ip.pos` is left at the last character in the run after
/// parsing.
Expand Down

0 comments on commit ad34ed5

Please sign in to comment.