Skip to content

Commit

Permalink
Merge pull request #19402 from ianprime0509/markdown-autolinks
Browse files Browse the repository at this point in the history
Autodoc: hyperlink URLs in text
  • Loading branch information
andrewrk authored Mar 25, 2024
2 parents 32b4d85 + ad34ed5 commit abadad4
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 1 deletion.
55 changes: 55 additions & 0 deletions lib/docs/wasm/markdown.zig
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@
//! content. `target` may contain `\`-escaped characters and balanced
//! parentheses.
//!
//! - **Autolink** - an abbreviated link, of the format `<target>`, where
//! `target` serves as both the link target and text. `target` may not
//! contain spaces or `<`, and any `\` in it are interpreted literally (not as
//! escapes). `target` is expected to be an absolute URI: an autolink will not
//! be recognized unless `target` starts with a URI scheme followed by a `:`.
//!
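//! For convenience, autolinks may also be recognized in plain text without
//! any `<>` delimiters. Such autolinks must start with `http://` or
//! `https://` followed by at least one other character, and must appear at
//! the start of the text or directly after whitespace, `*`, `_`, or `(`.
//! Trailing punctuation after the link (any of `?`, `!`, `.`, `,`, `:`, `*`,
//! `_`) is not considered part of it.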
//!
//! - **Image** - a link directly preceded by a `!`. The link text is
//! interpreted as the alt text of the image.
//!
Expand Down Expand Up @@ -710,6 +721,50 @@ test "links" {
);
}

test "autolinks" {
    // Markdown input: valid `<...>` autolinks mixed with `<`/`>` uses that
    // must stay plain text (no scheme, bare comparison operators, unclosed).
    const source =
        \\<https://example.com>
        \\**This is important: <https://example.com/strong>**
        \\<https://example.com?query=abc.123#page(parens)>
        \\<placeholder>
        \\<data:>
        \\1 < 2
        \\4 > 3
        \\Unclosed: <
        \\
    ;
    // Expected HTML: recognized autolinks become anchors; everything else is
    // escaped and emitted as text.
    const expected_html =
        \\<p><a href="https://example.com">https://example.com</a>
        \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
        \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
        \\&lt;placeholder&gt;
        \\<a href="data:">data:</a>
        \\1 &lt; 2
        \\4 &gt; 3
        \\Unclosed: &lt;</p>
        \\
    ;
    try testRender(source, expected_html);
}

test "text autolinks" {
    // Markdown input: bare URLs in running text, including cases that must
    // not be linked (no URL content, wrong scheme, escaped colon).
    const source =
        \\Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: https://ziglang.org.
        \\Here is an important link: **http://example.com**
        \\(Links may be in parentheses: https://example.com/?q=(parens))
        \\Escaping a link so it's plain text: https\://example.com
        \\
    ;
    // Expected HTML: trailing punctuation and unbalanced closing parentheses
    // stay outside the generated anchors.
    const expected_html =
        \\<p>Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
        \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
        \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
        \\Escaping a link so it's plain text: https://example.com</p>
        \\
    ;
    try testRender(source, expected_html);
}

test "images" {
try testRender(
\\![Alt text](https://example.com/image.png)
Expand Down
2 changes: 2 additions & 0 deletions lib/docs/wasm/markdown/Document.zig
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ pub const Node = struct {
// Inlines
/// Data is `link`.
link,
/// Data is `text`.
autolink,
/// Data is `link`.
image,
/// Data is `container`.
Expand Down
159 changes: 159 additions & 0 deletions lib/docs/wasm/markdown/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,12 @@ const InlineParser = struct {
ip.pos += 1;
},
']' => try ip.parseLink(),
'<' => try ip.parseAutolink(),
'*', '_' => try ip.parseEmphasis(),
'`' => try ip.parseCodeSpan(),
'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
try ip.parseTextAutolink();
},
else => {},
}
}
Expand Down Expand Up @@ -1076,6 +1080,161 @@ const InlineParser = struct {
return @enumFromInt(string_top);
}

/// Attempts to parse a `<scheme:target>` autolink whose opening `<` is at
/// `ip.pos`.
///
/// On success, an `autolink` node holding the text between the angle
/// brackets is recorded in `ip.completed_inlines`, and `ip.pos` is left on
/// the closing `>`. If the text is not a valid autolink, nothing is recorded
/// and `ip.pos` stays on the opening `<`.
fn parseAutolink(ip: *InlineParser) !void {
    const open = ip.pos;
    const text = ip.content;
    var cursor = open + 1;

    // The scheme must begin with an ASCII letter.
    if (cursor >= text.len) return;
    switch (text[cursor]) {
        'A'...'Z', 'a'...'z' => cursor += 1,
        else => return,
    }

    // Consume the rest of the scheme, up to the `:` that terminates it.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
            ':' => break,
            else => return,
        }
    } else return; // Ran out of input before finding `:`.
    cursor += 1; // Step past the `:`.

    // Consume the target, up to the closing `>`.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            '<', ' ', '\t', '\n' => return, // Not allowed in autolinks
            '>' => break,
            else => {},
        }
    } else return; // Ran out of input before finding `>`.

    // Move onto the closing `>` before recording the node.
    ip.pos = cursor;

    // The target is stored verbatim: backslash escapes are not recognized in
    // autolink targets.
    const target = try ip.parent.addString(text[open + 1 .. cursor]);
    const node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = target,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = node,
        .start = open,
        .len = cursor - open + 1,
    });
}

/// Attempts to parse a bare autolink (one without `<>` delimiters) whose
/// first character, an `h`, is at `ip.pos`.
///
/// A link is recognized when the text starts with `http://` or `https://`
/// and at least one character of content remains after trailing
/// punctuation (see `isPostTextAutolink`) has been trimmed. The content ends
/// at whitespace, at an unbalanced `)`, or at the end of the input.
///
/// On success, an `autolink` node is recorded in `ip.completed_inlines` and
/// `ip.pos` is left on the last character of the link. Otherwise nothing is
/// recorded and `ip.pos` is unchanged.
fn parseTextAutolink(ip: *InlineParser) !void {
    const link_start = ip.pos;
    const text = ip.content;
    var cursor = link_start;

    // Required prefix: `http`, an optional `s`, then `://`.
    for ("http") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }
    if (cursor < text.len and text[cursor] == 's') cursor += 1;
    for ("://") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }

    // Scan the link content, tracking parentheses so that a `)` closing a
    // surrounding parenthetical is not swallowed by the link.
    const content_start = cursor;
    var unclosed_parens: usize = 0;
    while (cursor < text.len) : (cursor += 1) {
        const c = text[cursor];
        if (c == ' ' or c == '\t' or c == '\n') break;
        if (c == '(') {
            unclosed_parens += 1;
        } else if (c == ')') {
            if (unclosed_parens == 0) break;
            unclosed_parens -= 1;
        }
    }

    // Drop trailing punctuation; it belongs to the surrounding text.
    var link_end = cursor;
    while (link_end > content_start and isPostTextAutolink(text[link_end - 1])) {
        link_end -= 1;
    }
    // A bare `http://` or `https://` with nothing after it is not a link.
    if (link_end == content_start) return;

    // Move to the end of the link before recording the node.
    ip.pos = link_end;

    const target = try ip.parent.addString(text[link_start..link_end]);
    const node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = target,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = node,
        .start = link_start,
        .len = link_end - link_start,
    });

    // Leave the position on the last character of the link.
    ip.pos = link_end - 1;
}

/// Reports whether a text autolink may be recognized directly after `c`:
/// that is, whether `c` is a space, tab, newline, `*`, `_`, or `(`.
fn isPreTextAutolink(c: u8) bool {
    const allowed_before = " \t\n*_(";
    for (allowed_before) |candidate| {
        if (c == candidate) return true;
    }
    return false;
}

/// Reports whether `c` is punctuation that, when it trails a text autolink,
/// is treated as following the link rather than being part of it: one of
/// `?`, `!`, `.`, `,`, `:`, `*`, or `_`.
fn isPostTextAutolink(c: u8) bool {
    const trailing_punctuation = "?!.,:*_";
    for (trailing_punctuation) |candidate| {
        if (c == candidate) return true;
    }
    return false;
}

/// Parses emphasis, starting at the beginning of a run of `*` or `_`
/// characters. `ip.pos` is left at the last character in the run after
/// parsing.
Expand Down
6 changes: 5 additions & 1 deletion lib/docs/wasm/markdown/renderer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {
}
try writer.writeAll("</a>");
},
.autolink => {
const target = doc.string(data.text.content);
try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
},
.image => {
const target = doc.string(data.link.target);
try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
Expand Down Expand Up @@ -215,7 +219,7 @@ pub fn renderInlineNodeText(
try renderInlineNodeText(doc, child, writer);
}
},
.code_span, .text => {
.autolink, .code_span, .text => {
const content = doc.string(data.text.content);
try writer.print("{}", .{fmtHtml(content)});
},
Expand Down

0 comments on commit abadad4

Please sign in to comment.