Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autodoc: hyperlink URLs in text #19402

Merged
merged 2 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions lib/docs/wasm/markdown.zig
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@
//! content. `target` may contain `\`-escaped characters and balanced
//! parentheses.
//!
//! - **Autolink** - an abbreviated link, of the format `<target>`, where
//! `target` serves as both the link target and text. `target` may not
//! contain spaces or `<`, and any `\` in it are interpreted literally (not as
//! escapes). `target` is expected to be an absolute URI: an autolink will not
//! be recognized unless `target` starts with a URI scheme followed by a `:`.
//!
//! For convenience, autolinks may also be recognized in plain text without
//! any `<>` delimiters. Such autolinks are restricted to start with `http://`
//! or `https://` followed by at least one other character, not including any
//! trailing punctuation after the link.
//!
//! - **Image** - a link directly preceded by a `!`. The link text is
//! interpreted as the alt text of the image.
//!
Expand Down Expand Up @@ -710,6 +721,50 @@ test "links" {
);
}

// Exercises `<...>`-delimited autolinks: valid absolute URIs become links,
// while text without a URI scheme, bare comparison operators and an
// unclosed `<` are rendered as escaped plain text.
test "autolinks" {
    const markdown =
        \\<https://example.com>
        \\**This is important: <https://example.com/strong>**
        \\<https://example.com?query=abc.123#page(parens)>
        \\<placeholder>
        \\<data:>
        \\1 < 2
        \\4 > 3
        \\Unclosed: <
        \\
    ;
    const expected_html =
        \\<p><a href="https://example.com">https://example.com</a>
        \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
        \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
        \\&lt;placeholder&gt;
        \\<a href="data:">data:</a>
        \\1 &lt; 2
        \\4 &gt; 3
        \\Unclosed: &lt;</p>
        \\
    ;
    try testRender(markdown, expected_html);
}

// Exercises autolinks written directly in running text (no `<>`): only
// `http://` and `https://` links are recognized, trailing punctuation and an
// unbalanced closing parenthesis stay outside the link, and a backslash
// escape suppresses recognition.
test "text autolinks" {
    const markdown =
        \\Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: https://ziglang.org.
        \\Here is an important link: **http://example.com**
        \\(Links may be in parentheses: https://example.com/?q=(parens))
        \\Escaping a link so it's plain text: https\://example.com
        \\
    ;
    const expected_html =
        \\<p>Text autolinks must start with http:// or https://.
        \\This doesn't count: ftp://example.com.
        \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
        \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
        \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
        \\Escaping a link so it's plain text: https://example.com</p>
        \\
    ;
    try testRender(markdown, expected_html);
}

test "images" {
try testRender(
\\![Alt text](https://example.com/image.png)
Expand Down
2 changes: 2 additions & 0 deletions lib/docs/wasm/markdown/Document.zig
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ pub const Node = struct {
// Inlines
/// Data is `link`.
link,
/// Data is `text`.
autolink,
/// Data is `link`.
image,
/// Data is `container`.
Expand Down
159 changes: 159 additions & 0 deletions lib/docs/wasm/markdown/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,12 @@ const InlineParser = struct {
ip.pos += 1;
},
']' => try ip.parseLink(),
'<' => try ip.parseAutolink(),
'*', '_' => try ip.parseEmphasis(),
'`' => try ip.parseCodeSpan(),
'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {
try ip.parseTextAutolink();
},
else => {},
}
}
Expand Down Expand Up @@ -1076,6 +1080,161 @@ const InlineParser = struct {
return @enumFromInt(string_top);
}

/// Attempts to parse a `<`-delimited autolink whose opening `<` is at
/// `ip.pos`. The text between the delimiters must consist of a URI scheme
/// (an ASCII letter followed by any number of ASCII letters, digits, `+`,
/// `.` or `-`), a `:`, and then any run of characters other than `<`, space,
/// tab and newline.
///
/// On success, an `autolink` node covering the text from `<` through `>` is
/// appended to the completed inlines and `ip.pos` is left at the closing `>`.
/// If no autolink is recognized, `ip.pos` is left at the opening `<`.
fn parseAutolink(ip: *InlineParser) !void {
    const open = ip.pos;
    const text = ip.content;
    var cursor = open + 1;

    // The scheme must begin with an ASCII letter.
    if (cursor >= text.len) return;
    switch (text[cursor]) {
        'A'...'Z', 'a'...'z' => cursor += 1,
        else => return,
    }

    // Consume the remainder of the scheme, up to the `:` that terminates it.
    // Running out of input before the `:` means there is no autolink.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
            ':' => break,
            else => return,
        }
    } else return;
    cursor += 1;

    // Consume the rest of the target, up to the closing `>`.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            '<', ' ', '\t', '\n' => return, // Not allowed in autolinks
            '>' => break,
            else => {},
        }
    } else return;

    ip.pos = cursor;
    // The target is stored verbatim: backslash escapes are not recognized in
    // autolink targets.
    const target = try ip.parent.addString(text[open + 1 .. cursor]);
    const node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = target,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = node,
        .start = open,
        .len = cursor - open + 1,
    });
}

/// Attempts to parse an autolink written directly in text (without `<>`
/// delimiters), beginning at `ip.pos`. The link must start with `http://` or
/// `https://` and extends up to the next space, tab, newline, unbalanced `)`
/// or the end of the content. Trailing punctuation (as decided by
/// `isPostTextAutolink`) is then excluded, and at least one character must
/// remain after the `://`.
///
/// On success, an `autolink` node is appended to the completed inlines and
/// `ip.pos` is left at the last character of the link. If no link is
/// recognized, `ip.pos` is left unchanged.
fn parseTextAutolink(ip: *InlineParser) !void {
    const begin = ip.pos;
    const text = ip.content;
    var cursor = begin;

    // Match the scheme: `http`, an optional `s`, then `://`.
    for ("http") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }
    if (cursor < text.len and text[cursor] == 's') cursor += 1;
    for ("://") |expected| {
        if (cursor >= text.len or text[cursor] != expected) return;
        cursor += 1;
    }

    // Scan the link content. Parentheses are tracked so that balanced pairs
    // stay inside the link while an unmatched `)` ends it.
    const content_start = cursor;
    var open_parens: usize = 0;
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            ' ', '\t', '\n' => break,
            '(' => open_parens += 1,
            ')' => {
                if (open_parens == 0) break;
                open_parens -= 1;
            },
            else => {},
        }
    }

    // Drop trailing punctuation, which is treated as following the link
    // rather than belonging to it.
    while (cursor > content_start and isPostTextAutolink(text[cursor - 1])) {
        cursor -= 1;
    }
    // Nothing is left after `://`, so this is not a link.
    if (cursor == content_start) return;

    ip.pos = cursor;
    const target = try ip.parent.addString(text[begin..cursor]);
    const node = try ip.parent.addNode(.{
        .tag = .autolink,
        .data = .{ .text = .{
            .content = target,
        } },
    });
    try ip.completed_inlines.append(ip.parent.allocator, .{
        .node = node,
        .start = begin,
        .len = cursor - begin,
    });
    // Leave the position on the final character of the link.
    ip.pos -= 1;
}

/// Reports whether `c` is one of the characters (space, tab, newline, `*`,
/// `_` or `(`) that may directly precede a text autolink for it to be
/// recognized.
fn isPreTextAutolink(c: u8) bool {
    for (" \t\n*_(") |allowed| {
        if (c == allowed) return true;
    }
    return false;
}

/// Reports whether `c` is punctuation (`?`, `!`, `.`, `,`, `:`, `*` or `_`)
/// that, when found at the end of a text autolink, is treated as following
/// the link rather than being part of it.
fn isPostTextAutolink(c: u8) bool {
    for ("?!.,:*_") |trailing| {
        if (c == trailing) return true;
    }
    return false;
}

/// Parses emphasis, starting at the beginning of a run of `*` or `_`
/// characters. `ip.pos` is left at the last character in the run after
/// parsing.
Expand Down
6 changes: 5 additions & 1 deletion lib/docs/wasm/markdown/renderer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {
}
try writer.writeAll("</a>");
},
.autolink => {
const target = doc.string(data.text.content);
try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});
},
.image => {
const target = doc.string(data.link.target);
try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});
Expand Down Expand Up @@ -215,7 +219,7 @@ pub fn renderInlineNodeText(
try renderInlineNodeText(doc, child, writer);
}
},
.code_span, .text => {
.autolink, .code_span, .text => {
const content = doc.string(data.text.content);
try writer.print("{}", .{fmtHtml(content)});
},
Expand Down
Loading