Skip to content

Commit

Permalink
std.zig.tokenizer: simplify line-based tokens
Browse files Browse the repository at this point in the history
Closes ziglang#21358
Closes ziglang#21360

This commit modifies the `multiline_string_literal_line`, `doc_comment`,
and `container_doc_comment` tokens to no longer include the line ending
as part of the token. This makes it easier to handle line endings (which
may be LF, CRLF, or in edge cases possibly nonexistent) consistently.

In the two issues linked above, Autodoc was already assuming this for
doc comments, and yielding incorrect results when handling files with
CRLF line endings (both in Markdown parsing and source rendering).

Applying the same simplification for multiline string literals also
brings `zig fmt` into conformance with
ziglang/zig-spec#38 regarding formatting of
multiline strings with CRLF line endings: the spec says that `zig fmt`
should remove the CR from such line endings, but this was not previously
the case.
  • Loading branch information
ianprime0509 authored and DivergentClouds committed Sep 24, 2024
1 parent ac5bca6 commit b473123
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 11 deletions.
6 changes: 2 additions & 4 deletions lib/std/zig/AstGen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -11721,16 +11721,14 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice {
var tok_i = start;
{
const slice = tree.tokenSlice(tok_i);
const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1;
const line_bytes = slice[2 .. slice.len - carriage_return_ending];
const line_bytes = slice[2..];
try string_bytes.appendSlice(gpa, line_bytes);
tok_i += 1;
}
// Following lines: each line prepends a newline.
while (tok_i <= end) : (tok_i += 1) {
const slice = tree.tokenSlice(tok_i);
const carriage_return_ending: usize = if (slice[slice.len - 2] == '\r') 2 else 1;
const line_bytes = slice[2 .. slice.len - carriage_return_ending];
const line_bytes = slice[2..];
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1);
string_bytes.appendAssumeCapacity('\n');
string_bytes.appendSliceAssumeCapacity(line_bytes);
Expand Down
38 changes: 38 additions & 0 deletions lib/std/zig/parser_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3087,6 +3087,22 @@ test "zig fmt: multiline string" {
);
}

// Verifies that `zig fmt` rewrites CRLF line endings as LF inside multiline
// string literals: the input fixture uses "\r\n" on every line, and the
// expected output (a Zig multiline string literal) contains plain LF endings.
// NOTE(review): the stray ')' in the "two)" line appears in both input and
// expected output, so it does not affect the transform — presumably
// intentional fixture noise; confirm against the upstream test.
test "zig fmt: multiline string with CRLF line endings" {
try testTransform("" ++
"const s =\r\n" ++
" \\\\one\r\n" ++
" \\\\two)\r\n" ++
" \\\\three\r\n" ++
";\r\n",
\\const s =
\\ \\one
\\ \\two)
\\ \\three
\\;
\\
);
}

test "zig fmt: values" {
try testCanonical(
\\test "values" {
Expand Down Expand Up @@ -4404,6 +4420,28 @@ test "zig fmt: invalid doc comments on comptime and test blocks" {
});
}

// Verifies that `zig fmt` normalizes CRLF line endings to LF for
// container doc comments (`//!`), doc comments (`///`), and regular
// line comments (`//`): every line of the input fixture ends in "\r\n",
// while the expected output (a Zig multiline string literal) uses LF only.
test "zig fmt: comments with CRLF line endings" {
try testTransform("" ++
"//! Top-level doc comment\r\n" ++
"//! Continuing to another line\r\n" ++
"\r\n" ++
"/// Regular doc comment\r\n" ++
"const S = struct {\r\n" ++
" // Regular comment\r\n" ++
" // More content\r\n" ++
"};\r\n",
\\//! Top-level doc comment
\\//! Continuing to another line
\\
\\/// Regular doc comment
\\const S = struct {
\\ // Regular comment
\\ // More content
\\};
\\
);
}

test "zig fmt: else comptime expr" {
try testCanonical(
\\comptime {
Expand Down
3 changes: 0 additions & 3 deletions lib/std/zig/render.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3170,9 +3170,6 @@ fn discardAllParams(r: *Render, fn_proto_node: Ast.Node.Index) Error!void {
fn tokenSliceForRender(tree: Ast, token_index: Ast.TokenIndex) []const u8 {
var ret = tree.tokenSlice(token_index);
switch (tree.tokens.items(.tag)[token_index]) {
.multiline_string_literal_line => {
if (ret[ret.len - 1] == '\n') ret.len -= 1;
},
.container_doc_comment, .doc_comment => {
ret = mem.trimRight(u8, ret, &std.ascii.whitespace);
},
Expand Down
4 changes: 0 additions & 4 deletions lib/std/zig/tokenizer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -847,12 +847,10 @@ pub const Tokenizer = struct {
break;
},
'\n' => {
self.index += 1;
break;
},
'\r' => {
if (self.buffer[self.index + 1] == '\n') {
self.index += 2;
break;
} else {
state = .invalid;
Expand Down Expand Up @@ -1117,7 +1115,6 @@ pub const Tokenizer = struct {
},
'\r' => {
if (self.buffer[self.index + 1] == '\n') {
self.index += 1;
result.tag = .doc_comment;
break;
} else {
Expand Down Expand Up @@ -1167,7 +1164,6 @@ pub const Tokenizer = struct {
},
'\r' => {
if (self.buffer[self.index + 1] == '\n') {
self.index += 1;
break;
} else {
state = .invalid;
Expand Down

0 comments on commit b473123

Please sign in to comment.