Build system: Support Windows depfiles with unquoted, backslash escaped spaces (#20100)
bcrist authored Jun 6, 2024
1 parent 6375491 commit a9e9c99
Showing 2 changed files with 140 additions and 44 deletions.
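
For context, a Windows toolchain may emit a depfile prerequisite whose embedded spaces are escaped with backslashes rather than quoted, e.g. `C:\Users\John\ Smith\cimport.c`. The sketch below shows how a consumer of the tokenizer (mirroring the loop this commit adds to Cache.zig) turns such a token into a resolved path. It is illustrative only and not part of this commit; the `@import("DepTokenizer.zig")` path and the test name are assumptions, since in the zig tree the tokenizer lives at lib/std/Build/Cache/DepTokenizer.zig and may not be re-exported publicly.

const std = @import("std");

// Hypothetical import path; adjust to however the module is exposed in your tree.
const DepTokenizer = @import("DepTokenizer.zig");

test "backslash-escaped space in an unquoted prereq resolves to a plain space" {
    // Bytes as they would appear in the depfile:
    //   cimport.o: C:\Users\John\ Smith\cimport.c
    const dep_file = "cimport.o: C:\\Users\\John\\ Smith\\cimport.c\n";

    var it: DepTokenizer = .{ .bytes = dep_file };
    var resolved = std.ArrayList(u8).init(std.testing.allocator);
    defer resolved.deinit();

    while (it.next()) |token| {
        switch (token) {
            // Targets are skipped; only prerequisites matter here.
            .target, .target_must_resolve => {},
            // No escapes present: the slice can be used directly.
            .prereq => |path| try resolved.appendSlice(path),
            // Escaped spaces present: resolve() rewrites "\ " to " ".
            .prereq_must_resolve => try token.resolve(resolved.writer()),
            else => return error.InvalidDepFile,
        }
    }

    try std.testing.expectEqualStrings(
        "C:\\Users\\John Smith\\cimport.c",
        resolved.items,
    );
}
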
13 changes: 11 additions & 2 deletions lib/std/Build/Cache.zig
@@ -860,14 +860,23 @@ pub const Manifest = struct {

var it: DepTokenizer = .{ .bytes = dep_file_contents };

while (true) {
switch (it.next() orelse return) {
while (it.next()) |token| {
switch (token) {
// We don't care about targets, we only want the prereqs
// Clang is invoked in single-source mode but other programs may not
.target, .target_must_resolve => {},
.prereq => |file_path| if (self.manifest_file == null) {
_ = try self.addFile(file_path, null);
} else try self.addFilePost(file_path),
.prereq_must_resolve => {
var resolve_buf = std.ArrayList(u8).init(self.cache.gpa);
defer resolve_buf.deinit();

try token.resolve(resolve_buf.writer());
if (self.manifest_file == null) {
_ = try self.addFile(resolve_buf.items, null);
} else try self.addFilePost(resolve_buf.items);
},
else => |err| {
try err.printError(error_buf.writer());
log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items });
171 changes: 129 additions & 42 deletions lib/std/Build/Cache/DepTokenizer.zig
@@ -158,7 +158,7 @@ pub fn next(self: *Tokenizer) ?Token {
'"' => {
self.index += 1;
self.state = .rhs;
return Token{ .prereq = self.bytes[start .. self.index - 1] };
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
},
else => {
self.index += 1;
@@ -167,11 +167,11 @@ pub fn next(self: *Tokenizer) ?Token {
.prereq => switch (char) {
'\t', ' ' => {
self.state = .rhs;
return Token{ .prereq = self.bytes[start..self.index] };
return finishPrereq(must_resolve, self.bytes[start..self.index]);
},
'\n', '\r' => {
self.state = .lhs;
return Token{ .prereq = self.bytes[start..self.index] };
return finishPrereq(must_resolve, self.bytes[start..self.index]);
},
'\\' => {
self.state = .prereq_continuation;
@@ -185,12 +185,22 @@ pub fn next(self: *Tokenizer) ?Token {
'\n' => {
self.index += 1;
self.state = .rhs;
return Token{ .prereq = self.bytes[start .. self.index - 2] };
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
},
'\r' => {
self.state = .prereq_continuation_linefeed;
self.index += 1;
},
'\\' => {
// The previous \ wasn't a continuation, but this one might be.
self.index += 1;
},
' ' => {
// not continuation, but escaped space must be resolved
must_resolve = true;
self.state = .prereq;
self.index += 1;
},
else => {
// not continuation
self.state = .prereq;
@@ -201,7 +211,7 @@ pub fn next(self: *Tokenizer) ?Token {
'\n' => {
self.index += 1;
self.state = .rhs;
return Token{ .prereq = self.bytes[start .. self.index - 1] };
return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
},
else => {
return errorIllegalChar(.continuation_eol, self.index, char);
@@ -251,15 +261,15 @@ pub fn next(self: *Tokenizer) ?Token {
},
.prereq => {
self.state = .lhs;
return Token{ .prereq = self.bytes[start..] };
return finishPrereq(must_resolve, self.bytes[start..]);
},
.prereq_continuation => {
self.state = .lhs;
return Token{ .prereq = self.bytes[start .. self.index - 1] };
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
},
.prereq_continuation_linefeed => {
self.state = .lhs;
return Token{ .prereq = self.bytes[start .. self.index - 2] };
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
},
}
}
@@ -278,6 +288,10 @@ fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
return if (must_resolve) .{ .target_must_resolve = bytes } else .{ .target = bytes };
}

fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
return if (must_resolve) .{ .prereq_must_resolve = bytes } else .{ .prereq = bytes };
}

const State = enum {
lhs,
target,
@@ -298,6 +312,7 @@ pub const Token = union(enum) {
target: []const u8,
target_must_resolve: []const u8,
prereq: []const u8,
prereq_must_resolve: []const u8,

incomplete_quoted_prerequisite: IndexAndBytes,
incomplete_target: IndexAndBytes,
@@ -318,48 +333,76 @@ pub const Token = union(enum) {
bytes: []const u8,
};

/// Resolve escapes in target. Only valid with .target_must_resolve.
/// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void {
const bytes = self.target_must_resolve; // resolve called on incorrect token

var state: enum { start, escape, dollar } = .start;
for (bytes) |c| {
switch (state) {
.start => {
switch (c) {
'\\' => state = .escape,
'$' => state = .dollar,
else => try writer.writeByte(c),
}
},
.escape => {
switch (c) {
' ', '#', '\\' => {},
'$' => {
try writer.writeByte('\\');
state = .dollar;
continue;
switch (self) {
.target_must_resolve => |bytes| {
var state: enum { start, escape, dollar } = .start;
for (bytes) |c| {
switch (state) {
.start => {
switch (c) {
'\\' => state = .escape,
'$' => state = .dollar,
else => try writer.writeByte(c),
}
},
.escape => {
switch (c) {
' ', '#', '\\' => {},
'$' => {
try writer.writeByte('\\');
state = .dollar;
continue;
},
else => try writer.writeByte('\\'),
}
try writer.writeByte(c);
state = .start;
},
.dollar => {
try writer.writeByte('$');
switch (c) {
'$' => {},
else => try writer.writeByte(c),
}
state = .start;
},
else => try writer.writeByte('\\'),
}
try writer.writeByte(c);
state = .start;
},
.dollar => {
try writer.writeByte('$');
switch (c) {
'$' => {},
else => try writer.writeByte(c),
}
},
.prereq_must_resolve => |bytes| {
var state: enum { start, escape } = .start;
for (bytes) |c| {
switch (state) {
.start => {
switch (c) {
'\\' => state = .escape,
else => try writer.writeByte(c),
}
},
.escape => {
switch (c) {
' ' => {},
'\\' => {
try writer.writeByte(c);
continue;
},
else => try writer.writeByte('\\'),
}
try writer.writeByte(c);
state = .start;
},
}
state = .start;
},
}
}
},
else => unreachable,
}
}
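
// Illustrative sketch (not part of this commit): how a caller resolves a
// .prereq_must_resolve token. "\ " collapses to a plain space, while
// backslashes that are not escaping a space are preserved, matching the
// "windows funky prereqs" expectations further down. Assumes the file-level
// `const std = @import("std");` that the existing tests already rely on.
test "resolve backslash-escaped space in prereq (sketch)" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();

    const token: Token = .{ .prereq_must_resolve = "C:\\some\\ dir\\a.c" };
    try token.resolve(buf.writer());
    try std.testing.expectEqualStrings("C:\\some dir\\a.c", buf.items);
}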

pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void {
switch (self) {
.target, .target_must_resolve, .prereq => unreachable, // not an error
.target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
.incomplete_quoted_prerequisite,
.incomplete_target,
=> |index_and_bytes| {
@@ -387,7 +430,7 @@ pub const Token = union(enum) {

fn errStr(self: Token) []const u8 {
return switch (self) {
.target, .target_must_resolve, .prereq => unreachable, // not an error
.target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
.incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
.incomplete_target => "incomplete target",
.invalid_target => "invalid target",
@@ -538,6 +581,15 @@ test "prereq continuation" {
, expect);
}

test "prereq continuation (CRLF)" {
const expect =
\\target = {foo.o}
\\prereq = {foo.h}
\\prereq = {bar.h}
;
try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
}

test "multiple prereqs" {
const expect =
\\target = {foo.o}
@@ -728,6 +780,32 @@ test "windows funky targets" {
);
}

test "windows funky prereqs" {
// Note we don't support unquoted escaped spaces at the very beginning of a relative path
// e.g. `\ SpaceAtTheBeginning.c`
// This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
// and supporting it would degrade error messages for cases where it was meant to be a
// continuation, but the line ending is missing.
try depTokenizer(
\\cimport.o: \
\\ trailingbackslash\\
\\ C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
\\ somedir\\ a.c\
\\ somedir/\ a.c\
\\ somedir\\ \ \ b.c\
\\ somedir\\ \\ \c.c\
\\
,
\\target = {cimport.o}
\\prereq = {trailingbackslash\}
\\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
\\prereq = {somedir\ a.c}
\\prereq = {somedir/ a.c}
\\prereq = {somedir\ b.c}
\\prereq = {somedir\ \ \c.c}
);
}

test "windows drive and forward slashes" {
try depTokenizer(
\\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
Expand Down Expand Up @@ -915,6 +993,15 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void {
resolve_buf.items.len = 0;
try buffer.appendSlice("}");
},
.prereq_must_resolve => {
try buffer.appendSlice("prereq = {");
try token.resolve(resolve_buf.writer());
for (resolve_buf.items) |b| {
try buffer.append(printable_char_tab[b]);
}
resolve_buf.items.len = 0;
try buffer.appendSlice("}");
},
else => {
try buffer.appendSlice("ERROR: ");
try token.printError(buffer.writer());
