compiler: integrate ZON with the ZIR caching system
This came with a big cleanup to `Zcu.PerThread.updateFile` (formerly
`astGenFile`).

Also, change how the cache manifest works for files in the import table.
Instead of adding files to the manifest when we call `semaFile` on
them, we now iterate the import table after the AstGen workers have run
and add all of the files to the cache manifest at that point (see the
new hunk in `performAllTheWorkInner` below).

The downside is that this is a bit more eager to include files in the
manifest; in particular, files which are imported but not actually
referenced are now included in the manifest. So, for instance, modifying any
standard library file will invalidate all Zig compilations using that
standard library, even if they don't use that file.

The original motivation here was simply that the old logic in `semaFile`
didn't translate nicely to ZON. However, it turns out to actually be
necessary for correctness. Because `@import("foo.zig")` is an
AstGen-level error if `foo.zig` does not exist, we need to invalidate
the cache when an imported but unreferenced file is removed to make sure
this error is triggered when it needs to be.
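
As a hypothetical illustration, consider a module in which `a.zig`
contains:

    const b = @import("b.zig"); // declared but never referenced

If `b.zig` is later deleted, the next update must re-run AstGen on
`a.zig` so that the missing-file error is reported; a cached "success"
result for `a.zig` cannot be reused.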

Resolves: ziglang#22746
mlugg committed Feb 4, 2025
1 parent 3e3390b commit 9a2bb0e
Showing 6 changed files with 405 additions and 187 deletions.
25 changes: 25 additions & 0 deletions lib/std/zig/Zoir.zig
@@ -10,6 +10,31 @@ string_bytes: []u8,
compile_errors: []Zoir.CompileError,
error_notes: []Zoir.CompileError.Note,

/// The data stored at byte offset 0 when ZOIR is stored in a file.
pub const Header = extern struct {
nodes_len: u32,
extra_len: u32,
limbs_len: u32,
string_bytes_len: u32,
compile_errors_len: u32,
error_notes_len: u32,

/// We could leave this as padding, however it triggers a Valgrind warning because
/// we read and write undefined bytes to the file system. This is harmless, but
/// it's essentially free to have a zero field here and makes the warning go away,
/// making it more likely that following Valgrind warnings will be taken seriously.
unused: u64 = 0,

stat_inode: std.fs.File.INode,
stat_size: u64,
stat_mtime: i128,

comptime {
// Check that `unused` is working as expected
assert(std.meta.hasUniqueRepresentation(Header));
}
};

pub fn hasCompileErrors(zoir: Zoir) bool {
if (zoir.compile_errors.len > 0) {
assert(zoir.nodes.len == 0);
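
For context, a minimal sketch of how a consumer of this header might validate a cached ZOIR file against its source file. The function name and shape here are hypothetical; the real check lives in `Zcu.PerThread.updateFile`:

const std = @import("std");
const Zoir = std.zig.Zoir;

fn zoirCacheIsStale(cache_file: std.fs.File, src_stat: std.fs.File.Stat) !bool {
    var header: Zoir.Header = undefined;
    const n = try cache_file.readAll(std.mem.asBytes(&header));
    if (n != @sizeOf(Zoir.Header)) return error.UnexpectedFileSize;
    // The stat fields captured at save time must match the current source file.
    return header.stat_inode != src_stat.inode or
        header.stat_size != src_stat.size or
        header.stat_mtime != src_stat.mtime;
}

This is also why the `comptime` check above matters: the header is written to and read from disk as raw bytes, so it must have no padding holes.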
46 changes: 38 additions & 8 deletions src/Compilation.zig
@@ -2220,10 +2220,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
try comp.astgen_work_queue.ensureUnusedCapacity(zcu.import_table.count());
for (zcu.import_table.values()) |file_index| {
if (zcu.fileByIndex(file_index).mod.isBuiltin()) continue;
-const file = zcu.fileByIndex(file_index);
-if (file.getMode() == .zig) {
-comp.astgen_work_queue.writeItemAssumeCapacity(file_index);
-}
+comp.astgen_work_queue.writeItemAssumeCapacity(file_index);
}
if (comp.file_system_inputs) |fsi| {
for (zcu.import_table.values()) |file_index| {
@@ -3810,19 +3807,48 @@ fn performAllTheWorkInner(
const pt: Zcu.PerThread = .activate(zcu, .main);
defer pt.deactivate();

// If the cache mode is `whole`, then add every source file to the cache manifest.
switch (comp.cache_use) {
.whole => |whole| if (whole.cache_manifest) |man| {
const gpa = zcu.gpa;
for (zcu.import_table.values()) |file_index| {
const file = zcu.fileByIndex(file_index);
const source = file.getSource(gpa) catch |err| {
try pt.reportRetryableFileError(file_index, "unable to load source: {s}", .{@errorName(err)});
continue;
};
const resolved_path = try std.fs.path.resolve(gpa, &.{
file.mod.root.root_dir.path orelse ".",
file.mod.root.sub_path,
file.sub_file_path,
});
errdefer gpa.free(resolved_path);
whole.cache_manifest_mutex.lock();
defer whole.cache_manifest_mutex.unlock();
man.addFilePostContents(resolved_path, source.bytes, source.stat) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
else => {
try pt.reportRetryableFileError(file_index, "unable to update cache: {s}", .{@errorName(err)});
continue;
},
};
}
},
.incremental => {},
}

try reportMultiModuleErrors(pt);

const any_fatal_files = for (zcu.import_table.values()) |file_index| {
const file = zcu.fileByIndex(file_index);
if (file.getMode() == .zon) continue;
switch (file.status) {
.never_loaded => unreachable, // everything is loaded by the workers
.retryable_failure, .astgen_failure => break true,
.success => {},
}
} else false;

-if (any_fatal_files) {
+if (any_fatal_files or comp.alloc_failure_occurred) {
// We give up right now! No updating of ZIR refs, no nothing. The idea is that this prevents
// us from invalidating lots of incremental dependencies due to files with e.g. parse errors.
// However, this means our analysis data is invalid, so we want to omit all analysis errors.
@@ -4290,7 +4316,6 @@ fn workerUpdateFile(
wg: *WaitGroup,
src: Zcu.AstGenSrc,
) void {
-assert(file.getMode() == .zig);
const child_prog_node = prog_node.start(file.sub_file_path, 0);
defer child_prog_node.end();

@@ -4310,6 +4335,11 @@
},
};

switch (file.getMode()) {
.zig => {}, // continue to logic below
.zon => return, // ZON can't import anything so we're done
}

// Pre-emptively look for `@import` paths and queue them up.
// If we experience an error preemptively fetching the
// file, just ignore it and let it happen again later during Sema.
@@ -4344,7 +4374,7 @@
const imported_path_digest = pt.zcu.filePathDigest(res.file_index);
break :blk .{ res, imported_path_digest };
};
-if (import_result.is_new and import_result.file.getMode() == .zig) {
+if (import_result.is_new) {
log.debug("AstGen of {s} has import '{s}'; queuing AstGen of {s}", .{
file.sub_file_path, import_path, import_result.file.sub_file_path,
});
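
One subtlety in the manifest hunk above: `resolved_path` is freed only via `errdefer`, which suggests that `addFilePostContents` takes ownership of the path on success (consistent with the usual `std.Build.Cache.Manifest` convention), and `cache_manifest_mutex` is held because other workers may be adding entries to the same manifest concurrently.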
6 changes: 0 additions & 6 deletions src/Sema.zig
@@ -13994,12 +13994,6 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return Air.internedToRef(ty);
},
.zon => {
-_ = result.file.getTree(zcu.gpa) catch |err| {
-// TODO: these errors are file system errors; make sure an update() will
-// retry this and not cache the file system error, which may be transient.
-return sema.fail(block, operand_src, "unable to open '{s}': {s}", .{ result.file.sub_file_path, @errorName(err) });
-};
-
if (extra.res_ty == .none) {
return sema.fail(block, operand_src, "'@import' of ZON must have a known result type", .{});
}
2 changes: 0 additions & 2 deletions src/Sema/LowerZon.zig
@@ -39,8 +39,6 @@ pub fn run(
) CompileError!InternPool.Index {
const pt = sema.pt;

-_ = try file.getZoir(pt.zcu);
-
const tracked_inst = try pt.zcu.intern_pool.trackZir(pt.zcu.gpa, pt.tid, .{
.file = file_index,
.inst = .main_struct_inst, // this is the only trackable instruction in a ZON file
183 changes: 183 additions & 0 deletions src/Zcu.zig
@@ -2643,6 +2643,189 @@ pub fn loadZirCacheBody(gpa: Allocator, header: Zir.Header, cache_file: std.fs.F
return zir;
}

pub fn saveZirCache(gpa: Allocator, cache_file: std.fs.File, stat: std.fs.File.Stat, zir: Zir) !void {
const safety_buffer = if (data_has_safety_tag)
try gpa.alloc([8]u8, zir.instructions.len)
else
undefined;
defer if (data_has_safety_tag) gpa.free(safety_buffer);

const data_ptr: [*]const u8 = if (data_has_safety_tag)
if (zir.instructions.len == 0)
undefined
else
@ptrCast(safety_buffer.ptr)
else
@ptrCast(zir.instructions.items(.data).ptr);

if (data_has_safety_tag) {
// The `Data` union has a safety tag but in the file format we store it without.
for (zir.instructions.items(.data), 0..) |*data, i| {
const as_struct: *const HackDataLayout = @ptrCast(data);
safety_buffer[i] = as_struct.data;
}
}

const header: Zir.Header = .{
.instructions_len = @intCast(zir.instructions.len),
.string_bytes_len = @intCast(zir.string_bytes.len),
.extra_len = @intCast(zir.extra.len),

.stat_size = stat.size,
.stat_inode = stat.inode,
.stat_mtime = stat.mtime,
};
var iovecs: [5]std.posix.iovec_const = .{
.{
.base = @ptrCast(&header),
.len = @sizeOf(Zir.Header),
},
.{
.base = @ptrCast(zir.instructions.items(.tag).ptr),
.len = zir.instructions.len,
},
.{
.base = data_ptr,
.len = zir.instructions.len * 8,
},
.{
.base = zir.string_bytes.ptr,
.len = zir.string_bytes.len,
},
.{
.base = @ptrCast(zir.extra.ptr),
.len = zir.extra.len * 4,
},
};
try cache_file.writevAll(&iovecs);
}

pub fn saveZoirCache(cache_file: std.fs.File, stat: std.fs.File.Stat, zoir: Zoir) !void {
const header: Zoir.Header = .{
.nodes_len = @intCast(zoir.nodes.len),
.extra_len = @intCast(zoir.extra.len),
.limbs_len = @intCast(zoir.limbs.len),
.string_bytes_len = @intCast(zoir.string_bytes.len),
.compile_errors_len = @intCast(zoir.compile_errors.len),
.error_notes_len = @intCast(zoir.error_notes.len),

.stat_size = stat.size,
.stat_inode = stat.inode,
.stat_mtime = stat.mtime,
};
var iovecs: [9]std.posix.iovec_const = .{
.{
.base = @ptrCast(&header),
.len = @sizeOf(Zoir.Header),
},
.{
.base = @ptrCast(zoir.nodes.items(.tag)),
.len = zoir.nodes.len * @sizeOf(Zoir.Node.Repr.Tag),
},
.{
.base = @ptrCast(zoir.nodes.items(.data)),
.len = zoir.nodes.len * 4,
},
.{
.base = @ptrCast(zoir.nodes.items(.ast_node)),
.len = zoir.nodes.len * 4,
},
.{
.base = @ptrCast(zoir.extra),
.len = zoir.extra.len * 4,
},
.{
.base = @ptrCast(zoir.limbs),
.len = zoir.limbs.len * 4,
},
.{
.base = zoir.string_bytes.ptr,
.len = zoir.string_bytes.len,
},
.{
.base = @ptrCast(zoir.compile_errors),
.len = zoir.compile_errors.len * @sizeOf(Zoir.CompileError),
},
.{
.base = @ptrCast(zoir.error_notes),
.len = zoir.error_notes.len * @sizeOf(Zoir.CompileError.Note),
},
};
try cache_file.writevAll(&iovecs);
}

pub fn loadZoirCacheBody(gpa: Allocator, header: Zoir.Header, cache_file: std.fs.File) !Zoir {
var zoir: Zoir = .{
.nodes = .empty,
.extra = &.{},
.limbs = &.{},
.string_bytes = &.{},
.compile_errors = &.{},
.error_notes = &.{},
};
errdefer zoir.deinit(gpa);

zoir.nodes = nodes: {
var nodes: std.MultiArrayList(Zoir.Node.Repr) = .empty;
defer nodes.deinit(gpa);
try nodes.setCapacity(gpa, header.nodes_len);
nodes.len = header.nodes_len;
break :nodes nodes.toOwnedSlice();
};

zoir.extra = try gpa.alloc(u32, header.extra_len);
zoir.limbs = try gpa.alloc(std.math.big.Limb, header.limbs_len);
zoir.string_bytes = try gpa.alloc(u8, header.string_bytes_len);

zoir.compile_errors = try gpa.alloc(Zoir.CompileError, header.compile_errors_len);
zoir.error_notes = try gpa.alloc(Zoir.CompileError.Note, header.error_notes_len);

var iovecs: [8]std.posix.iovec = .{
.{
.base = @ptrCast(zoir.nodes.items(.tag)),
.len = header.nodes_len * @sizeOf(Zoir.Node.Repr.Tag),
},
.{
.base = @ptrCast(zoir.nodes.items(.data)),
.len = header.nodes_len * 4,
},
.{
.base = @ptrCast(zoir.nodes.items(.ast_node)),
.len = header.nodes_len * 4,
},
.{
.base = @ptrCast(zoir.extra),
.len = header.extra_len * 4,
},
.{
.base = @ptrCast(zoir.limbs),
.len = header.limbs_len * @sizeOf(std.math.big.Limb),
},
.{
.base = zoir.string_bytes.ptr,
.len = header.string_bytes_len,
},
.{
.base = @ptrCast(zoir.compile_errors),
.len = header.compile_errors_len * @sizeOf(Zoir.CompileError),
},
.{
.base = @ptrCast(zoir.error_notes),
.len = header.error_notes_len * @sizeOf(Zoir.CompileError.Note),
},
};

const bytes_expected = expected: {
var n: usize = 0;
for (iovecs) |v| n += v.len;
break :expected n;
};

const bytes_read = try cache_file.readvAll(&iovecs);
if (bytes_read != bytes_expected) return error.UnexpectedFileSize;
return zoir;
}

pub fn markDependeeOutdated(
zcu: *Zcu,
/// When we are diffing ZIR and marking things as outdated, we won't yet have marked the dependencies as PO.
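
Taken together, the two new ZOIR functions round-trip like this (a hypothetical sketch: `Zcu` here stands for the compiler-internal `src/Zcu.zig`, the surrounding function is invented, and error handling is minimal):

const std = @import("std");
const Zoir = std.zig.Zoir;
const Zcu = @import("Zcu.zig"); // compiler-internal; illustrative only

fn roundTripZoir(
    gpa: std.mem.Allocator,
    cache_file: std.fs.File,
    stat: std.fs.File.Stat,
    zoir: Zoir,
) !Zoir {
    // Write the header followed by every data segment in one writev call.
    try Zcu.saveZoirCache(cache_file, stat, zoir);

    // Read it back: header first, then the body from the current offset.
    try cache_file.seekTo(0);
    var header: Zoir.Header = undefined;
    if (try cache_file.readAll(std.mem.asBytes(&header)) != @sizeOf(Zoir.Header))
        return error.UnexpectedFileSize;
    return Zcu.loadZoirCacheBody(gpa, header, cache_file);
}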
