compiler: integrate ZON with the ZIR caching system
This came with a big cleanup to `Zcu.PerThread.updateFile` (formerly
`astGenFile`).

Also, change how the cache manifest works for files in the import table.
Instead of adding files to the manifest when we call `semaFile` on
them, we now iterate the import table after the AstGen workers have run
and add all of the files to the cache manifest at that point (see the
new hunk in `performAllTheWorkInner` below).

The downside is that this is a bit more eager to include files in the
manifest; in particular, files which are imported but not actually
referenced are now included in the manifest. So, for instance, modifying any
standard library file will invalidate all Zig compilations using that
standard library, even if they don't use that file.

The original motivation here was simply that the old logic in `semaFile`
didn't translate nicely to ZON. However, it turns out to actually be
necessary for correctness. Because `@import("foo.zig")` is an
AstGen-level error if `foo.zig` does not exist, we need to invalidate
the cache when an imported but unreferenced file is removed to make sure
this error is triggered when it needs to be.
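
As a hypothetical illustration, consider a module in which `a.zig`
contains:

    const b = @import("b.zig"); // declared but never referenced

If `b.zig` is later deleted, the next update must re-run AstGen on
`a.zig` so that the missing-file error is reported; a cached "success"
result for `a.zig` cannot be reused.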

Resolves: ziglang#22746
mlugg committed Feb 4, 2025
1 parent 3e3390b commit 9a2bb0e
Showing 6 changed files with 405 additions and 187 deletions.
25 changes: 25 additions & 0 deletions lib/std/zig/Zoir.zig
@@ -10,6 +10,31 @@ string_bytes: []u8,
compile_errors: []Zoir.CompileError,
error_notes: []Zoir.CompileError.Note,

/// The data stored at byte offset 0 when ZOIR is stored in a file.
pub const Header = extern struct {
nodes_len: u32,
extra_len: u32,
limbs_len: u32,
string_bytes_len: u32,
compile_errors_len: u32,
error_notes_len: u32,

/// We could leave this as padding, however it triggers a Valgrind warning because
/// we read and write undefined bytes to the file system. This is harmless, but
/// it's essentially free to have a zero field here and makes the warning go away,
/// making it more likely that following Valgrind warnings will be taken seriously.
unused: u64 = 0,

stat_inode: std.fs.File.INode,
stat_size: u64,
stat_mtime: i128,

comptime {
// Check that `unused` is working as expected
assert(std.meta.hasUniqueRepresentation(Header));
}
};

pub fn hasCompileErrors(zoir: Zoir) bool {
if (zoir.compile_errors.len > 0) {
assert(zoir.nodes.len == 0);
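
For context, a minimal sketch of how a consumer of this header might validate a cached ZOIR file against its source file. The function name and shape here are hypothetical; the real check lives in `Zcu.PerThread.updateFile`:

const std = @import("std");
const Zoir = std.zig.Zoir;

fn zoirCacheIsStale(cache_file: std.fs.File, src_stat: std.fs.File.Stat) !bool {
    var header: Zoir.Header = undefined;
    const n = try cache_file.readAll(std.mem.asBytes(&header));
    if (n != @sizeOf(Zoir.Header)) return error.UnexpectedFileSize;
    // The stat fields captured at save time must match the current source file.
    return header.stat_inode != src_stat.inode or
        header.stat_size != src_stat.size or
        header.stat_mtime != src_stat.mtime;
}

This is also why the `comptime` check above matters: the header is written to and read from disk as raw bytes, so it must have no padding holes.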
46 changes: 38 additions & 8 deletions src/Compilation.zig
@@ -2220,10 +2220,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
try comp.astgen_work_queue.ensureUnusedCapacity(zcu.import_table.count());
for (zcu.import_table.values()) |file_index| {
if (zcu.fileByIndex(file_index).mod.isBuiltin()) continue;
-const file = zcu.fileByIndex(file_index);
-if (file.getMode() == .zig) {
-comp.astgen_work_queue.writeItemAssumeCapacity(file_index);
-}
+comp.astgen_work_queue.writeItemAssumeCapacity(file_index);
}
if (comp.file_system_inputs) |fsi| {
for (zcu.import_table.values()) |file_index| {
@@ -3810,19 +3807,48 @@ fn performAllTheWorkInner(
const pt: Zcu.PerThread = .activate(zcu, .main);
defer pt.deactivate();

// If the cache mode is `whole`, then add every source file to the cache manifest.
switch (comp.cache_use) {
.whole => |whole| if (whole.cache_manifest) |man| {
const gpa = zcu.gpa;
for (zcu.import_table.values()) |file_index| {
const file = zcu.fileByIndex(file_index);
const source = file.getSource(gpa) catch |err| {
try pt.reportRetryableFileError(file_index, "unable to load source: {s}", .{@errorName(err)});
continue;
};
const resolved_path = try std.fs.path.resolve(gpa, &.{
file.mod.root.root_dir.path orelse ".",
file.mod.root.sub_path,
file.sub_file_path,
});
errdefer gpa.free(resolved_path);
whole.cache_manifest_mutex.lock();
defer whole.cache_manifest_mutex.unlock();
man.addFilePostContents(resolved_path, source.bytes, source.stat) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
else => {
try pt.reportRetryableFileError(file_index, "unable to update cache: {s}", .{@errorName(err)});
continue;
},
};
}
},
.incremental => {},
}

try reportMultiModuleErrors(pt);

const any_fatal_files = for (zcu.import_table.values()) |file_index| {
const file = zcu.fileByIndex(file_index);
if (file.getMode() == .zon) continue;
switch (file.status) {
.never_loaded => unreachable, // everything is loaded by the workers
.retryable_failure, .astgen_failure => break true,
.success => {},
}
} else false;

-if (any_fatal_files) {
+if (any_fatal_files or comp.alloc_failure_occurred) {
// We give up right now! No updating of ZIR refs, no nothing. The idea is that this prevents
// us from invalidating lots of incremental dependencies due to files with e.g. parse errors.
// However, this means our analysis data is invalid, so we want to omit all analysis errors.
@@ -4290,7 +4316,6 @@ fn workerUpdateFile(
wg: *WaitGroup,
src: Zcu.AstGenSrc,
) void {
-assert(file.getMode() == .zig);
const child_prog_node = prog_node.start(file.sub_file_path, 0);
defer child_prog_node.end();

@@ -4310,6 +4335,11 @@
},
};

switch (file.getMode()) {
.zig => {}, // continue to logic below
.zon => return, // ZON can't import anything so we're done
}

// Pre-emptively look for `@import` paths and queue them up.
// If we experience an error preemptively fetching the
// file, just ignore it and let it happen again later during Sema.
@@ -4344,7 +4374,7 @@
const imported_path_digest = pt.zcu.filePathDigest(res.file_index);
break :blk .{ res, imported_path_digest };
};
-if (import_result.is_new and import_result.file.getMode() == .zig) {
+if (import_result.is_new) {
log.debug("AstGen of {s} has import '{s}'; queuing AstGen of {s}", .{
file.sub_file_path, import_path, import_result.file.sub_file_path,
});
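
One subtlety in the manifest hunk above: `resolved_path` is freed only via `errdefer`, which suggests that `addFilePostContents` takes ownership of the path on success (consistent with the usual `std.Build.Cache.Manifest` convention), and `cache_manifest_mutex` is held because other workers may be adding entries to the same manifest concurrently.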
6 changes: 0 additions & 6 deletions src/Sema.zig
@@ -13994,12 +13994,6 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return Air.internedToRef(ty);
},
.zon => {
-_ = result.file.getTree(zcu.gpa) catch |err| {
-// TODO: these errors are file system errors; make sure an update() will
-// retry this and not cache the file system error, which may be transient.
-return sema.fail(block, operand_src, "unable to open '{s}': {s}", .{ result.file.sub_file_path, @errorName(err) });
-};
-
if (extra.res_ty == .none) {
return sema.fail(block, operand_src, "'@import' of ZON must have a known result type", .{});
}
2 changes: 0 additions & 2 deletions src/Sema/LowerZon.zig
@@ -39,8 +39,6 @@ pub fn run(
) CompileError!InternPool.Index {
const pt = sema.pt;

-_ = try file.getZoir(pt.zcu);
-
const tracked_inst = try pt.zcu.intern_pool.trackZir(pt.zcu.gpa, pt.tid, .{
.file = file_index,
.inst = .main_struct_inst, // this is the only trackable instruction in a ZON file
183 changes: 183 additions & 0 deletions src/Zcu.zig
@@ -2643,6 +2643,189 @@ pub fn loadZirCacheBody(gpa: Allocator, header: Zir.Header, cache_file: std.fs.F
return zir;
}

pub fn saveZirCache(gpa: Allocator, cache_file: std.fs.File, stat: std.fs.File.Stat, zir: Zir) !void {
const safety_buffer = if (data_has_safety_tag)
try gpa.alloc([8]u8, zir.instructions.len)
else
undefined;
defer if (data_has_safety_tag) gpa.free(safety_buffer);

const data_ptr: [*]const u8 = if (data_has_safety_tag)
if (zir.instructions.len == 0)
undefined
else
@ptrCast(safety_buffer.ptr)
else
@ptrCast(zir.instructions.items(.data).ptr);

if (data_has_safety_tag) {
// The `Data` union has a safety tag but in the file format we store it without.
for (zir.instructions.items(.data), 0..) |*data, i| {
const as_struct: *const HackDataLayout = @ptrCast(data);
safety_buffer[i] = as_struct.data;
}
}

const header: Zir.Header = .{
.instructions_len = @intCast(zir.instructions.len),
.string_bytes_len = @intCast(zir.string_bytes.len),
.extra_len = @intCast(zir.extra.len),

.stat_size = stat.size,
.stat_inode = stat.inode,
.stat_mtime = stat.mtime,
};
var iovecs: [5]std.posix.iovec_const = .{
.{
.base = @ptrCast(&header),
.len = @sizeOf(Zir.Header),
},
.{
.base = @ptrCast(zir.instructions.items(.tag).ptr),
.len = zir.instructions.len,
},
.{
.base = data_ptr,
.len = zir.instructions.len * 8,
},
.{
.base = zir.string_bytes.ptr,
.len = zir.string_bytes.len,
},
.{
.base = @ptrCast(zir.extra.ptr),
.len = zir.extra.len * 4,
},
};
try cache_file.writevAll(&iovecs);
}

pub fn saveZoirCache(cache_file: std.fs.File, stat: std.fs.File.Stat, zoir: Zoir) !void {
const header: Zoir.Header = .{
.nodes_len = @intCast(zoir.nodes.len),
.extra_len = @intCast(zoir.extra.len),
.limbs_len = @intCast(zoir.limbs.len),
.string_bytes_len = @intCast(zoir.string_bytes.len),
.compile_errors_len = @intCast(zoir.compile_errors.len),
.error_notes_len = @intCast(zoir.error_notes.len),

.stat_size = stat.size,
.stat_inode = stat.inode,
.stat_mtime = stat.mtime,
};
var iovecs: [9]std.posix.iovec_const = .{
.{
.base = @ptrCast(&header),
.len = @sizeOf(Zoir.Header),
},
.{
.base = @ptrCast(zoir.nodes.items(.tag)),
.len = zoir.nodes.len * @sizeOf(Zoir.Node.Repr.Tag),
},
.{
.base = @ptrCast(zoir.nodes.items(.data)),
.len = zoir.nodes.len * 4,
},
.{
.base = @ptrCast(zoir.nodes.items(.ast_node)),
.len = zoir.nodes.len * 4,
},
.{
.base = @ptrCast(zoir.extra),
.len = zoir.extra.len * 4,
},
.{
.base = @ptrCast(zoir.limbs),
.len = zoir.limbs.len * 4,
},
.{
.base = zoir.string_bytes.ptr,
.len = zoir.string_bytes.len,
},
.{
.base = @ptrCast(zoir.compile_errors),
.len = zoir.compile_errors.len * @sizeOf(Zoir.CompileError),
},
.{
.base = @ptrCast(zoir.error_notes),
.len = zoir.error_notes.len * @sizeOf(Zoir.CompileError.Note),
},
};
try cache_file.writevAll(&iovecs);
}

pub fn loadZoirCacheBody(gpa: Allocator, header: Zoir.Header, cache_file: std.fs.File) !Zoir {
var zoir: Zoir = .{
.nodes = .empty,
.extra = &.{},
.limbs = &.{},
.string_bytes = &.{},
.compile_errors = &.{},
.error_notes = &.{},
};
errdefer zoir.deinit(gpa);

zoir.nodes = nodes: {
var nodes: std.MultiArrayList(Zoir.Node.Repr) = .empty;
defer nodes.deinit(gpa);
try nodes.setCapacity(gpa, header.nodes_len);
nodes.len = header.nodes_len;
break :nodes nodes.toOwnedSlice();
};

zoir.extra = try gpa.alloc(u32, header.extra_len);
zoir.limbs = try gpa.alloc(std.math.big.Limb, header.limbs_len);
zoir.string_bytes = try gpa.alloc(u8, header.string_bytes_len);

zoir.compile_errors = try gpa.alloc(Zoir.CompileError, header.compile_errors_len);
zoir.error_notes = try gpa.alloc(Zoir.CompileError.Note, header.error_notes_len);

var iovecs: [8]std.posix.iovec = .{
.{
.base = @ptrCast(zoir.nodes.items(.tag)),
.len = header.nodes_len * @sizeOf(Zoir.Node.Repr.Tag),
},
.{
.base = @ptrCast(zoir.nodes.items(.data)),
.len = header.nodes_len * 4,
},
.{
.base = @ptrCast(zoir.nodes.items(.ast_node)),
.len = header.nodes_len * 4,
},
.{
.base = @ptrCast(zoir.extra),
.len = header.extra_len * 4,
},
.{
.base = @ptrCast(zoir.limbs),
.len = header.limbs_len * @sizeOf(std.math.big.Limb),
},
.{
.base = zoir.string_bytes.ptr,
.len = header.string_bytes_len,
},
.{
.base = @ptrCast(zoir.compile_errors),
.len = header.compile_errors_len * @sizeOf(Zoir.CompileError),
},
.{
.base = @ptrCast(zoir.error_notes),
.len = header.error_notes_len * @sizeOf(Zoir.CompileError.Note),
},
};

const bytes_expected = expected: {
var n: usize = 0;
for (iovecs) |v| n += v.len;
break :expected n;
};

const bytes_read = try cache_file.readvAll(&iovecs);
if (bytes_read != bytes_expected) return error.UnexpectedFileSize;
return zoir;
}

pub fn markDependeeOutdated(
zcu: *Zcu,
/// When we are diffing ZIR and marking things as outdated, we won't yet have marked the dependencies as PO.
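
Taken together, the two new ZOIR functions round-trip like this (a hypothetical sketch: `Zcu` here stands for the compiler-internal `src/Zcu.zig`, the surrounding function is invented, and error handling is minimal):

const std = @import("std");
const Zoir = std.zig.Zoir;
const Zcu = @import("Zcu.zig"); // compiler-internal; illustrative only

fn roundTripZoir(
    gpa: std.mem.Allocator,
    cache_file: std.fs.File,
    stat: std.fs.File.Stat,
    zoir: Zoir,
) !Zoir {
    // Write the header followed by every data segment in one writev call.
    try Zcu.saveZoirCache(cache_file, stat, zoir);

    // Read it back: header first, then the body from the current offset.
    try cache_file.seekTo(0);
    var header: Zoir.Header = undefined;
    if (try cache_file.readAll(std.mem.asBytes(&header)) != @sizeOf(Zoir.Header))
        return error.UnexpectedFileSize;
    return Zcu.loadZoirCacheBody(gpa, header, cache_file);
}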
