Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for stack traces on macosx #780

Merged
merged 2 commits into from
Feb 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ set(ZIG_STD_FILES
"index.zig"
"io.zig"
"linked_list.zig"
"macho.zig"
"math/acos.zig"
"math/acosh.zig"
"math/asin.zig"
Expand Down
149 changes: 92 additions & 57 deletions std/debug/index.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const io = std.io;
const os = std.os;
const elf = std.elf;
const DW = std.dwarf;
const macho = std.macho;
const ArrayList = std.ArrayList;
const builtin = @import("builtin");

Expand Down Expand Up @@ -180,50 +181,65 @@ pub fn writeCurrentStackTrace(out_stream: var, allocator: &mem.Allocator,
}

fn printSourceAtAddress(debug_info: &ElfStackTrace, out_stream: var, address: usize) !void {
if (builtin.os == builtin.Os.windows) {
return error.UnsupportedDebugInfo;
}
// TODO we really should be able to convert @sizeOf(usize) * 2 to a string literal
// at compile time. I'll call it issue #313
const ptr_hex = if (@sizeOf(usize) == 4) "0x{x8}" else "0x{x16}";

const compile_unit = findCompileUnit(debug_info, address) catch {
try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n ???\n\n",
address);
return;
};
const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name);
if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| {
defer line_info.deinit();
try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++
DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n",
line_info.file_name, line_info.line, line_info.column,
address, compile_unit_name);
if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
if (line_info.column == 0) {
try out_stream.write("\n");
} else {
{var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) {
try out_stream.writeByte(' ');
}}
try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
switch (builtin.os) {
builtin.Os.windows => return error.UnsupportedDebugInfo,
builtin.Os.macosx => {
// TODO(bnoordhuis) It's theoretically possible to obtain the
// compilation unit from the symbtab but it's not that useful
// in practice because the compiler dumps everything in a single
// object file. Future improvement: use external dSYM data when
// available.
const unknown = macho.Symbol { .name = "???", .address = address };
const symbol = debug_info.symbol_table.search(address) ?? &unknown;
try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++
DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n",
symbol.name, address);
},
else => {
const compile_unit = findCompileUnit(debug_info, address) catch {
try out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n ???\n\n",
address);
return;
};
const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name);
if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| {
defer line_info.deinit();
try out_stream.print(WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++
DIM ++ ptr_hex ++ " in ??? ({})" ++ RESET ++ "\n",
line_info.file_name, line_info.line, line_info.column,
address, compile_unit_name);
if (printLineFromFile(debug_info.allocator(), out_stream, line_info)) {
if (line_info.column == 0) {
try out_stream.write("\n");
} else {
{var col_i: usize = 1; while (col_i < line_info.column) : (col_i += 1) {
try out_stream.writeByte(' ');
}}
try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n");
}
} else |err| switch (err) {
error.EndOfFile => {},
else => return err,
}
} else |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => {
try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name);
},
else => return err,
}
} else |err| switch (err) {
error.EndOfFile => {},
else => return err,
}
} else |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => {
try out_stream.print(ptr_hex ++ " in ??? ({})\n", address, compile_unit_name);
},
else => return err,
}
}

pub fn openSelfDebugInfo(allocator: &mem.Allocator) !&ElfStackTrace {
switch (builtin.object_format) {
builtin.ObjectFormat.elf => {
const st = try allocator.create(ElfStackTrace);
errdefer allocator.destroy(st);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch

*st = ElfStackTrace {
.self_exe_file = undefined,
.elf = undefined,
Expand All @@ -249,12 +265,22 @@ pub fn openSelfDebugInfo(allocator: &mem.Allocator) !&ElfStackTrace {
try scanAllCompileUnits(st);
return st;
},
builtin.ObjectFormat.macho => {
var exe_file = try os.openSelfExe();
defer exe_file.close();

const st = try allocator.create(ElfStackTrace);
errdefer allocator.destroy(st);

*st = ElfStackTrace {
.symbol_table = try macho.loadSymbols(allocator, &io.FileInStream.init(&exe_file)),
};

return st;
},
builtin.ObjectFormat.coff => {
return error.TodoSupportCoffDebugInfo;
},
builtin.ObjectFormat.macho => {
return error.TodoSupportMachoDebugInfo;
},
builtin.ObjectFormat.wasm => {
return error.TodoSupportCOFFDebugInfo;
},
Expand Down Expand Up @@ -297,31 +323,40 @@ fn printLineFromFile(allocator: &mem.Allocator, out_stream: var, line_info: &con
}
}

pub const ElfStackTrace = struct {
self_exe_file: os.File,
elf: elf.Elf,
debug_info: &elf.SectionHeader,
debug_abbrev: &elf.SectionHeader,
debug_str: &elf.SectionHeader,
debug_line: &elf.SectionHeader,
debug_ranges: ?&elf.SectionHeader,
abbrev_table_list: ArrayList(AbbrevTableHeader),
compile_unit_list: ArrayList(CompileUnit),

pub fn allocator(self: &const ElfStackTrace) &mem.Allocator {
return self.abbrev_table_list.allocator;
}
pub const ElfStackTrace = switch (builtin.os) {
builtin.Os.macosx => struct {
symbol_table: macho.SymbolTable,

pub fn readString(self: &ElfStackTrace) ![]u8 {
var in_file_stream = io.FileInStream.init(&self.self_exe_file);
const in_stream = &in_file_stream.stream;
return readStringRaw(self.allocator(), in_stream);
}
pub fn close(self: &ElfStackTrace) void {
self.symbol_table.deinit();
}
},
else => struct {
self_exe_file: os.File,
elf: elf.Elf,
debug_info: &elf.SectionHeader,
debug_abbrev: &elf.SectionHeader,
debug_str: &elf.SectionHeader,
debug_line: &elf.SectionHeader,
debug_ranges: ?&elf.SectionHeader,
abbrev_table_list: ArrayList(AbbrevTableHeader),
compile_unit_list: ArrayList(CompileUnit),

pub fn allocator(self: &const ElfStackTrace) &mem.Allocator {
return self.abbrev_table_list.allocator;
}

pub fn close(self: &ElfStackTrace) void {
self.self_exe_file.close();
self.elf.close();
}
pub fn readString(self: &ElfStackTrace) ![]u8 {
var in_file_stream = io.FileInStream.init(&self.self_exe_file);
const in_stream = &in_file_stream.stream;
return readStringRaw(self.allocator(), in_stream);
}

pub fn close(self: &ElfStackTrace) void {
self.self_exe_file.close();
self.elf.close();
}
},
};

const PcRange = struct {
Expand Down
2 changes: 2 additions & 0 deletions std/index.zig
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub const endian = @import("endian.zig");
pub const fmt = @import("fmt/index.zig");
pub const heap = @import("heap.zig");
pub const io = @import("io.zig");
pub const macho = @import("macho.zig");
pub const math = @import("math/index.zig");
pub const mem = @import("mem.zig");
pub const net = @import("net.zig");
Expand Down Expand Up @@ -51,6 +52,7 @@ test "std" {
_ = @import("endian.zig");
_ = @import("fmt/index.zig");
_ = @import("io.zig");
_ = @import("macho.zig");
_ = @import("math/index.zig");
_ = @import("mem.zig");
_ = @import("heap.zig");
Expand Down
177 changes: 177 additions & 0 deletions std/macho.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
const builtin = @import("builtin");
const std = @import("index.zig");
const io = std.io;
const mem = std.mem;

const MH_MAGIC_64 = 0xFEEDFACF;
const MH_PIE = 0x200000;
const LC_SYMTAB = 2;

const MachHeader64 = packed struct {
magic: u32,
cputype: u32,
cpusubtype: u32,
filetype: u32,
ncmds: u32,
sizeofcmds: u32,
flags: u32,
reserved: u32,
};

const LoadCommand = packed struct {
cmd: u32,
cmdsize: u32,
};

const SymtabCommand = packed struct {
symoff: u32,
nsyms: u32,
stroff: u32,
strsize: u32,
};

const Nlist64 = packed struct {
n_strx: u32,
n_type: u8,
n_sect: u8,
n_desc: u16,
n_value: u64,
};

pub const Symbol = struct {
name: []const u8,
address: u64,

fn addressLessThan(lhs: &const Symbol, rhs: &const Symbol) bool {
return lhs.address < rhs.address;
}
};

pub const SymbolTable = struct {
allocator: &mem.Allocator,
symbols: []const Symbol,
strings: []const u8,

// Doubles as an eyecatcher to calculate the PIE slide, see loadSymbols().
// Ideally we'd use _mh_execute_header because it's always at 0x100000000
// in the image but as it's located in a different section than executable
// code, its displacement is different.
pub fn deinit(self: &SymbolTable) void {
self.allocator.free(self.symbols);
self.symbols = []const Symbol {};

self.allocator.free(self.strings);
self.strings = []const u8 {};
}

pub fn search(self: &const SymbolTable, address: usize) ?&const Symbol {
var min: usize = 0;
var max: usize = self.symbols.len - 1; // Exclude sentinel.
while (min < max) {
const mid = min + (max - min) / 2;
const curr = &self.symbols[mid];
const next = &self.symbols[mid + 1];
if (address >= next.address) {
min = mid + 1;
} else if (address < curr.address) {
max = mid;
} else {
return curr;
}
}
return null;
}
};

pub fn loadSymbols(allocator: &mem.Allocator, in: &io.FileInStream) !SymbolTable {
var file = in.file;
try file.seekTo(0);

var hdr: MachHeader64 = undefined;
try readNoEof(in, &hdr);
if (hdr.magic != MH_MAGIC_64) return error.MissingDebugInfo;
const is_pie = MH_PIE == (hdr.flags & MH_PIE);

var pos: usize = @sizeOf(@typeOf(hdr));
var ncmd: u32 = hdr.ncmds;
while (ncmd != 0) : (ncmd -= 1) {
try file.seekTo(pos);
var lc: LoadCommand = undefined;
try readNoEof(in, &lc);
if (lc.cmd == LC_SYMTAB) break;
pos += lc.cmdsize;
} else {
return error.MissingDebugInfo;
}

var cmd: SymtabCommand = undefined;
try readNoEof(in, &cmd);

try file.seekTo(cmd.symoff);
var syms = try allocator.alloc(Nlist64, cmd.nsyms);
defer allocator.free(syms);
try readNoEof(in, syms);

try file.seekTo(cmd.stroff);
var strings = try allocator.alloc(u8, cmd.strsize);
errdefer allocator.free(strings);
try in.stream.readNoEof(strings);

var nsyms: usize = 0;
for (syms) |sym| if (isSymbol(sym)) nsyms += 1;
if (nsyms == 0) return error.MissingDebugInfo;

var symbols = try allocator.alloc(Symbol, nsyms + 1); // Room for sentinel.
errdefer allocator.free(symbols);

var pie_slide: usize = 0;
var nsym: usize = 0;
for (syms) |sym| {
if (!isSymbol(sym)) continue;
const start = sym.n_strx;
const end = ??mem.indexOfScalarPos(u8, strings, start, 0);
const name = strings[start..end];
const address = sym.n_value;
symbols[nsym] = Symbol { .name = name, .address = address };
nsym += 1;
if (is_pie and mem.eql(u8, name, "_SymbolTable_deinit")) {
pie_slide = @ptrToInt(SymbolTable.deinit) - address;
}
}

// Effectively a no-op, lld emits symbols in ascending order.
std.sort.insertionSort(Symbol, symbols[0..nsyms], Symbol.addressLessThan);

// Insert the sentinel. Since we don't know where the last function ends,
// we arbitrarily limit it to the start address + 4 KB.
const top = symbols[nsyms - 1].address + 4096;
symbols[nsyms] = Symbol { .name = "", .address = top };

if (pie_slide != 0) {
for (symbols) |*symbol| symbol.address += pie_slide;
}

return SymbolTable {
.allocator = allocator,
.symbols = symbols,
.strings = strings,
};
}

fn readNoEof(in: &io.FileInStream, sink: var) !void {
if (@typeOf(sink) == []Nlist64) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't figure out a way to do this generically.

Copy link
Member

@andrewrk andrewrk Feb 19, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a lot of responses to this, starting with minimal changes, ending with let's change the standard library.

  1. I'm actually fine with this, given that it's at least correct for this file, and it's a private function so it's not really exposed to much of anything.

  2. Here's how to do what you wanted to do:

fn readNoEof(in: &io.FileInStream, sink: var) !void {
    switch (@typeId(@typeOf(sink)) {
        builtin.TypeId.Slice => {
            const T = @typeOf(sink).Child;
            ...
        },
        builtin.TypeId.Pointer => {
            const T = @typeOf(sink).Child;
            ...
        },
        else => @compileError("expected slice or pointer"),
    }
}
  1. However, let's use this as an example to write idiomatic zig code. I think it would be better to separate this out into 2 functions:
fn readNoEof(in: &io.FileInStream, comptime T: type, result: []T) !void {
    return in.stream.readNoEof(([]u8)(result));
}
fn readOneNoEof(in: &io.FileInStream, comptime T: type, result: &T) !void {
    return readNoEof(in, T, result[0..1]);
}

related: #287

  1. One more thing, this looks like it's a pretty common thing to want to do. Let's modify std.io.InStream to support these APIs. I think that it's ok to break backwards compatibility of readNoEof and give it a comptime parameter of the type you want to read. Existing callsites can be updated to get the old behavior by passing in u8.

    And then we can add a safety check, if you pass a struct type to readNoEof, it will give you a compile error if your struct is not packed (because you would get an undefined value when deserializing a non-packed struct).

    I think this macho code should be passing around a &std.io.InStream instead of the &std.io.FileInStream directly. This is where that readNoEof API would go. I understand that it's a little weird because you'd have to use var for the parameter due to error sets. That's In/OutStream design flaw introduced with new error set changes. #764 and we don't have a good solution for that yet (other than "just use var").

  2. Finally, I think we should go back to (3) for now, because in order for (4) to make sense, we need the ability to specify endianness in packed structs. That's pending Add endianness as one of the pointer properties #649. Once packed structs support the concept of endianness, it makes sense to use them as types to serialization/deserialization functions.

Hope that helps!

My proposal is:

  • do (3) since it's easy
  • merge this PR since it's better than status quo
  • I'll bump the priority of Add endianness as one of the pointer properties #649
  • Once we get packed struct endianness working, we can add support for serializing/deserializing them using std.io.OutStream / std.io.InStream and then update this code to use std.io.InStream instead of std.io.FileInStream

const T = @typeOf(sink[0]);
const len = @sizeOf(T) * sink.len;
const bytes = @ptrCast(&u8, &sink[0]);
return in.stream.readNoEof(bytes[0..len]);
} else {
const T = @typeOf(*sink);
const len = @sizeOf(T);
const bytes = @ptrCast(&u8, sink);
return in.stream.readNoEof(bytes[0..len]);
}
}

fn isSymbol(sym: &const Nlist64) bool {
return sym.n_value != 0 and sym.n_desc == 0;
}