Skip to content

Commit

Permalink
[Object][Wasm] Use file offset for section addresses in linked wasm f…
Browse files Browse the repository at this point in the history
…iles (#80529)

Wasm has no unified virtual memory space as other object formats and
architectures do, so previously WasmObjectFile reported 0 for all
section addresses, and until 428cf71 used section offsets for function
symbols. Now we use file offsets for function symbols, and this change
switches section addresses to do the same (in linked files). The main
result of this is that objdump now reports VMAs in section listings, and
also uses file offsets rather than section offsets when disassembling
linked binaries (matching the behavior of other disassemblers and stack
traces produced by browsers). To make this work, this PR also updates
objdump's generation of synthetic fallback symbols to match lib/Object
and also correctly plumbs symbol types for regular and dummy symbols
through to the backend to avoid needing special knowledge of address 0.

This also paves the way for generating symbols from name sections rather
than symbol tables or imports (see #76107) by allowing the
disassembler's synthetic fallback symbols to match the name-section
generated symbols (in a followup PR).
  • Loading branch information
dschuff authored Feb 7, 2024
1 parent 2ecf608 commit 8b0f47b
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 39 deletions.
12 changes: 6 additions & 6 deletions lld/test/wasm/build-id.test
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ foo:


# DEFAULT: Contents of section build_id:
# DEFAULT-NEXT: 0000 10299168 1e3c845a 3c8f80ae 2f16cc22 .).h.<.Z<.../.."
# DEFAULT-NEXT: 0010 2d
# DEFAULT-NEXT: 0079 10299168 1e3c845a 3c8f80ae 2f16cc22 .).h.<.Z<.../.."
# DEFAULT-NEXT: 0089 2d

# SHA1: Contents of section build_id:
# SHA1-NEXT: 0000 145abdda 387a9bc4 e3aed3c3 3319cd37 .Z..8z......3..7
# SHA1-NEXT: 0010 0212237c e4 ..#|.
# SHA1-NEXT: 0079 145abdda 387a9bc4 e3aed3c3 3319cd37 .Z..8z......3..7
# SHA1-NEXT: 0089 0212237c e4 ..#|.

# UUID: Contents of section build_id:
# UUID-NEXT: 0000 10
# UUID-NEXT: 0079 10

# HEX: Contents of section build_id:
# HEX-NEXT: 0000 04123456 78 ..4Vx
# HEX-NEXT: 0079 04123456 78 ..4Vx


# NONE-NOT: Contents of section build_id:
14 changes: 7 additions & 7 deletions lld/test/wasm/merge-string-debug.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@

# CHECK: Hex dump of section '.debug_str':

# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13
# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo.
# CHECK-O0: 0x00000025 636c616e 67207665 7273696f 6e203133 clang version 13
# CHECK-O0: 0x00000035 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
# CHECK-O0: 0x00000045 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
# CHECK-O0: 0x00000055 00626172 00666f6f 00 .bar.foo.

# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0.
# CHECK-O1: 0x00000025 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
# CHECK-O1: 0x00000035 20766572 73696f6e 2031332e 302e3000 version 13.0.0.

# CHECK-OFFSETS: Hex dump of section '.debug_str_offsets':
# CHECK-OFFSETS: 0x00000000 00000000 00000000 00000000 ............
# CHECK-OFFSETS: 0x0000007e 00000000 00000000 00000000 ............
12 changes: 6 additions & 6 deletions lld/test/wasm/startstop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ entry:
; CHECK-NEXT: Value: 1024
; CHECK-NEXT: Content: 03000000040000002A0000002B000000

; ASM: 00000001 <get_start>:
; ASM: 0000006e <get_start>:
; ASM-EMPTY:
; ASM-NEXT: 3: i32.const 1024
; ASM-NEXT: 9: end
; ASM-NEXT: 70: i32.const 1024
; ASM-NEXT: 76: end

; ASM: 0000000a <get_end>:
; ASM: 00000077 <get_end>:
; ASM-EMPTY:
; ASM-NEXT: c: i32.const 1040
; ASM-NEXT: 12: end
; ASM-NEXT: 79: i32.const 1040
; ASM-NEXT: 7f: end
8 changes: 7 additions & 1 deletion llvm/lib/Object/WasmObjectFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1906,7 +1906,13 @@ Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
return wasm::sectionTypeToString(S.Type);
}

uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const {
// For object files, use 0 for section addresses, and section offsets for
// symbol addresses. For linked files, use file offsets.
// See also getSymbolAddress.
return isRelocatableObject() || isSharedObject() ? 0
: Sections[Sec.d.a].Offset;
}

uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const {
return Sec.d.a;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
Expand Down Expand Up @@ -127,7 +128,7 @@ WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
uint64_t Address,
raw_ostream &CStream) const {
Size = 0;
if (Address == 0) {
if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
// Start of a code section: we're parsing only the function count.
int64_t FunctionCount;
if (!nextLEB(FunctionCount, Bytes, Size, false))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ Sections:

# CHECK: Disassembly of section CODE:
# CHECK-EMPTY:
# CHECK-NEXT: 00000000 <CODE>:
# CHECK-NEXT: 00000026 <CODE>:
# CHECK-NEXT: # 2 functions in section.
# CHECK-EMPTY:
# CHECK-NEXT: 00000001 <f>:
# CHECK-NEXT: 00000027 <f>:
# CHECK-EMPTY:
# CHECK-NEXT: 3: 0b end
# CHECK-NEXT: 29: 0b end
# CHECK-EMPTY:
# CHECK-NEXT: 00000004 <g>:
# CHECK-NEXT: 0000002a <g>:
# CHECK-EMPTY:
# CHECK-NEXT: 6: 20 00 local.get 0
# CHECK-NEXT: 8: 0b end
# CHECK-NEXT: 2c: 20 00 local.get 0
# CHECK-NEXT: 2e: 0b end
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ Sections:

# CHECK: Disassembly of section CODE:
# CHECK-EMPTY:
# CHECK-NEXT: 00000000 <CODE>:
# CHECK-NEXT: 00000026 <CODE>:
# CHECK-NEXT: # 2 functions in section.
# CHECK-EMPTY:
# CHECK-NEXT: 00000001 <>:
# CHECK-NEXT: 00000027 <>:
# CHECK-EMPTY:
# CHECK-NEXT: 3: 0b end
# CHECK-NEXT: 29: 0b end
# CHECK-EMPTY:
# CHECK-NEXT: 00000004 <>:
# CHECK-NEXT: 0000002a <>:
# CHECK-EMPTY:
# CHECK-NEXT: 6: 20 00 local.get 0
# CHECK-NEXT: 8: 0b end
# CHECK-NEXT: 2c: 20 00 local.get 0
# CHECK-NEXT: 2e: 0b end
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-objdump/wasm/no-codesec.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

# CHECK: Sections:
# CHECK-NEXT: Idx Name Size VMA Type
# CHECK-NEXT: 0 TYPE 00000004 00000000
# CHECK-NEXT: 1 FUNCTION 00000002 00000000
# CHECK-NEXT: 2 name 00000008 00000000
# CHECK-NEXT: 0 TYPE 00000004 0000000e
# CHECK-NEXT: 1 FUNCTION 00000002 00000018
# CHECK-NEXT: 2 name 00000008 00000020

--- !WASM
FileHeader:
Expand Down
16 changes: 13 additions & 3 deletions llvm/tools/llvm-objdump/llvm-objdump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/DebugInfo/BTF/BTFParser.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
Expand Down Expand Up @@ -1149,7 +1150,11 @@ addMissingWasmCodeSymbols(const WasmObjectFile &Obj,
SymbolAddresses.insert(Sym.Addr);

for (const wasm::WasmFunction &Function : Obj.functions()) {
uint64_t Address = Function.CodeSectionOffset;
// This adjustment mirrors the one in WasmObjectFile::getSymbolAddress.
uint32_t Adjustment = Obj.isRelocatableObject() || Obj.isSharedObject()
? 0
: Section->getAddress();
uint64_t Address = Function.CodeSectionOffset + Adjustment;
// Only add fallback symbols for functions not already present in the symbol
// table.
if (SymbolAddresses.count(Address))
Expand Down Expand Up @@ -1354,6 +1359,10 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj,
const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName);
return SymbolInfoTy(Addr, Name, SymType, /*IsMappingSymbol=*/false,
/*IsXCOFF=*/true);
} else if (Obj.isWasm()) {
uint8_t SymType =
cast<WasmObjectFile>(&Obj)->getWasmSymbol(Symbol).Info.Kind;
return SymbolInfoTy(Addr, Name, SymType, false);
} else {
uint8_t Type =
Obj.isELF() ? getElfSymbolType(Obj, Symbol) : (uint8_t)ELF::STT_NOTYPE;
Expand All @@ -1366,8 +1375,9 @@ static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj,
uint8_t Type) {
if (Obj.isXCOFF() && (SymbolDescription || TracebackTable))
return SymbolInfoTy(std::nullopt, Addr, Name, std::nullopt, false);
else
return SymbolInfoTy(Addr, Name, Type);
if (Obj.isWasm())
return SymbolInfoTy(Addr, Name, wasm::WASM_SYMBOL_TYPE_SECTION);
return SymbolInfoTy(Addr, Name, Type);
}

static void collectBBAddrMapLabels(
Expand Down

0 comments on commit 8b0f47b

Please sign in to comment.