Skip to content

Commit

Permalink
[AArch64][PAC] Lower jump-tables using hardened pseudo. (llvm#97666)
Browse files Browse the repository at this point in the history
This introduces an alternative hardened lowering for jump-table
dispatch, controlled by the function attribute
`"aarch64-jump-table-hardening"`.
The implementation is centered around a pseudo, BR_JumpTable:

> A hardened but more expensive version of jump-table dispatch.
> This combines the target address computation (otherwise done using
> the JumpTableDest pseudos above) with the branch itself (otherwise
> done using a plain BR) in a single non-attackable sequence.
>
> We take the final entry index as an operand to allow isel freedom.
> This does mean that the index can be attacker-controlled. To
> address that, we also do limited checking of the offset, mainly
> ensuring it still points within the jump-table array.  When it
> doesn't, this branches to the first entry.  We might want it to
> trap instead.
>
> This is intended for use in conjunction with ptrauth for other
> code pointers, to avoid signing jump-table entries and turning
> them into pointers.
>
> Entry index is passed in x16.  Clobbers x16/x17/nzcv.

Jump-table compression isn't supported yet.
  • Loading branch information
ahmedbougacha authored Jul 23, 2024
1 parent 03e92bf commit 6049cd6
Show file tree
Hide file tree
Showing 5 changed files with 347 additions and 1 deletion.
139 changes: 139 additions & 0 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ class AArch64AsmPrinter : public AsmPrinter {

void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI);

void LowerHardenedBRJumpTable(const MachineInstr &MI);

void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI);

void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
Expand Down Expand Up @@ -1354,6 +1356,139 @@ void AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
.addImm(Size == 4 ? 0 : 2));
}

// Lower the BR_JumpTable pseudo into the hardened jump-table dispatch
// sequence: bounds-check the entry index (passed in x16), load the 4-byte
// anchor-relative table entry, compute the target address against an anchor
// label, and branch -- in one contiguous sequence with no attackable
// intermediate values in memory.  Clobbers x16/x17/nzcv.
void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) {
  // Count emitted instructions so the assert at the bottom can check the
  // sequence fits in the fixed Size declared on the BR_JumpTable pseudo.
  unsigned InstsEmitted = 0;

  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
  assert(MJTI && "Can't lower jump-table dispatch without JTI");

  const std::vector<MachineJumpTableEntry> &JTs = MJTI->getJumpTables();
  assert(!JTs.empty() && "Invalid JT index for jump-table dispatch");

  // Emit:
  //     mov x17, #<size of table>     ; depending on table size, with MOVKs
  //     cmp x16, x17                  ; or #imm if table size fits in 12-bit
  //     csel x16, x16, xzr, ls        ; check for index overflow
  //
  //     adrp x17, Ltable@PAGE         ; materialize table address
  //     add x17, Ltable@PAGEOFF
  //     ldrsw x16, [x17, x16, lsl #2] ; load table entry
  //
  //   Lanchor:
  //     adr x17, Lanchor              ; compute target address
  //     add x16, x17, x16
  //     br x16                        ; branch to target

  MachineOperand JTOp = MI.getOperand(0);

  unsigned JTI = JTOp.getIndex();
  // Compressed (PC-relative) jump-table entries aren't supported by this
  // sequence yet; the entry-info recorded below always uses 4-byte entries.
  assert(!AArch64FI->getJumpTableEntryPCRelSymbol(JTI) &&
         "unsupported compressed jump table");

  const uint64_t NumTableEntries = JTs[JTI].MBBs.size();

  // cmp only supports a 12-bit immediate.  If we need more, materialize the
  // immediate, using x17 as a scratch register.
  uint64_t MaxTableEntry = NumTableEntries - 1;
  if (isUInt<12>(MaxTableEntry)) {
    // cmp x16, #MaxTableEntry  (SUBS with XZR destination)
    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXri)
                                     .addReg(AArch64::XZR)
                                     .addReg(AArch64::X16)
                                     .addImm(MaxTableEntry)
                                     .addImm(0));
    ++InstsEmitted;
  } else {
    // movz x17, #<low 16 bits of MaxTableEntry>
    EmitToStreamer(*OutStreamer,
                   MCInstBuilder(AArch64::MOVZXi)
                       .addReg(AArch64::X17)
                       .addImm(static_cast<uint16_t>(MaxTableEntry))
                       .addImm(0));
    ++InstsEmitted;
    // It's sad that we have to manually materialize instructions, but we can't
    // trivially reuse the main pseudo expansion logic.
    // A MOVK sequence is easy enough to generate and handles the general case.
    for (int Offset = 16; Offset < 64; Offset += 16) {
      // Stop as soon as the remaining high bits are all zero.
      if ((MaxTableEntry >> Offset) == 0)
        break;
      // movk x17, #<16 bits of MaxTableEntry at Offset>, lsl #Offset
      EmitToStreamer(*OutStreamer,
                     MCInstBuilder(AArch64::MOVKXi)
                         .addReg(AArch64::X17)
                         .addReg(AArch64::X17)
                         .addImm(static_cast<uint16_t>(MaxTableEntry >> Offset))
                         .addImm(Offset));
      ++InstsEmitted;
    }
    // cmp x16, x17  (register form, since the immediate didn't fit)
    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs)
                                     .addReg(AArch64::XZR)
                                     .addReg(AArch64::X16)
                                     .addReg(AArch64::X17)
                                     .addImm(0));
    ++InstsEmitted;
  }

  // csel x16, x16, xzr, ls : keep the index if it is <= MaxTableEntry
  // (unsigned), otherwise clamp it to zero.
  // This picks entry #0 on failure.
  // We might want to trap instead.
  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::CSELXr)
                                   .addReg(AArch64::X16)
                                   .addReg(AArch64::X16)
                                   .addReg(AArch64::XZR)
                                   .addImm(AArch64CC::LS));
  ++InstsEmitted;

  // Prepare the @PAGE/@PAGEOFF low/high operands.
  MachineOperand JTMOHi(JTOp), JTMOLo(JTOp);
  MCOperand JTMCHi, JTMCLo;

  JTMOHi.setTargetFlags(AArch64II::MO_PAGE);
  JTMOLo.setTargetFlags(AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

  MCInstLowering.lowerOperand(JTMOHi, JTMCHi);
  MCInstLowering.lowerOperand(JTMOLo, JTMCLo);

  // adrp x17, Ltable@PAGE
  EmitToStreamer(
      *OutStreamer,
      MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(JTMCHi));
  ++InstsEmitted;

  // add x17, x17, Ltable@PAGEOFF
  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXri)
                                   .addReg(AArch64::X17)
                                   .addReg(AArch64::X17)
                                   .addOperand(JTMCLo)
                                   .addImm(0));
  ++InstsEmitted;

  // ldrsw x16, [x17, x16, lsl #2] : sign-extending load of the 4-byte entry,
  // scaled by the entry size (lsl #2).
  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
                                   .addReg(AArch64::X16)
                                   .addReg(AArch64::X17)
                                   .addReg(AArch64::X16)
                                   .addImm(0)
                                   .addImm(1));
  ++InstsEmitted;

  // Create the anchor label and record it (with the 4-byte entry size) so the
  // jump-table entries are emitted as offsets relative to this point.
  MCSymbol *AdrLabel = MF->getContext().createTempSymbol();
  const auto *AdrLabelE = MCSymbolRefExpr::create(AdrLabel, MF->getContext());
  AArch64FI->setJumpTableEntryInfo(JTI, 4, AdrLabel);

  OutStreamer->emitLabel(AdrLabel);
  // adr x17, Lanchor : PC-relative address of the anchor itself.
  EmitToStreamer(
      *OutStreamer,
      MCInstBuilder(AArch64::ADR).addReg(AArch64::X17).addExpr(AdrLabelE));
  ++InstsEmitted;

  // add x16, x17, x16 : anchor address + loaded offset = target address.
  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
                                   .addReg(AArch64::X16)
                                   .addReg(AArch64::X17)
                                   .addReg(AArch64::X16)
                                   .addImm(0));
  ++InstsEmitted;

  // br x16
  EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::BR).addReg(AArch64::X16));
  ++InstsEmitted;

  (void)InstsEmitted;
  // The pseudo's declared Size (see BR_JumpTable in AArch64InstrInfo.td) must
  // cover everything emitted above.
  assert(STI->getInstrInfo()->getInstSizeInBytes(MI) >= InstsEmitted * 4);
}

void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer,
const llvm::MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
Expand Down Expand Up @@ -2232,6 +2367,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
LowerJumpTableDest(*OutStreamer, *MI);
return;

case AArch64::BR_JumpTable:
LowerHardenedBRJumpTable(*MI);
return;

case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10742,6 +10742,30 @@ SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);

// With aarch64-jump-table-hardening, we only expand the jump table dispatch
// sequence later, to guarantee the integrity of the intermediate values.
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"aarch64-jump-table-hardening")) {
CodeModel::Model CM = getTargetMachine().getCodeModel();
if (Subtarget->isTargetMachO()) {
if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Unsupported code-model for hardened jump-table");
} else {
// Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
assert(Subtarget->isTargetELF() &&
"jump table hardening only supported on MachO/ELF");
if (CM != CodeModel::Small)
report_fatal_error("Unsupported code-model for hardened jump-table");
}

SDValue X16Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X16,
Entry, SDValue());
SDNode *B = DAG.getMachineNode(AArch64::BR_JumpTable, DL, MVT::Other,
DAG.getTargetJumpTable(JTI, MVT::i32),
X16Copy.getValue(0), X16Copy.getValue(1));
return SDValue(B, 0);
}

SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
Expand Down
27 changes: 27 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,33 @@ def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
Sched<[]>;
}

// A hardened but more expensive version of jump-table dispatch.
// This combines the target address computation (otherwise done using the
// JumpTableDest pseudos above) with the branch itself (otherwise done using
// a plain BR) in a single non-attackable sequence.
//
// We take the final entry index as an operand to allow isel freedom. This does
// mean that the index can be attacker-controlled. To address that, we also do
// limited checking of the offset, mainly ensuring it still points within the
// jump-table array. When it doesn't, this branches to the first entry.
// We might want to trap instead.
//
// This is intended for use in conjunction with ptrauth for other code pointers,
// to avoid signing jump-table entries and turning them into pointers.
//
// Entry index is passed in x16. Clobbers x16/x17/nzcv.
def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> {
  let isBranch = 1;
  let isTerminator = 1;
  let isIndirectBranch = 1;
  let isBarrier = 1;
  // The expansion creates a per-dispatch anchor label that the table entries
  // are relative to; duplicating the sequence would duplicate that anchor.
  let isNotDuplicable = 1;
  let Defs = [X16,X17,NZCV];
  let Uses = [X16];
  // Worst-case expansion (see AArch64AsmPrinter::LowerHardenedBRJumpTable):
  // 7 fixed instructions (csel, adrp, add, ldrsw, adr, add, br) plus up to
  // 5 for the table-size comparison (movz + 3x movk + cmp) = 12 * 4 bytes.
  // The old bound of 44 undercounted the (theoretical) full-MOVK case and
  // would trip the asm-printer's size assert for tables needing all 4 MOVs.
  let Size = 48; // 28 fixed + 20 variable, for table size materialization
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
Expand Down
25 changes: 24 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3602,10 +3602,33 @@ bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
unsigned JTI = I.getOperand(1).getIndex();
Register Index = I.getOperand(2).getReg();

MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);

// With aarch64-jump-table-hardening, we only expand the jump table dispatch
// sequence later, to guarantee the integrity of the intermediate values.
if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
CodeModel::Model CM = TM.getCodeModel();
if (STI.isTargetMachO()) {
if (CM != CodeModel::Small && CM != CodeModel::Large)
report_fatal_error("Unsupported code-model for hardened jump-table");
} else {
// Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
assert(STI.isTargetELF() &&
"jump table hardening only supported on MachO/ELF");
if (CM != CodeModel::Small)
report_fatal_error("Unsupported code-model for hardened jump-table");
}

MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
MIB.buildInstr(AArch64::BR_JumpTable)
.addJumpTableIndex(I.getOperand(1).getIndex());
I.eraseFromParent();
return true;
}

Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);

MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
{TargetReg, ScratchReg}, {JTAddr, Index})
.addJumpTableIndex(JTI);
Expand Down
133 changes: 133 additions & 0 deletions llvm/test/CodeGen/AArch64/hardened-br-jump-table.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
; RUN: rm -rf %t && split-file %s %t

;--- err1.ll

; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1

; RUN: not --crash llc %t/err1.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs 2>&1 | FileCheck %s --check-prefix=ERR1

; ERR1: LLVM ERROR: Unsupported code-model for hardened jump-table
; A minimal two-case switch: with -aarch64-min-jump-table-entries=1 (from the
; RUN lines above) this is lowered via a jump table, so the hardened lowering
; runs and reports the unsupported-code-model fatal error checked by ERR1.
define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {

  switch i32 %in, label %def [
    i32 0, label %lbl1
    i32 1, label %lbl2
  ]

def:
  ret i32 0

lbl1:
  ret i32 1

lbl2:
  ret i32 2
}

;--- test.ll

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=arm64-apple-darwin -aarch64-enable-collect-loh=0 \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -code-model=large \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=MACHO

; RUN: llc %t/test.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF

; RUN: llc %t/test.ll -mtriple=aarch64-elf \
; RUN: -aarch64-min-jump-table-entries=1 -aarch64-enable-atomic-cfg-tidy=0 \
; RUN: -global-isel -global-isel-abort=1 \
; RUN: -o - -verify-machineinstrs | FileCheck %s --check-prefix=ELF

; MACHO-LABEL: test_jumptable:
; MACHO: mov w16, w0
; MACHO: cmp x16, #5
; MACHO: csel x16, x16, xzr, ls
; MACHO-NEXT: adrp x17, LJTI0_0@PAGE
; MACHO-NEXT: add x17, x17, LJTI0_0@PAGEOFF
; MACHO-NEXT: ldrsw x16, [x17, x16, lsl #2]
; MACHO-NEXT: Ltmp0:
; MACHO-NEXT: adr x17, Ltmp0
; MACHO-NEXT: add x16, x17, x16
; MACHO-NEXT: br x16

; ELF-LABEL: test_jumptable:
; ELF: mov w16, w0
; ELF: cmp x16, #5
; ELF: csel x16, x16, xzr, ls
; ELF-NEXT: adrp x17, .LJTI0_0
; ELF-NEXT: add x17, x17, :lo12:.LJTI0_0
; ELF-NEXT: ldrsw x16, [x17, x16, lsl #2]
; ELF-NEXT: .Ltmp0:
; ELF-NEXT: adr x17, .Ltmp0
; ELF-NEXT: add x16, x17, x16
; ELF-NEXT: br x16

; Switch with cases 0,1,2,4,5 plus a default: the resulting jump table has
; 6 entries (the hole at 3 maps to the default), which is why the checked
; bounds comparison above is `cmp x16, #5` (max entry index).
define i32 @test_jumptable(i32 %in) "aarch64-jump-table-hardening" {

  switch i32 %in, label %def [
    i32 0, label %lbl1
    i32 1, label %lbl2
    i32 2, label %lbl3
    i32 4, label %lbl4
    i32 5, label %lbl5
  ]

def:
  ret i32 0

lbl1:
  ret i32 1

lbl2:
  ret i32 2

lbl3:
  ret i32 4

lbl4:
  ret i32 8

lbl5:
  ret i32 10

}

; MACHO-LABEL: LJTI0_0:
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0
; MACHO-NEXT: .long LBB{{[0-9_]+}}-Ltmp0

; ELF-LABEL: .LJTI0_0:
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0
; ELF-NEXT: .word .LBB{{[0-9_]+}}-.Ltmp0

0 comments on commit 6049cd6

Please sign in to comment.