Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor - Stops clobbering FRAME_PTR_REG in ANCHOR_INTERNAL_FUNCTION_CALL_REG #637

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions src/jit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1114,8 +1114,11 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {

match dst {
Value::Register(reg) => {
// Move guest_target_address into REGISTER_MAP[FRAME_PTR_REG]
self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, REGISTER_MAP[FRAME_PTR_REG]));
// REGISTER_SCRATCH contains self.pc, and we must store it for proper error handling.
// We can discard the value if callx succeeds, so we are not incrementing the stack pointer (RSP).
self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, RSP, X86IndirectAccess::OffsetIndexShift(-24, RSP, 0)));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to use another x86 register here? I'm concerned that adding loads and stores may impact performance.

Copy link
Author

@Lichtso Lichtso Nov 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the same store that was inside the anchored subroutine before, just inlined here. The only thing this makes worse is more bytes emitted per callx.

// Move guest_target_address into REGISTER_SCRATCH
self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, REGISTER_SCRATCH));
self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_REG, 5)));
},
Value::Constant64(target_pc, user_provided) => {
Expand Down Expand Up @@ -1477,6 +1480,7 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {
// Handler for EbpfError::CallOutsideTextSegment
self.set_anchor(ANCHOR_CALL_OUTSIDE_TEXT_SEGMENT);
self.emit_set_exception_kind(EbpfError::CallOutsideTextSegment);
self.emit_ins(X86Instruction::load(OperandSize::S64, RSP, REGISTER_SCRATCH, X86IndirectAccess::OffsetIndexShift(-8, RSP, 0)));
self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));

// Handler for EbpfError::DivideByZero
Expand Down Expand Up @@ -1556,43 +1560,35 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {
self.emit_ins(X86Instruction::return_near());

// Routine for emit_internal_call(Value::Register())
// Inputs: Guest current pc in REGISTER_SCRATCH, Guest target address in REGISTER_MAP[FRAME_PTR_REG]
// Outputs: Guest target pc in REGISTER_SCRATCH, Host target address in RIP
// Inputs: Guest current pc in X86IndirectAccess::OffsetIndexShift(-16, RSP, 0), Guest target address in REGISTER_SCRATCH
// Outputs: Guest current pc in X86IndirectAccess::OffsetIndexShift(-16, RSP, 0), Guest target pc in REGISTER_SCRATCH, Host target address in RIP
self.set_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_REG);
self.emit_ins(X86Instruction::push(REGISTER_MAP[0], None));
// REGISTER_SCRATCH contains the current program counter, and we must store it for proper
// error handling. We can discard the value if callx succeeds, so we are not incrementing
// the stack pointer (RSP).
self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, RSP, X86IndirectAccess::OffsetIndexShift(-8, RSP, 0)));
self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], REGISTER_MAP[0]));
// Calculate offset relative to program_vm_addr
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], self.program_vm_addr as i64));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_MAP[FRAME_PTR_REG], REGISTER_MAP[0], 0, None)); // guest_target_address -= self.program_vm_addr;
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_MAP[0], self.program_vm_addr as i64));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_MAP[0], REGISTER_SCRATCH, 0, None)); // guest_target_address -= self.program_vm_addr;
// Force alignment of guest_target_address
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 4, REGISTER_MAP[0], !(INSN_SIZE as i64 - 1), None)); // guest_target_address &= !(INSN_SIZE - 1);
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 4, REGISTER_SCRATCH, !(INSN_SIZE as i64 - 1), None)); // guest_target_address &= !(INSN_SIZE - 1);
// Bound check
// if(guest_target_address >= number_of_instructions * INSN_SIZE) throw CALL_OUTSIDE_TEXT_SEGMENT;
let number_of_instructions = self.result.pc_section.len();
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S64, REGISTER_MAP[0], (number_of_instructions * INSN_SIZE) as i64, None)); // guest_target_address.cmp(number_of_instructions * INSN_SIZE)
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S64, REGISTER_SCRATCH, (number_of_instructions * INSN_SIZE) as i64, None)); // guest_target_address.cmp(number_of_instructions * INSN_SIZE)
self.emit_ins(X86Instruction::conditional_jump_immediate(0x83, self.relative_to_anchor(ANCHOR_CALL_OUTSIDE_TEXT_SEGMENT, 6)));
// First half of self.emit_profile_instruction_count(false, None);
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_SCRATCH, REGISTER_INSTRUCTION_METER, 0, None)); // instruction_meter -= guest_current_pc;
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x2b, REGISTER_INSTRUCTION_METER, RSP, 0, Some(X86IndirectAccess::OffsetIndexShift(-8, RSP, 0)))); // instruction_meter -= guest_current_pc;
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_INSTRUCTION_METER, 1, None)); // instruction_meter -= 1;
// Calculate the target_pc (dst / INSN_SIZE) to update REGISTER_INSTRUCTION_METER
// and as target pc for potential ANCHOR_CALL_UNSUPPORTED_INSTRUCTION
// Load host target_address from self.result.pc_section
debug_assert_eq!(INSN_SIZE, 8); // Because the instruction size is also the slot size we do not need to shift the offset
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_MAP[0], self.result.pc_section.as_ptr() as i64)); // host_target_address = self.result.pc_section;
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_SCRATCH, REGISTER_MAP[0], 0, None)); // host_target_address += guest_target_address;
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_MAP[0], REGISTER_MAP[0], X86IndirectAccess::Offset(0))); // host_target_address = self.result.pc_section[host_target_address / 8];
// Calculate the guest_target_pc (dst / INSN_SIZE) to update REGISTER_INSTRUCTION_METER
// and as target_pc for potential ANCHOR_CALL_UNSUPPORTED_INSTRUCTION
let shift_amount = INSN_SIZE.trailing_zeros();
debug_assert_eq!(INSN_SIZE, 1 << shift_amount);
self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_MAP[0], REGISTER_SCRATCH)); // guest_target_pc = guest_target_address;
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0xc1, 5, REGISTER_SCRATCH, shift_amount as i64, None)); // guest_target_pc /= INSN_SIZE;
// Second half of self.emit_profile_instruction_count(false, None);
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_SCRATCH, REGISTER_INSTRUCTION_METER, 0, None)); // instruction_meter += guest_target_pc;
// Load host target_address from self.result.pc_section
debug_assert_eq!(INSN_SIZE, 8); // Because the instruction size is also the slot size we do not need to shift the offset
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], self.result.pc_section.as_ptr() as i64));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_MAP[FRAME_PTR_REG], REGISTER_MAP[0], 0, None)); // host_target_address = guest_target_address + self.result.pc_section;
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_MAP[0], REGISTER_MAP[0], X86IndirectAccess::Offset(0))); // host_target_address = self.result.pc_section[host_target_address / 8];
// Load the frame pointer again since we've clobbered REGISTER_MAP[FRAME_PTR_REG]
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], stack_pointer_access));
// Restore the clobbered REGISTER_MAP[0]
self.emit_ins(X86Instruction::xchg(OperandSize::S64, REGISTER_MAP[0], RSP, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0)))); // Swap REGISTER_MAP[0] and host_target_address
self.emit_ins(X86Instruction::return_near()); // Tail call to host_target_address
Expand Down