diff --git a/CHANGELOG.md b/CHANGELOG.md index 589142e77d2..71d9a0c2511 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Looking for changes that affect our C API? See the [C API Changelog](lib/c-api/CHANGELOG.md). ## **[Unreleased]** +- [#2750](https://github.com/wasmerio/wasmer/pull/2750) Added Aarch64 support to Singlepass (both Linux and macOS). ## 2.1.1 - 2021/12/20 diff --git a/Makefile b/Makefile index 4d49156fba4..2d2fa4c5f09 100644 --- a/Makefile +++ b/Makefile @@ -179,6 +179,11 @@ ifneq ($(ENABLE_SINGLEPASS), 0) ifeq ($(IS_AMD64), 1) compilers += singlepass endif + ifeq ($(IS_AARCH64), 1) + ifneq ($(IS_WINDOWS), 1) + compilers += singlepass + endif + endif endif endif @@ -249,6 +254,9 @@ ifeq ($(ENABLE_SINGLEPASS), 1) ifeq ($(IS_AMD64), 1) compilers_engines += singlepass-universal endif + ifeq ($(IS_AARCH64), 1) + compilers_engines += singlepass-universal + endif endif endif diff --git a/lib/compiler-singlepass/Cargo.toml b/lib/compiler-singlepass/Cargo.toml index 18c8a3bdd29..029c3adde2e 100644 --- a/lib/compiler-singlepass/Cargo.toml +++ b/lib/compiler-singlepass/Cargo.toml @@ -18,8 +18,8 @@ wasmer-types = { path = "../types", version = "=2.1.1", default-features = false rayon = { version = "1.5", optional = true } hashbrown = { version = "0.11", optional = true } more-asserts = "0.2" -dynasm = "1.2" -dynasmrt = "1.2" +dynasm = "1.2.1" +dynasmrt = "1.2.1" lazy_static = "1.4" byteorder = "1.3" smallvec = "1.6" diff --git a/lib/compiler-singlepass/src/arm64_decl.rs b/lib/compiler-singlepass/src/arm64_decl.rs new file mode 100644 index 00000000000..400f705daaf --- /dev/null +++ b/lib/compiler-singlepass/src/arm64_decl.rs @@ -0,0 +1,302 @@ +//! ARM64 structures. + +use crate::common_decl::{MachineState, MachineValue, RegisterIndex}; +use crate::location::CombinedRegister; +use crate::location::Reg as AbstractReg; +use std::collections::BTreeMap; +use wasmer_compiler::CallingConvention; +use wasmer_types::Type; + +/// General-purpose registers. 
+#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum GPR { + X0 = 0, + X1 = 1, + X2 = 2, + X3 = 3, + X4 = 4, + X5 = 5, + X6 = 6, + X7 = 7, + X8 = 8, + X9 = 9, + X10 = 10, + X11 = 11, + X12 = 12, + X13 = 13, + X14 = 14, + X15 = 15, + X16 = 16, + X17 = 17, + X18 = 18, + X19 = 19, + X20 = 20, + X21 = 21, + X22 = 22, + X23 = 23, + X24 = 24, + X25 = 25, + X26 = 26, + X27 = 27, + X28 = 28, + X29 = 29, + X30 = 30, + XzrSp = 31, +} + +/// NEON registers. +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[allow(dead_code)] +pub enum NEON { + V0 = 0, + V1 = 1, + V2 = 2, + V3 = 3, + V4 = 4, + V5 = 5, + V6 = 6, + V7 = 7, + V8 = 8, + V9 = 9, + V10 = 10, + V11 = 11, + V12 = 12, + V13 = 13, + V14 = 14, + V15 = 15, + V16 = 16, + V17 = 17, + V18 = 18, + V19 = 19, + V20 = 20, + V21 = 21, + V22 = 22, + V23 = 23, + V24 = 24, + V25 = 25, + V26 = 26, + V27 = 27, + V28 = 28, + V29 = 29, + V30 = 30, + V31 = 31, +} + +impl AbstractReg for GPR { + fn is_callee_save(self) -> bool { + self as usize > 18 + } + fn is_reserved(self) -> bool { + match self.into_index() { + 0..=16 | 19..=27 => false, + _ => true, + } + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [GPR; 32] = [ + GPR::X0, + GPR::X1, + GPR::X2, + GPR::X3, + GPR::X4, + GPR::X5, + GPR::X6, + GPR::X7, + GPR::X8, + GPR::X9, + GPR::X10, + GPR::X11, + GPR::X12, + GPR::X13, + GPR::X14, + GPR::X15, + GPR::X16, + GPR::X17, + GPR::X18, + GPR::X19, + GPR::X20, + GPR::X21, + GPR::X22, + GPR::X23, + GPR::X24, + GPR::X25, + GPR::X26, + GPR::X27, + GPR::X28, + GPR::X29, + GPR::X30, + GPR::XzrSp, + ]; + REGS.get(n).cloned().ok_or(()) + } +} + +impl AbstractReg for NEON { + fn is_callee_save(self) -> bool { + self as usize > 16 + } + fn is_reserved(self) -> bool { + false + } + fn into_index(self) -> usize { + self as usize + } + fn from_index(n: usize) -> Result { + const REGS: [NEON; 32] = [ + NEON::V0, + NEON::V1, 
+ NEON::V2, + NEON::V3, + NEON::V4, + NEON::V5, + NEON::V6, + NEON::V7, + NEON::V8, + NEON::V9, + NEON::V10, + NEON::V11, + NEON::V12, + NEON::V13, + NEON::V14, + NEON::V15, + NEON::V16, + NEON::V17, + NEON::V18, + NEON::V19, + NEON::V20, + NEON::V21, + NEON::V22, + NEON::V23, + NEON::V24, + NEON::V25, + NEON::V26, + NEON::V27, + NEON::V28, + NEON::V29, + NEON::V30, + NEON::V31, + ]; + match n { + 0..=31 => Ok(REGS[n]), + _ => Err(()), + } + } +} + +/// A machine register under the ARM64 (AArch64) architecture. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ARM64Register { + /// General-purpose registers. + GPR(GPR), + /// NEON (floating point/SIMD) registers. + NEON(NEON), +} + +impl CombinedRegister for ARM64Register { + /// Returns the index of the register. + fn to_index(&self) -> RegisterIndex { + match *self { + ARM64Register::GPR(x) => RegisterIndex(x as usize), + ARM64Register::NEON(x) => RegisterIndex(x as usize + 64), + } + } + /// Convert from a GPR register + fn from_gpr(x: u16) -> Self { + ARM64Register::GPR(GPR::from_index(x as usize).unwrap()) + } + /// Convert from an SIMD register + fn from_simd(x: u16) -> Self { + ARM64Register::NEON(NEON::from_index(x as usize).unwrap()) + } + + /// Converts a DWARF regnum to ARM64Register. + fn _from_dwarf_regnum(x: u16) -> Option { + Some(match x { + 0..=31 => ARM64Register::GPR(GPR::from_index(x as usize).unwrap()), + 64..=95 => ARM64Register::NEON(NEON::from_index(x as usize - 64).unwrap()), + _ => return None, + }) + } +} + +/// An allocator that allocates registers for function arguments according to the target's calling convention. +#[derive(Default)] +pub struct ArgumentRegisterAllocator { + n_gprs: usize, + n_neons: usize, +} + +impl ArgumentRegisterAllocator { + /// Allocates a register for argument type `ty`. Returns `None` if no register is available for this type. 
+ pub fn next( + &mut self, + ty: Type, + calling_convention: CallingConvention, + ) -> Option { + match calling_convention { + CallingConvention::SystemV | CallingConvention::AppleAarch64 => { + static GPR_SEQ: &'static [GPR] = &[ + GPR::X0, + GPR::X1, + GPR::X2, + GPR::X3, + GPR::X4, + GPR::X5, + GPR::X6, + GPR::X7, + ]; + static NEON_SEQ: &'static [NEON] = &[ + NEON::V0, + NEON::V1, + NEON::V2, + NEON::V3, + NEON::V4, + NEON::V5, + NEON::V6, + NEON::V7, + ]; + match ty { + Type::I32 | Type::I64 => { + if self.n_gprs < GPR_SEQ.len() { + let gpr = GPR_SEQ[self.n_gprs]; + self.n_gprs += 1; + Some(ARM64Register::GPR(gpr)) + } else { + None + } + } + Type::F32 | Type::F64 => { + if self.n_neons < NEON_SEQ.len() { + let neon = NEON_SEQ[self.n_neons]; + self.n_neons += 1; + Some(ARM64Register::NEON(neon)) + } else { + None + } + } + _ => todo!( + "ArgumentRegisterAllocator::next: Unsupported type: {:?}", + ty + ), + } + } + _ => unimplemented!(), + } + } +} + +/// Create a new `MachineState` with default values. 
+pub fn new_machine_state() -> MachineState { + MachineState { + stack_values: vec![], + register_values: vec![MachineValue::Undefined; 32 + 32], + prev_frame: BTreeMap::new(), + wasm_stack: vec![], + wasm_inst_offset: std::usize::MAX, + } +} diff --git a/lib/compiler-singlepass/src/codegen.rs b/lib/compiler-singlepass/src/codegen.rs index 6bdbc4375b0..c9f98b30b64 100644 --- a/lib/compiler-singlepass/src/codegen.rs +++ b/lib/compiler-singlepass/src/codegen.rs @@ -85,6 +85,7 @@ pub struct FuncGen<'a, M: Machine> { struct SpecialLabelSet { integer_division_by_zero: Label, + integer_overflow: Label, heap_access_oob: Label, table_access_oob: Label, indirect_call_null: Label, @@ -290,6 +291,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { ret.push(loc); } + let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset); if delta_stack_offset != 0 { self.machine.adjust_stack(delta_stack_offset as u32); } @@ -335,7 +337,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } self.state.wasm_stack.pop().unwrap(); } - + let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset); if delta_stack_offset != 0 { self.machine.restore_stack(delta_stack_offset as u32); } @@ -376,6 +378,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.wasm_stack.pop().unwrap(); } + let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset); if delta_stack_offset != 0 { self.machine.adjust_stack(delta_stack_offset as u32); } @@ -421,6 +424,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Wasm state popping is deferred to `release_locations_only_osr_state`. 
} + let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset); if delta_stack_offset != 0 { self.machine.pop_stack_locals(delta_stack_offset as u32); } @@ -457,6 +461,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } } + let delta_stack_offset = self.machine.round_stack_adjust(delta_stack_offset); if delta_stack_offset != 0 { self.machine.pop_stack_locals(delta_stack_offset as u32); } @@ -465,7 +470,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { fn init_locals( &mut self, n: usize, - n_params: usize, + sig: FunctionType, calling_convention: CallingConvention, ) -> Vec> { // How many machine stack slots will all the locals use? @@ -486,7 +491,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } } - // Callee-saved R15 for vmctx. + // Callee-saved vmctx. static_area_size += 8; // Some ABI (like Windows) needs extrat reg save @@ -504,6 +509,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { static_area_size += num_mem_slots * 8; // Allocate save area, without actually writing to it. + static_area_size = self.machine.round_stack_adjust(static_area_size); self.machine.adjust_stack(static_area_size as _); // Save callee-saved registers. @@ -517,7 +523,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } } - // Save R15 for vmctx use. + // Save the register used for vmctx. self.stack_offset.0 += 8; self.machine.move_local( self.stack_offset.0 as i32, @@ -554,15 +560,29 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Load in-register parameters into the allocated locations. // Locals are allocated on the stack from higher address to lower address, // so we won't skip the stack guard page here. 
- for i in 0..n_params { - let loc = self.machine.get_param_location(i + 1, calling_convention); - self.machine.move_location(Size::S64, loc, locations[i]); + let mut stack_offset: usize = 0; + for (i, param) in sig.params().iter().enumerate() { + let sz = match *param { + Type::I32 | Type::F32 => Size::S32, + Type::I64 | Type::F64 => Size::S64, + Type::ExternRef | Type::FuncRef => Size::S64, + _ => unimplemented!(), + }; + let loc = self.machine.get_call_param_location( + i + 1, + sz, + &mut stack_offset, + calling_convention, + ); + self.machine + .move_location_extend(sz, false, loc, Size::S64, locations[i]); } - // Load vmctx into R15. + // Load vmctx into its GPR. self.machine.move_location( Size::S64, - self.machine.get_param_location(0, calling_convention), + self.machine + .get_simple_param_location(0, calling_convention), Location::GPR(self.machine.get_vmctx_reg()), ); @@ -570,14 +590,17 @@ impl<'a, M: Machine> FuncGen<'a, M> { // // `rep stosq` writes data from low address to high address and may skip the stack guard page. // so here we probe it explicitly when needed. - for i in (n_params..n).step_by(NATIVE_PAGE_SIZE / 8).skip(1) { + for i in (sig.params().len()..n) + .step_by(NATIVE_PAGE_SIZE / 8) + .skip(1) + { self.machine.zero_location(Size::S64, locations[i]); } // Initialize all normal locals to zero. let mut init_stack_loc_cnt = 0; let mut last_stack_loc = Location::Memory(self.machine.local_pointer(), i32::MAX); - for i in n_params..n { + for i in sig.params().len()..n { match locations[i] { Location::Memory(_, _) => { init_stack_loc_cnt += 1; @@ -687,24 +710,34 @@ impl<'a, M: Machine> FuncGen<'a, M> { ); } - /// Emits a System V / Windows call sequence. - /// - /// This function will not use RAX before `cb` is called. + /// Emits a Native ABI call sequence. /// /// The caller MUST NOT hold any temporary registers allocated by `acquire_temp_gpr` when calling /// this function. 
- fn emit_call_native>, F: FnOnce(&mut Self)>( + fn emit_call_native< + I: Iterator>, + J: Iterator, + F: FnOnce(&mut Self), + >( &mut self, cb: F, params: I, + params_type: J, ) -> Result<(), CodegenError> { // Values pushed in this function are above the shadow region. self.state.stack_values.push(MachineValue::ExplicitShadow); let params: Vec<_> = params.collect(); + let params_size: Vec<_> = params_type + .map(|x| match x { + WpType::F32 | WpType::I32 => Size::S32, + WpType::V128 => unimplemented!(), + _ => Size::S64, + }) + .collect(); - // Save used GPRs. - self.machine.push_used_gpr(); + // Save used GPRs. Preserve correct stack alignment + let mut used_stack = self.machine.push_used_gpr(); let used_gprs = self.machine.get_used_gprs(); for r in used_gprs.iter() { let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone(); @@ -716,10 +749,10 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(content); } - // Save used XMM registers. + // Save used SIMD registers. let used_simds = self.machine.get_used_simd(); if used_simds.len() > 0 { - self.machine.push_used_simd(); + used_stack += self.machine.push_used_simd(); for r in used_simds.iter().rev() { let content = @@ -732,6 +765,10 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(content); } } + // mark the GPR used for Call as used + self.machine + .reserve_unused_temp_gpr(self.machine.get_grp_for_call()); + let calling_convention = self.calling_convention; let stack_padding: usize = match calling_convention { @@ -740,35 +777,37 @@ impl<'a, M: Machine> FuncGen<'a, M> { }; let mut stack_offset: usize = 0; - + let mut args: Vec> = vec![]; + let mut pushed_args: usize = 0; // Calculate stack offset. 
for (i, _param) in params.iter().enumerate() { - if let Location::Memory(_, _) = - self.machine.get_param_location(1 + i, calling_convention) - { - stack_offset += 8; - } + args.push(self.machine.get_param_location( + 1 + i, + params_size[i], + &mut stack_offset, + calling_convention, + )); } // Align stack to 16 bytes. - if (self.get_stack_offset() + used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset) - % 16 - != 0 - { - self.machine.adjust_stack(8); - stack_offset += 8; - self.state.stack_values.push(MachineValue::Undefined); + let stack_unaligned = + (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset) + % 16; + if stack_unaligned != 0 { + stack_offset += 16 - stack_unaligned; } + self.machine.adjust_stack(stack_offset as u32); let mut call_movs: Vec<(Location, M::GPR)> = vec![]; // Prepare register & stack parameters. for (i, param) in params.iter().enumerate().rev() { - let loc = self.machine.get_param_location(1 + i, calling_convention); + let loc = args[i]; match loc { Location::GPR(x) => { call_movs.push((*param, x)); } Location::Memory(_, _) => { + pushed_args += 1; match *param { Location::GPR(x) => { let content = self.state.register_values @@ -803,7 +842,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.state.stack_values.push(MachineValue::Undefined); } } - self.machine.push_location_for_native(*param); + self.machine.move_location(params_size[i], *param, loc); } _ => { return Err(CodegenError { @@ -828,19 +867,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.machine.move_location( Size::S64, Location::GPR(self.machine.get_vmctx_reg()), - self.machine.get_param_location(0, calling_convention), + self.machine + .get_simple_param_location(0, calling_convention), ); // vmctx - if (self.state.stack_values.len() % 2) != 1 { - return Err(CodegenError { - message: "emit_call_native: explicit shadow takes one slot".to_string(), - }); - } - if stack_padding > 0 { self.machine.adjust_stack(stack_padding as u32); } - + // 
release the GPR used for call + self.machine.release_gpr(self.machine.get_grp_for_call()); cb(self); // Offset needs to be after the 'call' instruction. @@ -863,19 +898,21 @@ impl<'a, M: Machine> FuncGen<'a, M> { // Restore stack. if stack_offset + stack_padding > 0 { - self.machine - .restore_stack((stack_offset + stack_padding) as u32); + self.machine.restore_stack( + self.machine + .round_stack_adjust(stack_offset + stack_padding) as u32, + ); if (stack_offset % 8) != 0 { return Err(CodegenError { message: "emit_call_native: Bad restoring stack alignement".to_string(), }); } - for _ in 0..stack_offset / 8 { + for _ in 0..pushed_args { self.state.stack_values.pop().unwrap(); } } - // Restore XMMs. + // Restore SIMDs. if !used_simds.is_empty() { self.machine.pop_used_simd(); for _ in 0..used_simds.len() { @@ -897,13 +934,21 @@ impl<'a, M: Machine> FuncGen<'a, M> { Ok(()) } - /// Emits a System V call sequence, specialized for labels as the call target. - fn _emit_call_native_label>>( + /// Emits a Native ABI call sequence, specialized for labels as the call target. + fn _emit_call_native_label< + I: Iterator>, + J: Iterator, + >( &mut self, label: Label, params: I, + params_type: J, ) -> Result<(), CodegenError> { - self.emit_call_native(|this| this.machine.emit_call_label(label), params)?; + self.emit_call_native( + |this| this.machine.emit_call_label(label), + params, + params_type, + )?; Ok(()) } @@ -945,15 +990,12 @@ impl<'a, M: Machine> FuncGen<'a, M> { } fn emit_head(&mut self) -> Result<(), CodegenError> { - // TODO: Patchpoint is not emitted for now, and ARM trampoline is not prepended. - - // Normal x86 entry prologue. self.machine.emit_function_prolog(); // Initialize locals. 
self.locals = self.init_locals( self.local_types.len(), - self.signature.params().len(), + self.signature.clone(), self.calling_convention, ); @@ -1024,6 +1066,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { let mut machine = machine; let special_labels = SpecialLabelSet { integer_division_by_zero: machine.get_label(), + integer_overflow: machine.get_label(), heap_access_oob: machine.get_label(), table_access_oob: machine.get_label(), indirect_call_null: machine.get_label(), @@ -1295,6 +1338,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1305,6 +1349,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1315,6 +1360,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1325,6 +1371,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -1454,46 +1501,46 @@ impl<'a, M: Machine> FuncGen<'a, M> { self.machine.emit_binop_mul64(loc_a, loc_b, ret); } Operator::I64DivU => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_udiv64( loc_a, loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } Operator::I64DivS => { - // We assume that RAX and RDX are temporary registers here. 
let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_sdiv64( loc_a, loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } Operator::I64RemU => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_urem64( loc_a, loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } Operator::I64RemS => { - // We assume that RAX and RDX are temporary registers here. let I2O1 { loc_a, loc_b, ret } = self.i2o1_prepare(WpType::I64); let offset = self.machine.emit_binop_srem64( loc_a, loc_b, ret, self.special_labels.integer_division_by_zero, + self.special_labels.integer_overflow, ); self.mark_offset_trappable(offset); } @@ -2563,6 +2610,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { this.machine.mark_instruction_address_end(offset); }, params.iter().copied(), + param_types.iter().copied(), )?; self.release_locations_only_stack(¶ms); @@ -2774,7 +2822,8 @@ impl<'a, M: Machine> FuncGen<'a, M> { gpr_for_call, vmcaller_checked_anyfunc_vmctx as i32, ), - this.machine.get_param_location(0, calling_convention), + this.machine + .get_simple_param_location(0, calling_convention), ); this.machine.emit_call_location(Location::Memory( @@ -2785,6 +2834,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { } }, params.iter().copied(), + param_types.iter().copied(), )?; self.release_locations_only_stack(¶ms); @@ -3018,6 +3068,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, memory_index] iter::once(Location::Imm32(memory_index.index() as u32)), + iter::once(WpType::I64), )?; let ret = self.acquire_locations( &[(WpType::I64, MachineValue::WasmStack(self.value_stack.len()))], @@ -3065,6 +3116,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I64, + 
WpType::I64, + WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst, src, len]); } @@ -3087,6 +3147,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, segment_index] iter::once(Location::Imm32(segment)), + iter::once(WpType::I64), )?; } Operator::MemoryCopy { src, dst } => { @@ -3137,6 +3198,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst_pos, src_pos, len]); } @@ -3181,6 +3245,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(memory_index.index() as u32), dst, val, len] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dst, val, len]); } @@ -3215,6 +3282,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { // [vmctx, val, memory_index] iter::once(param_pages) .chain(iter::once(Location::Imm32(memory_index.index() as u32))), + [WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[param_pages]); @@ -5412,6 +5480,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, func_index] -> funcref iter::once(Location::Imm32(function_index as u32)), + iter::once(WpType::I64), )?; let ret = self.acquire_locations( @@ -5470,6 +5539,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(table_index.index() as u32), index, value] .iter() .cloned(), + [WpType::I32, WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[index, value]); @@ -5504,6 +5574,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { [Location::Imm32(table_index.index() as u32), index] .iter() .cloned(), + [WpType::I32, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[index]); @@ -5547,6 +5618,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, table_index] -> i32 iter::once(Location::Imm32(table_index.index() as u32)), + 
iter::once(WpType::I32), )?; let ret = self.acquire_locations( @@ -5596,6 +5668,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [WpType::I64, WpType::I64, WpType::I64].iter().cloned(), )?; self.release_locations_only_stack(&[init_value, delta]); @@ -5648,6 +5721,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I32, + WpType::I32, + WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, src, len]); @@ -5679,6 +5761,9 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, table_index, start_idx, item, len] [Location::Imm32(table), dest, val, len].iter().cloned(), + [WpType::I32, WpType::I64, WpType::I64, WpType::I64] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, val, len]); @@ -5717,6 +5802,15 @@ impl<'a, M: Machine> FuncGen<'a, M> { ] .iter() .cloned(), + [ + WpType::I32, + WpType::I32, + WpType::I64, + WpType::I64, + WpType::I64, + ] + .iter() + .cloned(), )?; self.release_locations_only_stack(&[dest, src, len]); @@ -5742,6 +5836,7 @@ impl<'a, M: Machine> FuncGen<'a, M> { }, // [vmctx, elem_index] [Location::Imm32(segment)].iter().cloned(), + [WpType::I32].iter().cloned(), )?; } _ => { @@ -5762,6 +5857,12 @@ impl<'a, M: Machine> FuncGen<'a, M> { .mark_address_with_trap_code(TrapCode::IntegerDivisionByZero); self.machine.emit_illegal_op(); + self.machine + .emit_label(self.special_labels.integer_overflow); + self.machine + .mark_address_with_trap_code(TrapCode::IntegerOverflow); + self.machine.emit_illegal_op(); + self.machine.emit_label(self.special_labels.heap_access_oob); self.machine .mark_address_with_trap_code(TrapCode::HeapAccessOutOfBounds); diff --git a/lib/compiler-singlepass/src/compiler.rs b/lib/compiler-singlepass/src/compiler.rs index 5599f9f3c65..0e0b08f8cb4 100644 --- a/lib/compiler-singlepass/src/compiler.rs +++ b/lib/compiler-singlepass/src/compiler.rs @@ -4,9 +4,11 @@ use crate::codegen::FuncGen; use 
crate::config::Singlepass; +use crate::machine::Machine; use crate::machine::{ gen_import_call_trampoline, gen_std_dynamic_import_trampoline, gen_std_trampoline, CodegenError, }; +use crate::machine_arm64::MachineARM64; use crate::machine_x64::MachineX86_64; use loupe::MemoryUsage; #[cfg(feature = "rayon")] @@ -58,17 +60,18 @@ impl Compiler for SinglepassCompiler { _module_translation: &ModuleTranslationState, function_body_inputs: PrimaryMap>, ) -> Result { - /*if target.triple().operating_system == OperatingSystem::Windows { - return Err(CompileError::UnsupportedTarget( - OperatingSystem::Windows.to_string(), - )); - }*/ - if target.triple().architecture != Architecture::X86_64 { - return Err(CompileError::UnsupportedTarget( - target.triple().architecture.to_string(), - )); + match target.triple().architecture { + Architecture::X86_64 => {} + Architecture::Aarch64(_) => {} + _ => { + return Err(CompileError::UnsupportedTarget( + target.triple().architecture.to_string(), + )) + } } - if !target.cpu_features().contains(CpuFeature::AVX) { + if target.triple().architecture == Architecture::X86_64 + && !target.cpu_features().contains(CpuFeature::AVX) + { return Err(CompileError::UnsupportedTarget( "x86_64 without AVX".to_string(), )); @@ -79,7 +82,7 @@ impl Compiler for SinglepassCompiler { let calling_convention = match target.triple().default_calling_convention() { Ok(CallingConvention::WindowsFastcall) => CallingConvention::WindowsFastcall, Ok(CallingConvention::SystemV) => CallingConvention::SystemV, - //Ok(CallingConvention::AppleAarch64) => AppleAarch64, + Ok(CallingConvention::AppleAarch64) => CallingConvention::AppleAarch64, _ => panic!("Unsupported Calling convention for Singlepass compiler"), }; @@ -126,30 +129,53 @@ impl Compiler for SinglepassCompiler { } } - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), - _ => unimplemented!(), - }; - let mut generator = FuncGen::new( - module, - &self.config, - 
&vmoffsets, - &memory_styles, - &table_styles, - i, - &locals, - machine, - calling_convention, - ) - .map_err(to_compile_error)?; + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + let mut generator = FuncGen::new( + module, + &self.config, + &vmoffsets, + &memory_styles, + &table_styles, + i, + &locals, + machine, + calling_convention, + ) + .map_err(to_compile_error)?; + while generator.has_control_frames() { + generator.set_srcloc(reader.original_position() as u32); + let op = reader.read_operator()?; + generator.feed_operator(op).map_err(to_compile_error)?; + } - while generator.has_control_frames() { - generator.set_srcloc(reader.original_position() as u32); - let op = reader.read_operator()?; - generator.feed_operator(op).map_err(to_compile_error)?; - } + Ok(generator.finalize(&input)) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + let mut generator = FuncGen::new( + module, + &self.config, + &vmoffsets, + &memory_styles, + &table_styles, + i, + &locals, + machine, + calling_convention, + ) + .map_err(to_compile_error)?; + while generator.has_control_frames() { + generator.set_srcloc(reader.original_position() as u32); + let op = reader.read_operator()?; + generator.feed_operator(op).map_err(to_compile_error)?; + } - Ok(generator.finalize(&input)) + Ok(generator.finalize(&input)) + } + _ => unimplemented!(), + } }) .collect::, CompileError>>()? 
.into_iter() @@ -252,15 +278,6 @@ mod tests { fn errors_for_unsupported_targets() { let compiler = SinglepassCompiler::new(Singlepass::default()); - // Compile for win64 - /*let win64 = Target::new(triple!("x86_64-pc-windows-msvc"), CpuFeature::for_host()); - let (mut info, translation, inputs) = dummy_compilation_ingredients(); - let result = compiler.compile_module(&win64, &mut info, &translation, inputs); - match result.unwrap_err() { - CompileError::UnsupportedTarget(name) => assert_eq!(name, "windows"), - error => panic!("Unexpected error: {:?}", error), - };*/ - // Compile for 32bit Linux let linux32 = Target::new(triple!("i686-unknown-linux-gnu"), CpuFeature::for_host()); let (mut info, translation, inputs) = dummy_compilation_ingredients(); diff --git a/lib/compiler-singlepass/src/emitter_arm64.rs b/lib/compiler-singlepass/src/emitter_arm64.rs new file mode 100644 index 00000000000..51722a051a1 --- /dev/null +++ b/lib/compiler-singlepass/src/emitter_arm64.rs @@ -0,0 +1,2789 @@ +pub use crate::arm64_decl::{ARM64Register, ArgumentRegisterAllocator, GPR, NEON}; +use crate::common_decl::Size; +use crate::location::Location as AbstractLocation; +pub use crate::location::{Multiplier, Reg}; +pub use crate::machine::{Label, Offset}; +use dynasm::dynasm; +pub use dynasmrt::aarch64::{encode_logical_immediate_32bit, encode_logical_immediate_64bit}; +use dynasmrt::{ + aarch64::Aarch64Relocation, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, + VecAssembler, +}; +use wasmer_compiler::{ + CallingConvention, CustomSection, CustomSectionProtection, FunctionBody, SectionBody, +}; +use wasmer_types::{FunctionIndex, FunctionType, Type}; +use wasmer_vm::VMOffsets; + +type Assembler = VecAssembler; + +/// Force `dynasm!` to use the correct arch (aarch64) when cross-compiling. 
/// `dynasm!` proc-macro tries to auto-detect it by default by looking at the
/// `target_arch`, but it sees the `target_arch` of the proc-macro itself, which
/// is always equal to host, even when cross-compiling.
macro_rules! dynasm {
    // Forward every invocation to `dynasm::dynasm!`, inserting an explicit
    // `.arch aarch64` directive right after the assembler expression.
    ($a:expr ; $($tt:tt)*) => {
        dynasm::dynasm!(
            $a
            ; .arch aarch64
            ; $($tt)*
        )
    };
}

/// Operand location type taken by the emitter methods below.
// NOTE(review): `AbstractLocation` is presumably generic over the register
// types and its arguments look lost in extraction — confirm against the
// repository before relying on this line.
pub type Location = AbstractLocation;

/// Condition codes accepted by the conditional emitters (`emit_cset`,
/// `emit_csetm`, `emit_cinc`, `emit_bcond_label`).
// NOTE(review): the explicit discriminants look like the A64 `cond` field
// encoding — confirm against the Arm reference before depending on the values.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)]
#[repr(u8)]
pub enum Condition {
    // meaning for cmp or sub
    /// Equal
    Eq = 0,
    /// Not equal
    Ne = 1,
    /// Unsigned higher or same (or carry set)
    Cs = 2,
    /// Unsigned lower (or carry clear)
    Cc = 3,
    /// Negative. The mnemonic stands for "minus"
    Mi = 4,
    /// Positive or zero. The mnemonic stands for "plus"
    Pl = 5,
    /// Signed overflow. The mnemonic stands for "V set"
    Vs = 6,
    /// No signed overflow. The mnemonic stands for "V clear"
    Vc = 7,
    /// Unsigned higher
    Hi = 8,
    /// Unsigned lower or same
    Ls = 9,
    /// Signed greater than or equal
    Ge = 10,
    /// Signed less than
    Lt = 11,
    /// Signed greater than
    Gt = 12,
    /// Signed less than or equal
    Le = 13,
    /// Always executed
    Al = 14,
}

/// Either a NEON register or a memory operand.
// NOTE(review): the `(GPR, i32)` pair is presumably base register plus byte
// displacement, mirroring `Location::Memory` — confirm at the use sites.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)]
pub enum NeonOrMemory {
    NEON(NEON),
    Memory(GPR, i32),
}

/// Either a general-purpose register or a memory operand.
// NOTE(review): same assumption about the `(GPR, i32)` pair as for
// `NeonOrMemory` — confirm at the use sites.
#[derive(Copy, Clone, Debug)]
#[allow(dead_code)]
pub enum GPROrMemory {
    GPR(GPR),
    Memory(GPR, i32),
}

/// Instruction-level emitter interface for AArch64 code generation.
///
/// Each `emit_*` method is named after the instruction it produces. The
/// implementation for `Assembler` matches on the operand combination and
/// panics when it has no arm for it.
pub trait EmitterARM64 {
    // --- Labels, offsets and bookkeeping ---

    /// Returns a new label.
    fn get_label(&mut self) -> Label;
    /// Returns the current emission offset.
    fn get_offset(&self) -> Offset;
    /// Returns the size, in bytes, to assume for a jump instruction.
    fn get_jmp_instr_size(&self) -> u8;

    /// Called to finish a function; the `Assembler` implementation emits a few
    /// labelled 32-bit constants here.
    fn finalize_function(&mut self);

    // --- Loads and stores ---

    /// Stores `reg` to the memory location `addr`.
    fn emit_str(&mut self, sz: Size, reg: Location, addr: Location);
    /// Loads `reg` from the memory location `addr`.
    fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location);
    /// `stur`: stores `reg` at `[addr, offset]`.
    fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32);
    /// `ldur`: loads `reg` from `[addr, offset]`.
    fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32);
    /// Pre-indexed store: `str reg, [addr, -offset]!`.
    fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
    /// Post-indexed store: `str reg, [addr], offset`.
    fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
    /// Post-indexed load: `ldr reg, [addr], offset`.
    fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
    /// Pre-indexed pair store: `stp reg1, reg2, [addr, -offset]!`.
    fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);
    /// Post-indexed pair load: `ldp reg1, reg2, [addr], offset`.
    fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);

    // Narrow loads/stores. Note the parameter naming: `reg` is the data
    // register and `dst` is the memory operand.
    fn emit_ldrb(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_ldrh(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_ldrsb(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_ldrsh(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_ldrsw(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_strb(&mut self, sz: Size, reg: Location, dst: Location);
    fn emit_strh(&mut self, sz: Size, reg: Location, dst: Location);

    // --- Moves and immediates ---

    /// Moves `src` into `dst` (register or small/logical immediate source).
    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location);

    fn emit_movn(&mut self, sz: Size, reg: Location, val: u32);
    fn emit_movz(&mut self, reg: Location, val: u32);
    fn emit_movk(&mut self, reg: Location, val: u32, shift: u32);

    /// Materialises the 64-bit constant `val` in `dst` with a `movz`,
    /// followed by `movk` instructions when more than one 16-bit chunk is needed.
    fn emit_mov_imm(&mut self, dst: Location, val: u64);

    // --- Integer arithmetic ---

    fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_mul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_adds(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_subs(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

    /// `add dst, src1, src2, LSL lsl`.
    fn emit_add_lsl(&mut self, sz: Size, src1: Location, src2: Location, lsl: u32, dst: Location);

    // For `cmp`/`tst` the instruction's first operand is `dst` and the second is `src`.
    fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_tst(&mut self, sz: Size, src: Location, dst: Location);

    // --- Shifts and rotates ---

    fn emit_lsl(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_lsr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_asr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_ror(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

    // --- Bitwise logic and bit-fields ---

    fn emit_or(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

    fn emit_bfc(&mut self, se: Size, lsb: u32, width: u32, dst: Location);
    fn emit_bfi(&mut self, se: Size, src: Location, lsb: u32, width: u32, dst: Location);

    // --- Division ---

    fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_sdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    /// msub : c - a*b -> dst
    fn emit_msub(&mut self, sz: Size, a: Location, b: Location, c: Location, dst: Location);

    // --- Sign/zero extension ---

    fn emit_sxtb(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_sxth(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_sxtw(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_uxtb(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_uxth(&mut self, sz: Size, src: Location, dst: Location);

    // --- Conditional and bit-counting operations ---

    fn emit_cset(&mut self, sz: Size, dst: Location, cond: Condition);
    fn emit_csetm(&mut self, sz: Size, dst: Location, cond: Condition);
    fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, cond: Condition);
    fn emit_clz(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location);

    // --- Labels, branches and calls ---

    fn emit_label(&mut self, label: Label);
    fn emit_load_label(&mut self, reg: GPR, label: Label);
    fn emit_b_label(&mut self, label: Label);
    fn emit_cbz_label(&mut self, sz: Size, reg: Location, label: Label);
    fn emit_cbnz_label(&mut self, sz: Size, reg: Location, label: Label);
    fn emit_tbz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label);
    fn emit_tbnz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label);
    fn emit_bcond_label(&mut self, condition: Condition, label: Label);
    fn emit_b_register(&mut self, reg: GPR);
    fn emit_call_label(&mut self, label: Label);
    fn emit_call_register(&mut self, reg: GPR);
    fn emit_ret(&mut self);

    // --- Traps and barriers ---

    fn emit_udf(&mut self);
    fn emit_dmb(&mut self);
    fn emit_brk(&mut self);

    // --- Floating point ---

    fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location);
    fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_fsqrt(&mut self, sz: Size, src: Location, dst: Location);

    fn emit_fadd(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_fsub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_fmul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_fdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

    fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
    fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

    fn emit_frintz(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_frintn(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_frintm(&mut self, sz: Size, src: Location, dst: Location);
    fn emit_frintp(&mut self, sz: Size, src: Location, dst: Location);

    // Conversions; `sz_in` describes `src` and `sz_out` describes `dst`.
    fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);
    fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);
    fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location);
    fn emit_fcvtzs(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);
    fn emit_fcvtzu(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);

    // --- Floating-point control/status registers ---

    fn emit_read_fpcr(&mut self, reg: GPR);
    fn emit_write_fpcr(&mut self, reg: GPR);
    fn emit_read_fpsr(&mut self, reg: GPR);
    fn emit_write_fpsr(&mut self, reg: GPR);

    // --- Architecture capability queries (default implementations) ---

    /// Defaults to `true`.
    fn arch_supports_canonicalize_nan(&self) -> bool {
        true
    }

    /// Defaults to `false`.
    fn arch_requires_indirect_call_trampoline(&self) -> bool {
        false
    }

    // Default body of the next method is `unimplemented!()`.
    fn
arch_emit_indirect_call_with_trampoline(&mut self, _loc: Location) {
        unimplemented!()
    }
}

impl EmitterARM64 for Assembler {
    /// Allocates a new dynamic label.
    fn get_label(&mut self) -> DynamicLabel {
        self.new_dynamic_label()
    }

    /// Returns the assembler's current offset.
    fn get_offset(&self) -> AssemblyOffset {
        self.offset()
    }

    /// Size, in bytes, reported for a jump instruction.
    fn get_jmp_instr_size(&self) -> u8 {
        4 // relative jump, not full 32bits capable
    }

    /// Emits three labelled 32-bit constants (-1, 0 and 1) at the current position.
    fn finalize_function(&mut self) {
        dynasm!(
            self
            ; const_neg_one_32:
            ; .word -1
            ; const_zero_32:
            ; .word 0
            ; const_pos_one_32:
            ; .word 1
        );
    }

    /// Emits a store of `reg` to `addr`.
    ///
    /// Handled combinations: a GPR of size S64/S32/S16/S8 or a SIMD register of
    /// size S64/S32 stored to `Location::Memory(base, disp)`, and a GPR of size
    /// S64/S32 stored to `Location::Memory2(base, index, mult, 0)`.
    ///
    /// # Panics
    /// Panics if the displacement is not a multiple of the access size or is
    /// not below `0x1000 * access size` (a negative displacement fails this
    /// check after the cast to `u32`), if a `Memory2` extra offset is non-zero,
    /// or if the operand combination is not handled.
    fn emit_str(&mut self, sz: Size, reg: Location, addr: Location) {
        match (sz, reg, addr) {
            (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                dynasm!(self ; str X(reg), [X(addr), disp]);
            }
            (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                dynasm!(self ; str W(reg), [X(addr), disp]);
            }
            (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x1) == 0 && (disp < 0x2000));
                dynasm!(self ; strh W(reg), [X(addr), disp]);
            }
            (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!(disp < 0x1000);
                dynasm!(self ; strb W(reg), [X(addr), disp]);
            }
            (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                dynasm!(self ; str D(reg), [X(addr), disp]);
            }
            (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                dynasm!(self ; str S(reg), [X(addr), disp]);
            }
            (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let r2 = r2.into_index() as u32;
                assert!(offs == 0);
                let mult = mult as u32;
                // 0: base only, 1: base + index, otherwise base + (index << mult).
                match mult {
                    0 => dynasm!(self ; str X(reg), [X(addr)]),
                    1 => dynasm!(self ; str X(reg), [X(addr), X(r2)]),
                    _ => dynasm!(self ; str X(reg), [X(addr), X(r2), LSL mult]),
                };
            }
            (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let r2 = r2.into_index() as u32;
                assert!(offs == 0);
                let mult = mult as u32;
                match mult {
                    0 => dynasm!(self ; str W(reg), [X(addr)]),
                    1 => dynasm!(self ; str W(reg), [X(addr), X(r2)]),
                    _ => dynasm!(self ; str W(reg), [X(addr), X(r2), LSL mult]),
                };
            }
            _ => panic!("singlepass can't emit STR {:?}, {:?}, {:?}", sz, reg, addr),
        }
    }
    /// Emits a load of `reg` from `addr`.
    ///
    /// Handled combinations mirror `emit_str`: a GPR of size S64/S32/S16/S8 or
    /// a SIMD register of size S64/S32 loaded from
    /// `Location::Memory(base, disp)`, and a GPR of size S64/S32 loaded from
    /// `Location::Memory2(base, index, mult, 0)`.
    ///
    /// # Panics
    /// Panics if the displacement is negative, not a multiple of the access
    /// size, or not below `0x1000 * access size`; if a `Memory2` extra offset
    /// is non-zero; or if the operand combination is not handled.
    fn emit_ldr(&mut self, sz: Size, reg: Location, addr: Location) {
        match (sz, reg, addr) {
            (Size::S64, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                // Cast before checking: a negative `i32` displacement would pass a
                // signed `< 0x8000` test, but as `u32` it is rejected here, exactly
                // as `emit_str` does.
                let disp = disp as u32;
                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                dynasm!(self ; ldr X(reg), [X(addr), disp]);
            }
            (Size::S32, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                dynasm!(self ; ldr W(reg), [X(addr), disp]);
            }
            (Size::S16, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x1) == 0 && (disp < 0x2000));
                dynasm!(self ; ldrh W(reg), [X(addr), disp]);
            }
            (Size::S8, Location::GPR(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!(disp < 0x1000);
                dynasm!(self ; ldrb W(reg), [X(addr), disp]);
            }
            (Size::S64, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let r2 = r2.into_index() as u32;
                assert!(offs == 0);
                let mult = mult as u32;
                // 0: base only, 1: base + index, otherwise base + (index << mult).
                match mult {
                    0 => dynasm!(self ; ldr X(reg), [X(addr)]),
                    1 => dynasm!(self ; ldr X(reg), [X(addr), X(r2)]),
                    _ => dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]),
                };
            }
            (Size::S32, Location::GPR(reg), Location::Memory2(addr, r2, mult, offs)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let r2 = r2.into_index() as u32;
                assert!(offs == 0);
                let mult = mult as u32;
                match mult {
                    0 => dynasm!(self ; ldr W(reg), [X(addr)]),
                    1 => dynasm!(self ; ldr W(reg), [X(addr), X(r2)]),
                    _ => dynasm!(self ; ldr W(reg), [X(addr), X(r2), LSL mult]),
                };
            }
            (Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x7) == 0 && (disp < 0x8000));
                dynasm!(self ; ldr D(reg), [X(addr), disp]);
            }
            (Size::S32, Location::SIMD(reg), Location::Memory(addr, disp)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let disp = disp as u32;
                assert!((disp & 0x3) == 0 && (disp < 0x4000));
                dynasm!(self ; ldr S(reg), [X(addr), disp]);
            }
            _ => panic!("singlepass can't emit LDR {:?}, {:?}, {:?}", sz, reg, addr),
        }
    }
    /// Emits `stur reg, [addr, offset]` for an S64/S32 GPR or an S64 SIMD register.
    ///
    /// # Panics
    /// Panics if `offset` is outside `-255..=255` or the operand combination
    /// is not handled.
    fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) {
        assert!((offset >= -255) && (offset <= 255));
        match (sz, reg) {
            (Size::S64, Location::GPR(reg)) => {
let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; stur X(reg), [X(addr), offset]);
            }
            (Size::S32, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; stur W(reg), [X(addr), offset]);
            }
            (Size::S64, Location::SIMD(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; stur D(reg), [X(addr), offset]);
            }
            _ => panic!(
                "singlepass can't emit STUR {:?}, {:?}, {:?}, {:?}",
                sz, reg, addr, offset
            ),
        }
    }
    /// Emits `ldur reg, [addr, offset]` for an S64/S32 GPR or an S64 SIMD register.
    ///
    /// # Panics
    /// Panics if `offset` is outside `-255..=255` or the operand combination
    /// is not handled.
    fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32) {
        assert!((offset >= -255) && (offset <= 255));
        match (sz, reg) {
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldur X(reg), [X(addr), offset]);
            }
            (Size::S32, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldur W(reg), [X(addr), offset]);
            }
            (Size::S64, Location::SIMD(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldur D(reg), [X(addr), offset]);
            }
            _ => panic!(
                "singlepass can't emit LDUR {:?}, {:?}, {:?}, {:?}",
                sz, reg, addr, offset
            ),
        }
    }

    /// Emits a pre-indexed 64-bit store, `str reg, [addr, -offset]!`.
    ///
    /// Only S64 GPR and S64 SIMD registers are handled.
    ///
    /// # Panics
    /// Panics if `offset > 255` or the operand combination is not handled.
    fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
        assert!(offset <= 255);
        match (sz, reg) {
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; str X(reg), [X(addr), -(offset as i32)]!);
            }
            (Size::S64, Location::SIMD(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; str D(reg), [X(addr), -(offset as i32)]!);
            }
            // Callers of the public trait can reach this arm, so report the
            // operands like the sibling emitters do instead of `unreachable!()`.
            _ => panic!(
                "singlepass can't emit STR (pre-index) {:?}, {:?}, {:?}, {:?}",
                sz, reg, addr, offset
            ),
        }
    }
    /// Emits a post-indexed 64-bit store, `str reg, [addr], offset`.
    ///
    /// Only S64 GPR and S64 SIMD registers are handled.
    ///
    /// # Panics
    /// Panics if `offset > 255` or the operand combination is not handled.
    fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
        assert!(offset <= 255);
        match (sz, reg) {
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; str X(reg), [X(addr)], (offset as i32));
            }
            (Size::S64, Location::SIMD(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; str D(reg), [X(addr)], (offset as i32));
            }
            _ => panic!(
                "singlepass can't emit STR (post-index) {:?}, {:?}, {:?}, {:?}",
                sz, reg, addr, offset
            ),
        }
    }
    /// Emits a post-indexed 64-bit load, `ldr reg, [addr], offset`.
    ///
    /// Only S64 GPR and S64 SIMD registers are handled.
    ///
    /// # Panics
    /// Panics if `offset > 255` or the operand combination is not handled.
    fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
        assert!(offset <= 255);
        match (sz, reg) {
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldr X(reg), [X(addr)], offset);
            }
            (Size::S64, Location::SIMD(reg)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldr D(reg), [X(addr)], offset);
            }
            _ => panic!(
                "singlepass can't emit LDR (post-index) {:?}, {:?}, {:?}, {:?}",
                sz, reg, addr, offset
            ),
        }
    }

    /// Emits a pre-indexed pair store, `stp reg1, reg2, [addr, -offset]!`.
    ///
    /// Only a pair of S64 GPRs is handled.
    ///
    /// # Panics
    /// Panics if `offset > 255` or the operand combination is not handled.
    fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) {
        assert!(offset <= 255);
        match (sz, reg1, reg2) {
            (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => {
                let reg1 = reg1.into_index() as u32;
                let reg2 = reg2.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; stp X(reg1), X(reg2), [X(addr), -(offset as i32)]!);
            }
            _ => panic!(
                "singlepass can't emit STP (pre-index) {:?}, {:?}, {:?}, {:?}, {:?}",
                sz, reg1, reg2, addr, offset
            ),
        }
    }
    /// Emits a post-indexed pair load, `ldp reg1, reg2, [addr], offset`.
    ///
    /// Only a pair of S64 GPRs is handled.
    ///
    /// # Panics
    /// Panics if `offset > 255` or the operand combination is not handled.
    fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32) {
        assert!(offset <= 255);
        match (sz, reg1, reg2) {
            (Size::S64, Location::GPR(reg1), Location::GPR(reg2)) => {
                let reg1 = reg1.into_index() as u32;
                let reg2 = reg2.into_index() as u32;
                let addr = addr.into_index() as u32;
                dynasm!(self ; ldp X(reg1), X(reg2), [X(addr)], offset);
            }
            _ => panic!(
                "singlepass can't emit LDP (post-index) {:?}, {:?}, {:?}, {:?}, {:?}",
                sz, reg1, reg2, addr, offset
            ),
        }
    }

    /// Emits `ldrb` into the GPR `reg` from the memory operand `dst`; `_sz` is ignored.
    fn emit_ldrb(&mut self, _sz: Size, reg: Location, dst: Location) {
        match (reg, dst) {
            (Location::GPR(reg), Location::Memory(addr, offset)) => {
                let reg = reg.into_index() as u32;
                let addr = addr.into_index() as u32;
                let offset = offset as u32;
                assert!(offset <
0x1000);
                dynasm!(self ; ldrb W(reg), [X(addr), offset]);
            }
            (Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                // 0: base only, 1: base + index, anything else: index shifted left.
                match mult as u32 {
                    0 => dynasm!(self ; ldrb W(rt), [X(base)]),
                    1 => dynasm!(self ; ldrb W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrb W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit LDRB {:?}, {:?}", reg, dst),
        }
    }
    /// Halfword load (`ldrh`) into a GPR from a `Memory` or `Memory2` operand.
    /// The size argument is not consulted.
    fn emit_ldrh(&mut self, _sz: Size, reg: Location, dst: Location) {
        match (reg, dst) {
            (Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!((offset & 1 == 0) && (offset < 0x2000));
                dynasm!(self ; ldrh W(rt), [X(base), offset]);
            }
            (Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrh W(rt), [X(base)]),
                    1 => dynasm!(self ; ldrh W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrh W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit LDRH {:?}, {:?}", reg, dst),
        }
    }
    /// Sign-extending byte load (`ldrsb`) into an X (S64) or W (S32) register.
    fn emit_ldrsb(&mut self, sz: Size, reg: Location, dst: Location) {
        match (sz, reg, dst) {
            (Size::S64, Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!(offset < 0x1000);
                dynasm!(self ; ldrsb X(rt), [X(base), offset]);
            }
            (Size::S32, Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!(offset < 0x1000);
                dynasm!(self ; ldrsb W(rt), [X(base), offset]);
            }
            (Size::S64, Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrsb X(rt), [X(base)]),
                    1 => dynasm!(self ; ldrsb X(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrsb X(rt), [X(base), X(index), LSL shift]),
                };
            }
            (Size::S32, Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrsb W(rt), [X(base)]),
                    1 => dynasm!(self ; ldrsb W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrsb W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit LDRSB {:?}, {:?}, {:?}", sz, reg, dst),
        }
    }
    /// Sign-extending halfword load (`ldrsh`) into an X (S64) or W (S32) register.
    fn emit_ldrsh(&mut self, sz: Size, reg: Location, dst: Location) {
        match (sz, reg, dst) {
            (Size::S64, Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!((offset & 1 == 0) && (offset < 0x2000));
                dynasm!(self ; ldrsh X(rt), [X(base), offset]);
            }
            (Size::S32, Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!((offset & 1 == 0) && (offset < 0x2000));
                dynasm!(self ; ldrsh W(rt), [X(base), offset]);
            }
            (Size::S64, Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrsh X(rt), [X(base)]),
                    1 => dynasm!(self ; ldrsh X(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrsh X(rt), [X(base), X(index), LSL shift]),
                };
            }
            (Size::S32, Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrsh W(rt), [X(base)]),
                    1 => dynasm!(self ; ldrsh W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrsh W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit LDRSH {:?}, {:?}, {:?}", sz, reg, dst),
        }
    }
    /// Sign-extending word load (`ldrsw`); only an S64 destination is handled.
    fn emit_ldrsw(&mut self, sz: Size, reg: Location, dst: Location) {
        match (sz, reg, dst) {
            (Size::S64, Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!((offset & 3 == 0) && (offset < 0x4000));
                dynasm!(self ; ldrsw X(rt), [X(base), offset]);
            }
            (Size::S64, Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; ldrsw X(rt), [X(base)]),
                    1 => dynasm!(self ; ldrsw X(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; ldrsw X(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit LDRSW {:?}, {:?}, {:?}", sz, reg, dst),
        }
    }
    /// Byte store (`strb`) of a GPR to a `Memory` or `Memory2` operand.
    /// The size argument is not consulted.
    fn emit_strb(&mut self, _sz: Size, reg: Location, dst: Location) {
        match (reg, dst) {
            (Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!(offset < 0x1000);
                dynasm!(self ; strb W(rt), [X(base), offset]);
            }
            (Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; strb W(rt), [X(base)]),
                    1 => dynasm!(self ; strb W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; strb W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit STRB {:?}, {:?}", reg, dst),
        }
    }
    /// Halfword store (`strh`) of a GPR to a `Memory` or `Memory2` operand.
    /// The size argument is not consulted.
    fn emit_strh(&mut self, _sz: Size, reg: Location, dst: Location) {
        match (reg, dst) {
            (Location::GPR(rt), Location::Memory(base, offset)) => {
                let (rt, base) = (rt.into_index() as u32, base.into_index() as u32);
                let offset = offset as u32;
                assert!((offset & 1 == 0) && (offset < 0x2000));
                dynasm!(self ; strh W(rt), [X(base), offset]);
            }
            (Location::GPR(rt), Location::Memory2(base, index, mult, offs)) => {
                let (rt, base, index) = (
                    rt.into_index() as u32,
                    base.into_index() as u32,
                    index.into_index() as u32,
                );
                assert!(offs == 0);
                match mult as u32 {
                    0 => dynasm!(self ; strh W(rt), [X(base)]),
                    1 => dynasm!(self ; strh W(rt), [X(base), X(index)]),
                    shift => dynasm!(self ; strh W(rt), [X(base), X(index), LSL shift]),
                };
            }
            _ => panic!("singlepass can't emit STRH {:?}, {:?}", reg, dst),
        }
    }

    /// Register/immediate move. The first arms cover GPR-to-GPR and
    /// SIMD-to-SIMD lane moves in both 64-bit and 32-bit widths.
    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S64, Location::GPR(from), Location::GPR(to)) => {
                let (from, to) = (from.into_index() as u32, to.into_index() as u32);
                dynasm!(self ; mov X(to), X(from));
            }
            (Size::S32, Location::GPR(from), Location::GPR(to)) => {
                let (from, to) = (from.into_index() as u32, to.into_index() as u32);
                dynasm!(self ; mov W(to), W(from));
            }
            (Size::S64, Location::SIMD(from), Location::SIMD(to)) => {
                let (from, to) = (from.into_index() as u32, to.into_index() as u32);
                dynasm!(self ; mov V(to).D[0], V(from).D[0]);
            }
            (Size::S32, Location::SIMD(from), Location::SIMD(to)) => {
                let (from, to) = (from.into_index() as u32, to.into_index() as u32);
                dynasm!(self ; mov V(to).S[0], V(from).S[0]);
            }
            (Size::S64,
Location::GPR(src), Location::SIMD(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; mov V(dst).D[0], X(src));
            }
            (Size::S32, Location::GPR(src), Location::SIMD(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; mov V(dst).S[0], W(src));
            }
            (Size::S64, Location::SIMD(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; mov X(dst), V(src).D[0]);
            }
            (Size::S32, Location::SIMD(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; mov W(dst), V(src).S[0]);
            }
            // Immediate sources: values below 0x1000 use a plain `mov`, values
            // accepted by `encode_logical_immediate_*` use `orr` with the zero
            // register. Anything else cannot be emitted here and is reported
            // with the operands (the pattern binds `rd` so that the `dst`
            // parameter stays available for the message).
            (Size::S32, Location::Imm32(val), Location::GPR(rd)) => {
                let rd = rd.into_index() as u32;
                if val < 0x1000 {
                    dynasm!(self ; mov W(rd), val as u64);
                } else if encode_logical_immediate_32bit(val as _).is_some() {
                    dynasm!(self ; orr W(rd), wzr, val);
                } else {
                    panic!(
                        "singlepass can't emit MOV {:?}, {:?}, {:?}: immediate is not encodable",
                        sz, src, dst
                    );
                }
            }
            (Size::S64, Location::Imm32(val), Location::GPR(rd)) => {
                let rd = rd.into_index() as u32;
                if val < 0x1000 {
                    dynasm!(self ; mov W(rd), val as u64);
                } else if encode_logical_immediate_64bit(val as _).is_some() {
                    dynasm!(self ; orr X(rd), xzr, val as u64);
                } else {
                    panic!(
                        "singlepass can't emit MOV {:?}, {:?}, {:?}: immediate is not encodable",
                        sz, src, dst
                    );
                }
            }
            (Size::S64, Location::Imm64(val), Location::GPR(rd)) => {
                let rd = rd.into_index() as u32;
                if val < 0x1000 {
                    dynasm!(self ; mov W(rd), val as u64);
                } else if encode_logical_immediate_64bit(val as _).is_some() {
                    dynasm!(self ; orr X(rd), xzr, val as u64);
                } else {
                    panic!(
                        "singlepass can't emit MOV {:?}, {:?}, {:?}: immediate is not encodable",
                        sz, src, dst
                    );
                }
            }
            _ => panic!("singlepass can't emit MOV {:?}, {:?}, {:?}", sz, src, dst),
        }
    }

    /// Emits `movn reg, val` on the W (S32) or X (S64) view of a GPR.
    ///
    /// # Panics
    /// Panics if `reg` is not a GPR or `sz` is neither S32 nor S64.
    fn emit_movn(&mut self, sz: Size, reg: Location, val: u32) {
        match (sz, reg) {
            (Size::S32, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; movn W(reg), val);
            }
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; movn X(reg), val);
            }
            _ => panic!("singlepass can't emit MOVN {:?}, {:?}, {:?}", sz, reg, val),
        }
    }
    /// Emits `movz` on the W view of a GPR.
    ///
    /// # Panics
    /// Panics if `reg` is not a GPR.
    fn emit_movz(&mut self, reg: Location, val: u32) {
        match reg {
            Location::GPR(reg) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; movz W(reg), val);
            }
            _ => panic!("singlepass can't emit MOVZ {:?}, {:?}", reg, val),
        }
    }
    /// Emits `movk reg, val, LSL shift` on the X view of a GPR.
    ///
    /// # Panics
    /// Panics if `reg` is not a GPR.
    fn emit_movk(&mut self, reg: Location, val: u32, shift: u32) {
        match reg {
            Location::GPR(reg) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; movk X(reg), val, LSL shift);
            }
            _ => panic!(
                "singlepass can't emit MOVK {:?}, {:?}, LSL {:?}",
                reg, val, shift
            ),
        }
    }

    /// Loads the 64-bit constant `val` into the GPR `dst`.
    ///
    /// When `val` fits in a single 16-bit chunk at a shift of 0/16/32/48, one
    /// `movz` is emitted. Otherwise the low 16 bits are set with `movz` and
    /// each following chunk is added with `movk`, stopping as soon as the
    /// remaining upper bits are all zero.
    ///
    /// # Panics
    /// Panics if `dst` is not a GPR.
    fn emit_mov_imm(&mut self, dst: Location, val: u64) {
        match dst {
            Location::GPR(dst) => {
                let dst = dst.into_index() as u32;
                // Round the trailing-zero count down to 0, 16, 32 or 48
                // (for `val == 0` the count is 64, which masks to 0).
                let offset = val.trailing_zeros() & 48;
                let masked = 0xffff & (val >> offset);
                if (masked << offset) == val {
                    dynasm!(self ; movz X(dst), masked as u32, LSL offset);
                } else {
                    dynasm!(self ; movz W(dst), (val&0xffff) as u32);
                    let val = val >> 16;
                    if val != 0 {
                        dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 16);
                        let val = val >> 16;
                        if val != 0 {
                            dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 32);
                            let val = val >> 16;
                            if val != 0 {
                                dynasm!(self ; movk X(dst), (val&0xffff) as u32, LSL 48);
                            }
                        }
                    }
                }
            }
            _ => panic!("singlepass can't emit MOVW {:?}", dst),
        }
    }

    /// Emits `add dst, src1, src2` for GPR operands, or GPR plus a small
    /// immediate (in either source position).
    fn emit_add(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; add X(dst), X(src1), X(src2));
            }
            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; add W(dst), W(src1), W(src2));
            }
            (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst))
            | (Size::S64, Location::Imm8(imm), Location::GPR(src1),
Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; add X(dst), X(src1), imm as u32);
            }
            (Size::S64, Location::GPR(rn), Location::Imm32(imm), Location::GPR(rd))
            | (Size::S64, Location::Imm32(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                // Only immediates up to 0xfff are accepted on this path.
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; add X(rd), X(rn), imm);
            }
            (Size::S64, Location::GPR(rn), Location::Imm64(imm), Location::GPR(rd))
            | (Size::S64, Location::Imm64(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; add X(rd), X(rn), imm as u32);
            }
            (Size::S32, Location::GPR(rn), Location::Imm8(imm), Location::GPR(rd))
            | (Size::S32, Location::Imm8(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                dynasm!(self ; add W(rd), W(rn), imm as u32);
            }
            (Size::S32, Location::GPR(rn), Location::Imm32(imm), Location::GPR(rd))
            | (Size::S32, Location::Imm32(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; add W(rd), W(rn), imm);
            }
            _ => panic!(
                "singlepass can't emit ADD {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// Subtraction: `dst = src1 - src2`, where `src2` is a GPR or an immediate
    /// (Imm8 always; Imm32/Imm64 only when at most 0xfff).
    fn emit_sub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; sub X(rd), X(rn), X(rm));
            }
            (Size::S32, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; sub W(rd), W(rn), W(rm));
            }
            (Size::S64, Location::GPR(rn), Location::Imm8(imm), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                dynasm!(self ; sub X(rd), X(rn), imm as u32);
            }
            (Size::S32, Location::GPR(rn), Location::Imm8(imm), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                dynasm!(self ; sub W(rd), W(rn), imm as u32);
            }
            (Size::S32, Location::GPR(rn), Location::Imm32(imm), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; sub W(rd), W(rn), imm);
            }
            (Size::S64, Location::GPR(rn), Location::Imm32(imm), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; sub X(rd), X(rn), imm);
            }
            (Size::S64, Location::GPR(rn), Location::Imm64(imm), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; sub X(rd), X(rn), imm as u32);
            }
            _ => panic!(
                "singlepass can't emit SUB {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// Multiplication of two GPRs into a GPR, 64-bit or 32-bit.
    fn emit_mul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; mul X(rd), X(rn), X(rm));
            }
            (Size::S32, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; mul W(rd), W(rn), W(rm));
            }
            _ => panic!(
                "singlepass can't emit MUL {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// `adds` variant of addition; accepts the same GPR/immediate shapes in
    /// either source position.
    fn emit_adds(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; adds X(rd), X(rn), X(rm));
            }
            (Size::S32, Location::GPR(rn), Location::GPR(rm), Location::GPR(rd)) => {
                let (rn, rm, rd) = (
                    rn.into_index() as u32,
                    rm.into_index() as u32,
                    rd.into_index() as u32,
                );
                dynasm!(self ; adds W(rd), W(rn), W(rm));
            }
            (Size::S64, Location::GPR(rn), Location::Imm8(imm), Location::GPR(rd))
            | (Size::S64, Location::Imm8(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                dynasm!(self ; adds X(rd), X(rn), imm as u32);
            }
            (Size::S64, Location::GPR(rn), Location::Imm32(imm), Location::GPR(rd))
            | (Size::S64, Location::Imm32(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                if imm > 0xfff {
                    unreachable!();
                }
                dynasm!(self ; adds X(rd), X(rn), imm);
            }
            (Size::S32, Location::GPR(rn), Location::Imm8(imm), Location::GPR(rd))
            | (Size::S32, Location::Imm8(imm), Location::GPR(rn), Location::GPR(rd)) => {
                let (rn, rd) = (rn.into_index() as u32, rd.into_index() as u32);
                dynasm!(self ; adds W(rd), W(rn), imm as u32);
            }
            (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst))
            | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                if imm >= 0x1000 {
                    unreachable!();
                }
                dynasm!(self ; adds W(dst),
W(src1), imm);
            }
            _ => panic!(
                "singlepass can't emit ADD.S {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// Emits `subs dst, src1, src2` (subtract, flag-setting form).
    ///
    /// Supported operand shapes, at 32- or 64-bit width:
    /// register - register, and register - `Imm8`.
    ///
    /// # Panics
    /// Panics for any other combination of size and locations.
    fn emit_subs(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; subs X(dst), X(src1), X(src2));
            }
            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; subs W(dst), W(src1), W(src2));
            }
            (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; subs X(dst), X(src1), imm as u32);
            }
            (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; subs W(dst), W(src1), imm as u32);
            }
            _ => panic!(
                "singlepass can't emit SUB.S {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// Emits `add dst, src1, src2, LSL #lsl`, i.e. `dst = src1 + (src2 << lsl)`.
    ///
    /// Only the 64-bit register/register/register form is supported.
    ///
    /// # Panics
    /// Panics for any other combination of size and locations.
    fn emit_add_lsl(&mut self, sz: Size, src1: Location, src2: Location, lsl: u32, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; add X(dst), X(src1), X(src2), LSL lsl);
            }
            // The instruction being emitted is ADD (shifted register); name it
            // correctly so this diagnostic is not confused with `emit_lsl`'s.
            _ => panic!(
                "singlepass can't emit ADD {:?} {:?} {:?} {:?} LSL {:?}",
                sz, src1, src2, dst, lsl
            ),
        }
    }

    fn emit_cmp(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
dynasm!(self ; cmp X(dst), X(src)); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; cmp W(dst), W(src)); + } + (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp X(dst), imm as u32); + } + (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } + dynasm!(self ; cmp X(dst), imm as u32); + } + (Size::S64, Location::Imm64(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } + dynasm!(self ; cmp X(dst), imm as u32); + } + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + dynasm!(self ; cmp W(dst), imm as u32); + } + (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + if imm >= 0x1000 { + unreachable!(); + } + dynasm!(self ; cmp W(dst), imm as u32); + } + _ => panic!("singlepass can't emit CMP {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_tst(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; tst X(dst), X(src)); + } + (Size::S64, Location::Imm32(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(imm as u64).is_some() { + unreachable!(); + } + dynasm!(self ; tst X(dst), imm as u64); + } + (Size::S64, Location::Imm64(imm), Location::GPR(dst)) => { + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(imm as u64).is_some() { + unreachable!(); + } + dynasm!(self ; tst X(dst), imm as u64); + } + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; 
dynasm!(self ; tst W(dst), W(src));
            }
            (Size::S32, Location::Imm32(imm), Location::GPR(dst)) => {
                let dst = dst.into_index() as u32;
                // This is the 32-bit (W) form, so the mask must be encodable as a
                // 32-bit logical immediate. Checking against the 64-bit encoder
                // rejects valid 32-bit masks (e.g. 0x8000_0001) and accepts
                // 0xFFFF_FFFF, which has no 32-bit encoding.
                if encode_logical_immediate_32bit(imm).is_none() {
                    unreachable!();
                }
                dynasm!(self ; tst W(dst), imm);
            }
            _ => unreachable!(),
        }
    }

    fn emit_lsl(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; lsl X(dst), X(src1), X(src2));
            }
            (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                if imm > 63 {
                    unreachable!();
                }
                let imm = imm as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; lsl X(dst), X(src1), imm);
            }
            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; lsl W(dst), W(src1), W(src2));
            }
            (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst))
            | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                if imm > 63 {
                    unreachable!();
                }
                dynasm!(self ; lsl X(dst), X(src1), imm as u32);
            }
            (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst))
            | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let dst = dst.into_index() as u32;
                if imm > 63 {
                    unreachable!();
                }
                dynasm!(self ; lsl X(dst), X(src1), imm as u32);
            }
            (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst))
            | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => {
                let
src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm > 31 { + unreachable!(); + } + dynasm!(self ; lsl W(dst), W(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm > 31 { + unreachable!(); + } + dynasm!(self ; lsl W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit LSL {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_asr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; asr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let imm = imm as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), imm); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; asr W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), imm as u32); + } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = 
src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; asr X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; asr W(dst), W(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; asr W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit ASR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_lsr(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let imm = imm as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), imm); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; lsr W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => 
{ + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), imm as u32); + } + (Size::S64, Location::GPR(src1), Location::Imm64(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm64(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; lsr X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; lsr W(dst), W(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm32(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; lsr W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit LSR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_ror(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ror X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm32(imm), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let imm = imm as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; ror X(dst), X(src1), imm); + } + (Size::S32, Location::GPR(src1), 
Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ror W(dst), W(src1), W(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S64, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 63 { + unreachable!(); + } + dynasm!(self ; ror X(dst), X(src1), imm as u32); + } + (Size::S32, Location::GPR(src1), Location::Imm8(imm), Location::GPR(dst)) + | (Size::S32, Location::Imm8(imm), Location::GPR(src1), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let dst = dst.into_index() as u32; + if imm == 0 || imm > 31 { + unreachable!(); + } + dynasm!(self ; ror W(dst), W(src1), imm as u32); + } + _ => panic!( + "singlepass can't emit ROR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_or(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; orr X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; orr X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; orr W(dst), W(src1), src2); + } + 
(Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; orr W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit OR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_and(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; and X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; and X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; and W(dst), W(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; and W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit AND {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_eor(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; 
+ dynasm!(self ; eor X(dst), X(src1), X(src2)); + } + (Size::S64, Location::GPR(src1), Location::Imm64(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u64; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_64bit(src2 as u64).is_some() { + unreachable!(); + } + dynasm!(self ; eor X(dst), X(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::Imm32(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2 as u32; + let dst = dst.into_index() as u32; + if !encode_logical_immediate_32bit(src2).is_some() { + unreachable!(); + } + dynasm!(self ; eor W(dst), W(src1), src2); + } + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; eor W(dst), W(src1), W(src2)); + } + _ => panic!( + "singlepass can't emit EOR {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_bfc(&mut self, sz: Size, lsb: u32, width: u32, dst: Location) { + match (sz, dst) { + (Size::S32, Location::GPR(dst)) => { + dynasm!(self ; bfc W(dst as u32), lsb, width); + } + (Size::S64, Location::GPR(dst)) => { + dynasm!(self ; bfc X(dst as u32), lsb, width); + } + _ => unimplemented!(), + } + } + fn emit_bfi(&mut self, sz: Size, src: Location, lsb: u32, width: u32, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; bfi W(dst as u32), W(src as u32), lsb, width); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; bfi X(dst as u32), X(src as u32), lsb, width); + } + _ => unimplemented!(), + } + } + + fn emit_udiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = 
src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; udiv W(dst), W(src1), W(src2));
            }
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; udiv X(dst), X(src1), X(src2));
            }
            _ => panic!(
                "singlepass can't emit UDIV {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }
    /// Emits `sdiv dst, src1, src2` (signed division) at 32- or 64-bit width.
    ///
    /// All three operands must be general-purpose registers.
    ///
    /// # Panics
    /// Panics for any other combination of size and locations.
    fn emit_sdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) {
        match (sz, src1, src2, dst) {
            (Size::S32, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; sdiv W(dst), W(src1), W(src2));
            }
            (Size::S64, Location::GPR(src1), Location::GPR(src2), Location::GPR(dst)) => {
                let src1 = src1.into_index() as u32;
                let src2 = src2.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; sdiv X(dst), X(src1), X(src2));
            }
            // Report the instruction that was actually requested (was "UDIV").
            _ => panic!(
                "singlepass can't emit SDIV {:?} {:?} {:?} {:?}",
                sz, src1, src2, dst
            ),
        }
    }

    /// msub : c - a*b -> dst
    fn emit_msub(&mut self, sz: Size, a: Location, b: Location, c: Location, dst: Location) {
        match (sz, a, b, c, dst) {
            (
                Size::S32,
                Location::GPR(a),
                Location::GPR(b),
                Location::GPR(c),
                Location::GPR(dst),
            ) => {
                let a = a.into_index() as u32;
                let b = b.into_index() as u32;
                let c = c.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; msub W(dst), W(a), W(b), W(c));
            }
            (
                Size::S64,
                Location::GPR(a),
                Location::GPR(b),
                Location::GPR(c),
                Location::GPR(dst),
            ) => {
                let a = a.into_index() as u32;
                let b = b.into_index() as u32;
                let c = c.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; msub X(dst), X(a), X(b), X(c));
            }
            _ => panic!(
                "singlepass can't emit msub {:?} {:?} {:?} {:?} 
{:?}", + sz, a, b, c, dst + ), + } + } + + fn emit_sxtb(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtb W(dst), W(src)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtb X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTB {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_sxth(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxth W(dst), W(src)); + } + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxth X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTH {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_sxtw(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; sxtw X(dst), W(src)); + } + _ => panic!("singlepass can't emit SXTW {:?} {:?}", src, dst), + } + } + fn emit_uxtb(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; uxtb W(dst), W(src)); + } + _ => panic!("singlepass can't emit UXTB {:?} {:?}", src, dst), + } + } + fn emit_uxth(&mut self, _sz: Size, src: Location, dst: Location) { + match (src, dst) { + (Location::GPR(src), Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; uxth W(dst), 
W(src)); + } + _ => panic!("singlepass can't emit UXTH {:?} {:?}", src, dst), + } + } + + fn emit_cset(&mut self, sz: Size, dst: Location, cond: Condition) { + match (sz, dst) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; cset W(reg), eq), + Condition::Ne => dynasm!(self ; cset W(reg), ne), + Condition::Cs => dynasm!(self ; cset W(reg), cs), + Condition::Cc => dynasm!(self ; cset W(reg), cc), + Condition::Mi => dynasm!(self ; cset W(reg), mi), + Condition::Pl => dynasm!(self ; cset W(reg), pl), + Condition::Vs => dynasm!(self ; cset W(reg), vs), + Condition::Vc => dynasm!(self ; cset W(reg), vc), + Condition::Hi => dynasm!(self ; cset W(reg), hi), + Condition::Ls => dynasm!(self ; cset W(reg), ls), + Condition::Ge => dynasm!(self ; cset W(reg), ge), + Condition::Lt => dynasm!(self ; cset W(reg), lt), + Condition::Gt => dynasm!(self ; cset W(reg), gt), + Condition::Le => dynasm!(self ; cset W(reg), le), + Condition::Al => dynasm!(self ; cset W(reg), al), + } + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; cset X(reg), eq), + Condition::Ne => dynasm!(self ; cset X(reg), ne), + Condition::Cs => dynasm!(self ; cset X(reg), cs), + Condition::Cc => dynasm!(self ; cset X(reg), cc), + Condition::Mi => dynasm!(self ; cset X(reg), mi), + Condition::Pl => dynasm!(self ; cset X(reg), pl), + Condition::Vs => dynasm!(self ; cset X(reg), vs), + Condition::Vc => dynasm!(self ; cset X(reg), vc), + Condition::Hi => dynasm!(self ; cset X(reg), hi), + Condition::Ls => dynasm!(self ; cset X(reg), ls), + Condition::Ge => dynasm!(self ; cset X(reg), ge), + Condition::Lt => dynasm!(self ; cset X(reg), lt), + Condition::Gt => dynasm!(self ; cset X(reg), gt), + Condition::Le => dynasm!(self ; cset X(reg), le), + Condition::Al => dynasm!(self ; cset X(reg), al), + } + } + _ => panic!("singlepass can't emit CSET {:?} {:?} {:?}", sz, dst, cond), + } + } + fn 
emit_csetm(&mut self, sz: Size, dst: Location, cond: Condition) { + match (sz, dst) { + (Size::S32, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; csetm W(reg), eq), + Condition::Ne => dynasm!(self ; csetm W(reg), ne), + Condition::Cs => dynasm!(self ; csetm W(reg), cs), + Condition::Cc => dynasm!(self ; csetm W(reg), cc), + Condition::Mi => dynasm!(self ; csetm W(reg), mi), + Condition::Pl => dynasm!(self ; csetm W(reg), pl), + Condition::Vs => dynasm!(self ; csetm W(reg), vs), + Condition::Vc => dynasm!(self ; csetm W(reg), vc), + Condition::Hi => dynasm!(self ; csetm W(reg), hi), + Condition::Ls => dynasm!(self ; csetm W(reg), ls), + Condition::Ge => dynasm!(self ; csetm W(reg), ge), + Condition::Lt => dynasm!(self ; csetm W(reg), lt), + Condition::Gt => dynasm!(self ; csetm W(reg), gt), + Condition::Le => dynasm!(self ; csetm W(reg), le), + Condition::Al => dynasm!(self ; csetm W(reg), al), + } + } + (Size::S64, Location::GPR(reg)) => { + let reg = reg as u32; + match cond { + Condition::Eq => dynasm!(self ; csetm X(reg), eq), + Condition::Ne => dynasm!(self ; csetm X(reg), ne), + Condition::Cs => dynasm!(self ; csetm X(reg), cs), + Condition::Cc => dynasm!(self ; csetm X(reg), cc), + Condition::Mi => dynasm!(self ; csetm X(reg), mi), + Condition::Pl => dynasm!(self ; csetm X(reg), pl), + Condition::Vs => dynasm!(self ; csetm X(reg), vs), + Condition::Vc => dynasm!(self ; csetm X(reg), vc), + Condition::Hi => dynasm!(self ; csetm X(reg), hi), + Condition::Ls => dynasm!(self ; csetm X(reg), ls), + Condition::Ge => dynasm!(self ; csetm X(reg), ge), + Condition::Lt => dynasm!(self ; csetm X(reg), lt), + Condition::Gt => dynasm!(self ; csetm X(reg), gt), + Condition::Le => dynasm!(self ; csetm X(reg), le), + Condition::Al => dynasm!(self ; csetm X(reg), al), + } + } + _ => panic!("singlepass can't emit CSETM {:?} {:?} {:?}", sz, dst, cond), + } + } + fn emit_cinc(&mut self, sz: Size, src: Location, dst: Location, 
cond: Condition) {
        // `cinc Rd, Rn, cond` writes `Rn + 1` to `Rd` when `cond` holds and
        // `Rn` otherwise, so the destination is always the first operand.
        match (sz, src, dst) {
            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                match cond {
                    Condition::Eq => dynasm!(self ; cinc W(dst), W(src), eq),
                    Condition::Ne => dynasm!(self ; cinc W(dst), W(src), ne),
                    Condition::Cs => dynasm!(self ; cinc W(dst), W(src), cs),
                    Condition::Cc => dynasm!(self ; cinc W(dst), W(src), cc),
                    Condition::Mi => dynasm!(self ; cinc W(dst), W(src), mi),
                    Condition::Pl => dynasm!(self ; cinc W(dst), W(src), pl),
                    Condition::Vs => dynasm!(self ; cinc W(dst), W(src), vs),
                    Condition::Vc => dynasm!(self ; cinc W(dst), W(src), vc),
                    Condition::Hi => dynasm!(self ; cinc W(dst), W(src), hi),
                    Condition::Ls => dynasm!(self ; cinc W(dst), W(src), ls),
                    Condition::Ge => dynasm!(self ; cinc W(dst), W(src), ge),
                    Condition::Lt => dynasm!(self ; cinc W(dst), W(src), lt),
                    Condition::Gt => dynasm!(self ; cinc W(dst), W(src), gt),
                    Condition::Le => dynasm!(self ; cinc W(dst), W(src), le),
                    Condition::Al => dynasm!(self ; cinc W(dst), W(src), al),
                };
            }
            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                // Operand order fixed: the 64-bit arm previously emitted
                // `cinc X(src), X(dst)`, which overwrote the source register and
                // left the destination untouched (the 32-bit arm was correct).
                match cond {
                    Condition::Eq => dynasm!(self ; cinc X(dst), X(src), eq),
                    Condition::Ne => dynasm!(self ; cinc X(dst), X(src), ne),
                    Condition::Cs => dynasm!(self ; cinc X(dst), X(src), cs),
                    Condition::Cc => dynasm!(self ; cinc X(dst), X(src), cc),
                    Condition::Mi => dynasm!(self ; cinc X(dst), X(src), mi),
                    Condition::Pl => dynasm!(self ; cinc X(dst), X(src), pl),
                    Condition::Vs => dynasm!(self ; cinc X(dst), X(src), vs),
                    Condition::Vc => dynasm!(self ; cinc X(dst), X(src), vc),
                    Condition::Hi => dynasm!(self ; cinc X(dst), X(src), hi),
                    Condition::Ls => dynasm!(self ; cinc X(dst), X(src), ls),
                    Condition::Ge => dynasm!(self ; cinc X(dst), X(src), ge),
                    Condition::Lt => dynasm!(self ; cinc X(dst), X(src), lt),
                    Condition::Gt => dynasm!(self ; cinc X(dst), X(src), gt),
                    Condition::Le => dynasm!(self ; cinc X(dst), X(src), le),
                    Condition::Al => dynasm!(self ; cinc X(dst), X(src), al),
                };
            }
            _ => unreachable!(),
        }
    }

    /// Emits `clz dst, src` (count leading zeros) at 32- or 64-bit width.
    /// Both operands must be general-purpose registers; panics otherwise.
    fn emit_clz(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; clz X(dst), X(src));
            }
            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; clz W(dst), W(src));
            }
            // Message previously named a different instruction ("CLS").
            _ => panic!("singlepass can't emit CLZ {:?} {:?} {:?}", sz, src, dst),
        }
    }
    /// Emits `rbit dst, src` (reverse bit order) at 32- or 64-bit width.
    /// Both operands must be general-purpose registers; panics otherwise.
    fn emit_rbit(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; rbit X(dst), X(src));
            }
            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; rbit W(dst), W(src));
            }
            // Message previously named a different instruction ("CLS").
            _ => panic!("singlepass can't emit RBIT {:?} {:?} {:?}", sz, src, dst),
        }
    }

    /// Binds the dynamic label `label` to the current assembly position.
    fn emit_label(&mut self, label: Label) {
        dynasm!(self ; => label);
    }
    /// Emits `adr X(reg), label`, loading the label's address into `reg`.
    fn emit_load_label(&mut self, reg: GPR, label: Label) {
        let reg = reg.into_index() as u32;
        dynasm!(self ; adr X(reg), =>label);
    }
    /// Emits an unconditional branch (`b`) to `label`.
    fn emit_b_label(&mut self, label: Label) {
        dynasm!(self ; b =>label);
    }
    /// Emits `cbz reg, label` (branch if the register is zero) at 32- or
    /// 64-bit width. `reg` must be a general-purpose register; panics otherwise.
    fn emit_cbz_label(&mut self, sz: Size, reg: Location, label: Label) {
        match (sz, reg) {
            (Size::S32, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; cbz W(reg), =>label);
            }
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; cbz X(reg), =>label);
            }
            _ => panic!("singlepass can't emit CBZ {:?} {:?} {:?}", sz, reg, label),
        }
    }
    fn emit_cbnz_label(&mut self, sz: Size,
reg: Location, label: Label) {
        match (sz, reg) {
            (Size::S32, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; cbnz W(reg), =>label);
            }
            (Size::S64, Location::GPR(reg)) => {
                let reg = reg.into_index() as u32;
                dynasm!(self ; cbnz X(reg), =>label);
            }
            _ => panic!("singlepass can't emit CBNZ {:?} {:?} {:?}", sz, reg, label),
        }
    }
    /// Emits `tbz reg, #n, label`: branch to `label` when bit `n` of `reg`
    /// is zero. `sz` picks the `W` or `X` view of the register; anything
    /// other than a general-purpose register panics.
    fn emit_tbz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label) {
        match (sz, reg) {
            (Size::S32, Location::GPR(rt)) => {
                let rt = rt.into_index() as u32;
                dynasm!(self ; tbz W(rt), n, =>label);
            }
            (Size::S64, Location::GPR(rt)) => {
                let rt = rt.into_index() as u32;
                dynasm!(self ; tbz X(rt), n, =>label);
            }
            _ => panic!(
                "singlepass can't emit TBZ {:?} {:?} {:?} {:?}",
                sz, reg, n, label
            ),
        }
    }
    /// Emits `tbnz reg, #n, label`: branch to `label` when bit `n` of `reg`
    /// is non-zero. `sz` picks the `W` or `X` view of the register; anything
    /// other than a general-purpose register panics.
    fn emit_tbnz_label(&mut self, sz: Size, reg: Location, n: u32, label: Label) {
        match (sz, reg) {
            (Size::S32, Location::GPR(rt)) => {
                let rt = rt.into_index() as u32;
                dynasm!(self ; tbnz W(rt), n, =>label);
            }
            (Size::S64, Location::GPR(rt)) => {
                let rt = rt.into_index() as u32;
                dynasm!(self ; tbnz X(rt), n, =>label);
            }
            _ => panic!(
                "singlepass can't emit TBNZ {:?} {:?} {:?} {:?}",
                sz, reg, n, label
            ),
        }
    }
    fn emit_bcond_label(&mut self, condition: Condition, label: Label) {
        match condition {
            Condition::Eq => dynasm!(self ; b.eq => label),
            Condition::Ne => dynasm!(self ; b.ne => label),
            Condition::Cs => dynasm!(self ; b.cs => label),
            Condition::Cc => dynasm!(self ; b.cc => label),
            Condition::Mi => dynasm!(self ; b.mi => label),
            Condition::Pl => dynasm!(self ; b.pl => label),
            Condition::Vs => dynasm!(self ; b.vs => label),
            Condition::Vc => dynasm!(self ; b.vc => label),
            Condition::Hi => dynasm!(self ; b.hi => label),
            Condition::Ls => dynasm!(self ; b.ls => label),
            Condition::Ge => dynasm!(self ; b.ge => label),
            Condition::Lt => dynasm!(self ; b.lt => label),
            Condition::Gt => dynasm!(self ; b.gt =>
label),
            Condition::Le => dynasm!(self ; b.le => label),
            Condition::Al => dynasm!(self ; b => label),
        }
    }
    /// Emits `br X(reg)`: an indirect branch to the address held in `reg`.
    fn emit_b_register(&mut self, reg: GPR) {
        let target = reg.into_index() as u32;
        dynasm!(self ; br X(target));
    }
    /// Emits `bl label`: branch-with-link to `label`.
    fn emit_call_label(&mut self, label: Label) {
        dynasm!(self ; bl =>label);
    }
    /// Emits `blr X(reg)`: branch-with-link to the address held in `reg`.
    fn emit_call_register(&mut self, reg: GPR) {
        let target = reg.into_index() as u32;
        dynasm!(self ; blr X(target));
    }
    /// Emits `ret`.
    fn emit_ret(&mut self) {
        dynasm!(self ; ret);
    }

    /// Emits `udf` with the immediate `0x1234`.
    fn emit_udf(&mut self) {
        dynasm!(self ; udf 0x1234);
    }
    /// Emits `dmb ish`.
    fn emit_dmb(&mut self) {
        dynasm!(self ; dmb ish);
    }
    /// Emits `brk` with the immediate `0`.
    fn emit_brk(&mut self) {
        dynasm!(self ; brk 0);
    }

    /// Emits `fcmp src1, src2` on single- (`S32`) or double-precision (`S64`)
    /// SIMD registers. Panics for any other size/location combination.
    fn emit_fcmp(&mut self, sz: Size, src1: Location, src2: Location) {
        match (sz, src1, src2) {
            (Size::S32, Location::SIMD(lhs), Location::SIMD(rhs)) => {
                let (lhs, rhs) = (lhs.into_index() as u32, rhs.into_index() as u32);
                dynasm!(self ; fcmp S(lhs), S(rhs));
            }
            (Size::S64, Location::SIMD(lhs), Location::SIMD(rhs)) => {
                let (lhs, rhs) = (lhs.into_index() as u32, rhs.into_index() as u32);
                dynasm!(self ; fcmp D(lhs), D(rhs));
            }
            _ => panic!("singlepass can't emit FCMP {:?} {:?} {:?}", sz, src1, src2),
        }
    }

    /// Emits `fneg dst, src` on single- (`S32`) or double-precision (`S64`)
    /// SIMD registers. Panics for any other size/location combination.
    fn emit_fneg(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S32, Location::SIMD(vn), Location::SIMD(vd)) => {
                let (vn, vd) = (vn.into_index() as u32, vd.into_index() as u32);
                dynasm!(self ; fneg S(vd), S(vn));
            }
            (Size::S64, Location::SIMD(vn), Location::SIMD(vd)) => {
                let (vn, vd) = (vn.into_index() as u32, vd.into_index() as u32);
                dynasm!(self ; fneg D(vd), D(vn));
            }
            _ => panic!("singlepass can't emit FNEG {:?} {:?} {:?}", sz, src, dst),
        }
    }
    fn emit_fsqrt(&mut self, sz: Size, src: Location, dst: Location) {
        match (sz, src, dst) {
            (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => {
                let src = src.into_index() as u32;
                let dst = dst.into_index() as u32;
                dynasm!(self ; fsqrt S(dst), S(src));
            }
            (Size::S64,
Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsqrt D(dst), D(src)); + } + _ => panic!("singlepass can't emit FSQRT {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_fadd(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fadd S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fadd D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FADD {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fsub(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsub S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fsub D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FSUB {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fmul(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = 
dst.into_index() as u32; + dynasm!(self ; fmul S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmul D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMUL {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fdiv(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fdiv S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fdiv D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FDIV {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + (Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmin S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmin D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMIN {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location) { + match (sz, src1, src2, dst) { + 
(Size::S32, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmax S(dst), S(src1), S(src2)); + } + (Size::S64, Location::SIMD(src1), Location::SIMD(src2), Location::SIMD(dst)) => { + let src1 = src1.into_index() as u32; + let src2 = src2.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fmax D(dst), D(src1), D(src2)); + } + _ => panic!( + "singlepass can't emit FMAX {:?} {:?} {:?} {:?}", + sz, src1, src2, dst + ), + } + } + + fn emit_frintz(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintz S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintz D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTZ {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_frintn(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintn S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintn D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTN {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_frintm(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintm S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), 
Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintm D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTM {:?} {:?} {:?}", sz, src, dst), + } + } + fn emit_frintp(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintp S(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; frintp D(dst), D(src)); + } + _ => panic!("singlepass can't emit FRINTP {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf S(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf S(dst), X(src)); + } + (Size::S32, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf D(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; scvtf D(dst), X(src)); + } + _ => panic!( + "singlepass can't emit SCVTF {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = 
dst.into_index() as u32; + dynasm!(self ; ucvtf S(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S32, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf S(dst), X(src)); + } + (Size::S32, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf D(dst), W(src)); + } + (Size::S64, Location::GPR(src), Size::S64, Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; ucvtf D(dst), X(src)); + } + _ => panic!( + "singlepass can't emit UCVTF {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + /// Emit an FCVT floating-point precision conversion between two SIMD registers. + /// `sz_in` is the size of the source value: `Size::S32` widens `S(src)` into `D(dst)`, + /// `Size::S64` narrows `D(src)` into `S(dst)`. Panics on any other operand combination. + fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location) { + match (sz_in, src, dst) { + (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvt D(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvt S(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVT {:?} {:?} {:?}", + sz_in, src, dst + ), + } + } + fn emit_fcvtzs(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs W(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs W(dst), D(src)); + } + (Size::S32, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs X(dst), S(src)); + } + (Size::S64,
Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzs X(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVTZS {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + fn emit_fcvtzu(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) { + match (sz_in, src, sz_out, dst) { + (Size::S32, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu W(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S32, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu W(dst), D(src)); + } + (Size::S32, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu X(dst), S(src)); + } + (Size::S64, Location::SIMD(src), Size::S64, Location::GPR(dst)) => { + let src = src.into_index() as u32; + let dst = dst.into_index() as u32; + dynasm!(self ; fcvtzu X(dst), D(src)); + } + _ => panic!( + "singlepass can't emit FCVTZU {:?} {:?} {:?} {:?}", + sz_in, src, sz_out, dst + ), + } + } + + // 1 011 0100 0100 000 => fpcr + fn emit_read_fpcr(&mut self, reg: GPR) { + dynasm!(self ; mrs X(reg as u32), 0b1_011_0100_0100_000); + } + fn emit_write_fpcr(&mut self, reg: GPR) { + dynasm!(self ; msr 0b1_011_0100_0100_000, X(reg as u32)); + } + // 1 011 0100 0100 001 => fpsr + fn emit_read_fpsr(&mut self, reg: GPR) { + dynasm!(self ; mrs X(reg as u32), 0b1_011_0100_0100_001); + } + fn emit_write_fpsr(&mut self, reg: GPR) { + dynasm!(self ; msr 0b1_011_0100_0100_001, X(reg as u32)); + } +} + +pub fn gen_std_trampoline_arm64( + sig: &FunctionType, + calling_convention: CallingConvention, +) -> FunctionBody { + let mut a = Assembler::new(0); + + let fptr = GPR::X27; + let args = GPR::X28; + + dynasm!(a + ; sub 
sp, sp, 32 + ; stp x29, x30, [sp] + ; stp X(fptr as u32), X(args as u32), [sp, 16] + ; mov x29, sp + ; mov X(fptr as u32), x1 + ; mov X(args as u32), x2 + ); + + let stack_args = sig.params().len().saturating_sub(7); //1st arg is ctx, not an actual arg + let mut stack_offset = stack_args as u32 * 8; + if stack_args > 0 { + if stack_offset % 16 != 0 { + stack_offset += 8; + assert!(stack_offset % 16 == 0); + } + dynasm!(a ; sub sp, sp, stack_offset); + } + + // Move arguments to their locations. + // `callee_vmctx` is already in the first argument register, so no need to move. + let mut caller_stack_offset: i32 = 0; + for (i, param) in sig.params().iter().enumerate() { + let sz = match *param { + Type::I32 | Type::F32 => Size::S32, + Type::I64 | Type::F64 => Size::S64, + Type::ExternRef => Size::S64, + Type::FuncRef => Size::S64, + _ => panic!( + "singlepass unsupported param type for trampoline {:?}", + *param + ), + }; + match i { + 0..=6 => { + a.emit_ldr( + sz, + Location::GPR(GPR::from_index(i + 1).unwrap()), + Location::Memory(args, (i * 16) as i32), + ); + } + _ => { + match calling_convention { + CallingConvention::AppleAarch64 => { + match sz { + Size::S8 => (), + Size::S16 => { + if caller_stack_offset & 1 != 0 { + caller_stack_offset = (caller_stack_offset + 1) & !1; + } + } + Size::S32 => { + if caller_stack_offset & 3 != 0 { + caller_stack_offset = (caller_stack_offset + 3) & !3; + } + } + Size::S64 => { + if caller_stack_offset & 7 != 0 { + caller_stack_offset = (caller_stack_offset + 7) & !7; + } + } + }; + } + _ => (), + }; + // using X16 as scratch reg + a.emit_ldr( + sz, + Location::GPR(GPR::X16), + Location::Memory(args, (i * 16) as i32), + ); + a.emit_str( + sz, + Location::GPR(GPR::X16), + Location::Memory(GPR::XzrSp, caller_stack_offset), + ); + match calling_convention { + CallingConvention::AppleAarch64 => { + caller_stack_offset += match sz { + Size::S8 => 1, + Size::S16 => 2, + Size::S32 => 4, + Size::S64 => 8, + }; + } + _ => { + 
caller_stack_offset += 8; + } + } + } + } + } + + dynasm!(a ; blr X(fptr as u32)); + + // Write return value. + if !sig.results().is_empty() { + a.emit_str(Size::S64, Location::GPR(GPR::X0), Location::Memory(args, 0)); + } + + // Restore stack. + dynasm!(a + ; ldp X(fptr as u32), X(args as u32), [x29, 16] + ; ldp x29, x30, [x29] + ; add sp, sp, 32 + stack_offset as u32 + ; ret + ); + + FunctionBody { + body: a.finalize().unwrap().to_vec(), + unwind_info: None, + } +} +// Generates dynamic import function call trampoline for a function type. +pub fn gen_std_dynamic_import_trampoline_arm64( + vmoffsets: &VMOffsets, + sig: &FunctionType, + calling_convention: CallingConvention, +) -> FunctionBody { + let mut a = Assembler::new(0); + // Allocate argument array. + let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()); + // Save LR and X26, as scratch register + a.emit_stpdb( + Size::S64, + Location::GPR(GPR::X30), + Location::GPR(GPR::X26), + GPR::XzrSp, + 16, + ); + + if stack_offset != 0 { + if stack_offset < 0x1000 { + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X26), stack_offset as u64); + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X26), + Location::GPR(GPR::XzrSp), + ); + } + } + + // Copy arguments. 
+ if !sig.params().is_empty() { + let mut argalloc = ArgumentRegisterAllocator::default(); + argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext + + let mut stack_param_count: usize = 0; + + for (i, ty) in sig.params().iter().enumerate() { + let source_loc = match argalloc.next(*ty, calling_convention) { + Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr), + Some(ARM64Register::NEON(neon)) => Location::SIMD(neon), + None => { + let sz = match calling_convention { + CallingConvention::AppleAarch64 => match *ty { + Type::I32 | Type::F32 => Size::S32, + _ => { + if stack_param_count & 7 != 0 { + stack_param_count = (stack_param_count + 7) & !7; + }; + Size::S64 + } + }, + _ => Size::S64, + }; + a.emit_ldr( + sz, + Location::GPR(GPR::X26), + Location::Memory(GPR::XzrSp, (stack_offset + 16 + stack_param_count) as _), + ); + stack_param_count += match sz { + Size::S32 => 4, + Size::S64 => 8, + _ => unreachable!(), + }; + Location::GPR(GPR::X26) + } + }; + a.emit_str( + Size::S64, + source_loc, + Location::Memory(GPR::XzrSp, (i * 16) as _), + ); + + // Zero upper 64 bits. + a.emit_str( + Size::S64, + Location::GPR(GPR::XzrSp), // XZR here + Location::Memory(GPR::XzrSp, (i * 16 + 8) as _), // XSP here + ); + } + } + + match calling_convention { + _ => { + // Load target address. + let offset = vmoffsets.vmdynamicfunction_import_context_address(); + a.emit_ldur(Size::S64, Location::GPR(GPR::X26), GPR::X0, offset as i32); + // Load values array. + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(0), + Location::GPR(GPR::X1), + ); + } + }; + + // Call target. + a.emit_call_register(GPR::X26); + + // Fetch return value. + if !sig.results().is_empty() { + assert_eq!(sig.results().len(), 1); + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X0), + Location::Memory(GPR::XzrSp, 0), + ); + } + + // Release values array. 
+ if stack_offset != 0 { + if stack_offset < 0x1000 { + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as _), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X26), stack_offset as u64); + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X26), + Location::GPR(GPR::XzrSp), + ); + } + } + a.emit_ldpia( + Size::S64, + Location::GPR(GPR::X30), + Location::GPR(GPR::X26), + GPR::XzrSp, + 16, + ); + + // Return. + a.emit_ret(); + + FunctionBody { + body: a.finalize().unwrap().to_vec(), + unwind_info: None, + } +} +// Singlepass calls import functions through a trampoline. +// If the signature has any F32/F64 parameter, the incoming arguments are first spilled +// to the stack and then reloaded into the register (or stack slot) chosen by +// `ArgumentRegisterAllocator`. The trampoline then loads the function pointer (into X16) +// and the target vmctx (into X0) from the import entry at `vmctx_vmfunction_import(index)` +// and branches to X16. +pub fn gen_import_call_trampoline_arm64( + vmoffsets: &VMOffsets, + index: FunctionIndex, + sig: &FunctionType, + calling_convention: CallingConvention, +) -> CustomSection { + let mut a = Assembler::new(0); + + // Singlepass internally treats all arguments as integers, + // but the standard System V calling convention requires + // floating point arguments to be passed in NEON registers. + // Translation is expensive, so only do it if needed. + if sig + .params() + .iter() + .any(|&x| x == Type::F32 || x == Type::F64) + { + match calling_convention { + _ => { + let mut param_locations: Vec = vec![]; + + // Allocate stack space for arguments. + let stack_offset: i32 = if sig.params().len() > 7 { + 7 * 8 + } else { + (sig.params().len() as i32) * 8 + }; + let stack_offset = if stack_offset & 15 != 0 { + stack_offset + 8 + } else { + stack_offset + }; + if stack_offset > 0 { + if stack_offset < 0x1000 { + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as u32), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X16), stack_offset as u64); + a.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X16), + Location::GPR(GPR::XzrSp), + ); + } + } + + // Store all arguments to the stack to prevent overwrite.
+ for i in 0..sig.params().len() { + let loc = match i { + 0..=6 => { + static PARAM_REGS: &[GPR] = &[ + GPR::X1, + GPR::X2, + GPR::X3, + GPR::X4, + GPR::X5, + GPR::X6, + GPR::X7, + ]; + let loc = Location::Memory(GPR::XzrSp, (i * 8) as i32); + a.emit_str(Size::S64, Location::GPR(PARAM_REGS[i]), loc); + loc + } + _ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 7) * 8) as i32), + }; + param_locations.push(loc); + } + + // Copy arguments. + let mut caller_stack_offset: i32 = 0; + let mut argalloc = ArgumentRegisterAllocator::default(); + argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext + for (i, ty) in sig.params().iter().enumerate() { + let prev_loc = param_locations[i]; + let targ = match argalloc.next(*ty, calling_convention) { + Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr), + Some(ARM64Register::NEON(neon)) => Location::SIMD(neon), + None => { + // No register can be allocated. Put this argument on the stack. + a.emit_ldr(Size::S64, Location::GPR(GPR::X16), prev_loc); + a.emit_str( + Size::S64, + Location::GPR(GPR::X16), + Location::Memory(GPR::XzrSp, stack_offset + caller_stack_offset), + ); + caller_stack_offset += 8; + continue; + } + }; + a.emit_ldr(Size::S64, targ, prev_loc); + } + + // Restore stack pointer. + if stack_offset > 0 { + if stack_offset < 0x1000 { + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_offset as u32), + Location::GPR(GPR::XzrSp), + ); + } else { + a.emit_mov_imm(Location::GPR(GPR::X16), stack_offset as u64); + a.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::GPR(GPR::X16), + Location::GPR(GPR::XzrSp), + ); + } + } + } + } + } + + // Emits a tail call trampoline that loads the address of the target import function + // from Ctx and jumps to it. + + let offset = vmoffsets.vmctx_vmfunction_import(index); + // for ldr, offset needs to be a multiple of 8, which it often is not, + // so use ldur, but then offset is limited to -256 .. +255.
It will be positive here + let offset = + if (offset > 0 && offset < 0xF8) || (offset > 0 && offset < 0x7FF8 && (offset & 7) == 0) { + offset + } else { + a.emit_mov_imm(Location::GPR(GPR::X16), (offset as i64) as u64); + a.emit_add( + Size::S64, + Location::GPR(GPR::X0), + Location::GPR(GPR::X16), + Location::GPR(GPR::X0), + ); + 0 + }; + match calling_convention { + _ => { + if (offset & 7) == 0 { + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X16), + Location::Memory(GPR::X0, offset as i32), // function pointer + ); + a.emit_ldr( + Size::S64, + Location::GPR(GPR::X0), + Location::Memory(GPR::X0, offset as i32 + 8), // target vmctx + ); + } else { + a.emit_ldur( + Size::S64, + Location::GPR(GPR::X16), + GPR::X0, + offset as i32, // function pointer + ); + a.emit_ldur( + Size::S64, + Location::GPR(GPR::X0), + GPR::X0, + offset as i32 + 8, // target vmctx + ); + } + } + } + a.emit_b_register(GPR::X16); + + let section_body = SectionBody::new_with_vec(a.finalize().unwrap().to_vec()); + + CustomSection { + protection: CustomSectionProtection::ReadExecute, + bytes: section_body, + relocations: vec![], + } +} diff --git a/lib/compiler-singlepass/src/lib.rs b/lib/compiler-singlepass/src/lib.rs index 42cde289d36..48329748abf 100644 --- a/lib/compiler-singlepass/src/lib.rs +++ b/lib/compiler-singlepass/src/lib.rs @@ -9,13 +9,16 @@ //! runtime performance.
mod address_map; +mod arm64_decl; mod codegen; mod common_decl; mod compiler; mod config; +mod emitter_arm64; mod emitter_x64; mod location; mod machine; +mod machine_arm64; mod machine_x64; mod x64_decl; diff --git a/lib/compiler-singlepass/src/location.rs b/lib/compiler-singlepass/src/location.rs index bc5277d2cad..61817068a49 100644 --- a/lib/compiler-singlepass/src/location.rs +++ b/lib/compiler-singlepass/src/location.rs @@ -71,9 +71,4 @@ pub trait CombinedRegister: Copy + Clone + Eq + PartialEq + Debug { fn from_gpr(x: u16) -> Self; /// Convert from an SIMD register fn from_simd(x: u16) -> Self; - /// Returns the instruction prefix for move to stack - /// for example `movq %this_reg, ?(%rsp)` on x86_64 - /// To build an instruction, append the memory location as a 32-bit - /// offset to the stack pointer to this prefix. - fn _prefix_mov_to_stack(&self) -> Option<&'static [u8]>; } diff --git a/lib/compiler-singlepass/src/machine.rs b/lib/compiler-singlepass/src/machine.rs index 67b940749c9..a68daa91ec5 100644 --- a/lib/compiler-singlepass/src/machine.rs +++ b/lib/compiler-singlepass/src/machine.rs @@ -1,5 +1,6 @@ use crate::common_decl::*; use crate::location::{Location, Reg}; +use crate::machine_arm64::MachineARM64; use crate::machine_x64::MachineX86_64; use dynasmrt::{AssemblyOffset, DynamicLabel}; use std::collections::BTreeMap; @@ -81,8 +82,8 @@ pub trait Machine { fn reserve_unused_temp_gpr(&mut self, gpr: Self::GPR) -> Self::GPR; /// reserve a GPR fn reserve_gpr(&mut self, gpr: Self::GPR); - /// Push used gpr to the stack - fn push_used_gpr(&mut self); + /// Push used gpr to the stack. Return the bytes taken on the stack + fn push_used_gpr(&mut self) -> usize; /// Pop used gpr to the stack fn pop_used_gpr(&mut self); /// Picks an unused SIMD register. @@ -99,10 +100,12 @@ pub trait Machine { fn reserve_simd(&mut self, simd: Self::SIMD); /// Releases a temporary XMM register. 
fn release_simd(&mut self, simd: Self::SIMD); - /// Push used simd regs to the stack - fn push_used_simd(&mut self); + /// Push used simd regs to the stack. Return bytes taken on the stack + fn push_used_simd(&mut self) -> usize; /// Pop used simd regs to the stack fn pop_used_simd(&mut self); + /// Return a rounded stack adjustment value (must be multiple of 16 bytes on ARM64 for example) + fn round_stack_adjust(&self, value: usize) -> usize; /// Set the source location of the Wasm to the given offset. fn set_srcloc(&mut self, offset: u32); /// Marks each address in the code range emitted by `f` with the trap code `code`. @@ -129,10 +132,6 @@ pub trait Machine { /// restore stack /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn restore_stack(&mut self, delta_stack_offset: u32); - /// push callee saved register to the stack - fn push_callee_saved(&mut self); - /// pop callee saved register from the stack - fn pop_callee_saved(&mut self); /// Pop stack of locals /// Like assembler.emit_add(Size::S64, Location::Imm32(delta_stack_offset as u32), Location::GPR(GPR::RSP)) fn pop_stack_locals(&mut self, delta_stack_offset: u32); @@ -158,8 +157,24 @@ pub trait Machine { &self, calling_convention: CallingConvention, ) -> Vec>; - /// Get param location + /// Get param location (to build a call, using SP for stack args) fn get_param_location( + &self, + idx: usize, + sz: Size, + stack_offset: &mut usize, + calling_convention: CallingConvention, + ) -> Location; + /// Get call param location (from a call, using FP for stack args) + fn get_call_param_location( + &self, + idx: usize, + sz: Size, + stack_offset: &mut usize, + calling_convention: CallingConvention, + ) -> Location; + /// Get simple param location + fn get_simple_param_location( &self, idx: usize, calling_convention: CallingConvention, @@ -259,6 +274,10 @@ pub trait Machine { fn get_gpr_for_ret(&self) -> Self::GPR; /// get the simd for the return of
float/double values fn get_simd_for_ret(&self) -> Self::SIMD; + + /// Emit a debug breakpoint + fn emit_debug_breakpoint(&mut self); + /// load the address of a memory location (will panic if src is not a memory) /// like LEA opcode on x86_64 fn location_address( @@ -430,6 +449,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Signed Division with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. fn emit_binop_sdiv32( @@ -438,6 +458,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Unsigned Reminder (of a division) with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. fn emit_binop_urem32( @@ -446,6 +467,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Signed Reminder (of a Division) with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. fn emit_binop_srem32( @@ -454,6 +476,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// And with location directly from the stack fn emit_binop_and32( @@ -1043,6 +1066,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Signed Division with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. fn emit_binop_sdiv64( @@ -1051,6 +1075,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Unsigned Reminder (of a division) with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. 
fn emit_binop_urem64( @@ -1059,6 +1084,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// Signed Reminder (of a Division) with location directly from the stack. return the offset of the DIV opcode, to mark as trappable. fn emit_binop_srem64( @@ -1067,6 +1093,7 @@ pub trait Machine { loc_b: Location, ret: Location, integer_division_by_zero: Label, + integer_overflow: Label, ) -> usize; /// And with location directly from the stack fn emit_binop_and64( @@ -2166,11 +2193,17 @@ pub fn gen_std_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_std_trampoline(sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_std_trampoline(sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_std_trampoline(sig, calling_convention) + } } /// Generates dynamic import function call trampoline for a function type. 
pub fn gen_std_dynamic_import_trampoline( @@ -2179,11 +2212,17 @@ pub fn gen_std_dynamic_import_trampoline( target: &Target, calling_convention: CallingConvention, ) -> FunctionBody { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_std_dynamic_import_trampoline(vmoffsets, sig, calling_convention) + } } /// Singlepass calls import functions through a trampoline. pub fn gen_import_call_trampoline( @@ -2193,9 +2232,54 @@ pub fn gen_import_call_trampoline( target: &Target, calling_convention: CallingConvention, ) -> CustomSection { - let machine = match target.triple().architecture { - Architecture::X86_64 => MachineX86_64::new(), + match target.triple().architecture { + Architecture::X86_64 => { + let machine = MachineX86_64::new(); + machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } + Architecture::Aarch64(_) => { + let machine = MachineARM64::new(); + machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } _ => unimplemented!(), - }; - machine.gen_import_call_trampoline(vmoffsets, index, sig, calling_convention) + } } + +// Constants for the bounds of truncation operations. These are the least or +// greatest exact floats in either f32 or f64 representation less-than (for +// least) or greater-than (for greatest) the i32 or i64 or u32 or u64 +// min (for least) or max (for greatest), when rounding towards zero. + +/// Greatest Exact Float (32 bits) less-than i32::MIN when rounding towards zero. 
+pub const GEF32_LT_I32_MIN: f32 = -2147483904.0; +/// Least Exact Float (32 bits) greater-than i32::MAX when rounding towards zero. +pub const LEF32_GT_I32_MAX: f32 = 2147483648.0; +/// Greatest Exact Float (32 bits) less-than i64::MIN when rounding towards zero. +pub const GEF32_LT_I64_MIN: f32 = -9223373136366403584.0; +/// Least Exact Float (32 bits) greater-than i64::MAX when rounding towards zero. +pub const LEF32_GT_I64_MAX: f32 = 9223372036854775808.0; +/// Greatest Exact Float (32 bits) less-than u32::MIN when rounding towards zero. +pub const GEF32_LT_U32_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u32::MAX when rounding towards zero. +pub const LEF32_GT_U32_MAX: f32 = 4294967296.0; +/// Greatest Exact Float (32 bits) less-than u64::MIN when rounding towards zero. +pub const GEF32_LT_U64_MIN: f32 = -1.0; +/// Least Exact Float (32 bits) greater-than u64::MAX when rounding towards zero. +pub const LEF32_GT_U64_MAX: f32 = 18446744073709551616.0; + +/// Greatest Exact Float (64 bits) less-than i32::MIN when rounding towards zero. +pub const GEF64_LT_I32_MIN: f64 = -2147483649.0; +/// Least Exact Float (64 bits) greater-than i32::MAX when rounding towards zero. +pub const LEF64_GT_I32_MAX: f64 = 2147483648.0; +/// Greatest Exact Float (64 bits) less-than i64::MIN when rounding towards zero. +pub const GEF64_LT_I64_MIN: f64 = -9223372036854777856.0; +/// Least Exact Float (64 bits) greater-than i64::MAX when rounding towards zero. +pub const LEF64_GT_I64_MAX: f64 = 9223372036854775808.0; +/// Greatest Exact Float (64 bits) less-than u32::MIN when rounding towards zero. +pub const GEF64_LT_U32_MIN: f64 = -1.0; +/// Least Exact Float (64 bits) greater-than u32::MAX when rounding towards zero. +pub const LEF64_GT_U32_MAX: f64 = 4294967296.0; +/// Greatest Exact Float (64 bits) less-than u64::MIN when rounding towards zero. +pub const GEF64_LT_U64_MIN: f64 = -1.0; +/// Least Exact Float (64 bits) greater-than u64::MAX when rounding towards zero. 
+pub const LEF64_GT_U64_MAX: f64 = 18446744073709551616.0; diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs new file mode 100644 index 00000000000..eca8e1f5777 --- /dev/null +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -0,0 +1,5025 @@ +use crate::arm64_decl::new_machine_state; +use crate::arm64_decl::{GPR, NEON}; +use crate::common_decl::*; +use crate::emitter_arm64::*; +use crate::location::Location as AbstractLocation; +use crate::machine::*; +use dynasmrt::{aarch64::Aarch64Relocation, VecAssembler}; +use std::collections::HashSet; +use wasmer_compiler::wasmparser::Type as WpType; +use wasmer_compiler::{ + CallingConvention, CustomSection, FunctionBody, InstructionAddressMap, Relocation, + RelocationKind, RelocationTarget, SourceLoc, TrapInformation, +}; +use wasmer_types::{FunctionIndex, FunctionType}; +use wasmer_vm::{TrapCode, VMOffsets}; + +type Assembler = VecAssembler; +type Location = AbstractLocation; + +pub struct MachineARM64 { + assembler: Assembler, + used_gprs: HashSet, + used_simd: HashSet, + trap_table: TrapTable, + /// Map from byte offset into wasm function to range of native instructions. + /// + // Ordered by increasing InstructionAddressMap::srcloc. + instructions_address_map: Vec, + /// The source location for the current operator. + src_loc: u32, + /// is last push on a 8byte multiple or 16bytes? 
+ pushed: bool, +} + +#[allow(dead_code)] +#[derive(PartialEq)] +enum ImmType { + None, + NoneXzr, + Bits8, + Bits12, + Shift32, + Shift32No0, + Shift64, + Shift64No0, + Logical32, + Logical64, + UnscaledOffset, + OffsetByte, + OffsetHWord, + OffsetWord, + OffsetDWord, +} + +#[allow(dead_code)] +impl MachineARM64 { + pub fn new() -> Self { + MachineARM64 { + assembler: Assembler::new(0), + used_gprs: HashSet::new(), + used_simd: HashSet::new(), + trap_table: TrapTable::default(), + instructions_address_map: vec![], + src_loc: 0, + pushed: false, + } + } + fn compatible_imm(&self, imm: i64, ty: ImmType) -> bool { + match ty { + ImmType::None => false, + ImmType::NoneXzr => false, + ImmType::Bits8 => (imm >= 0) && (imm < 256), + ImmType::Bits12 => (imm >= 0) && (imm < 0x1000), + ImmType::Shift32 => (imm >= 0) && (imm < 32), + ImmType::Shift32No0 => (imm > 0) && (imm < 32), + ImmType::Shift64 => (imm >= 0) && (imm < 64), + ImmType::Shift64No0 => (imm > 0) && (imm < 64), + ImmType::Logical32 => encode_logical_immediate_32bit(imm as u32).is_some(), + ImmType::Logical64 => encode_logical_immediate_64bit(imm as u64).is_some(), + ImmType::UnscaledOffset => (imm > -256) && (imm < 256), + ImmType::OffsetByte => (imm >= 0) && (imm < 0x1000), + ImmType::OffsetHWord => (imm & 1 == 0) && (imm >= 0) && (imm < 0x2000), + ImmType::OffsetWord => (imm & 3 == 0) && (imm >= 0) && (imm < 0x4000), + ImmType::OffsetDWord => (imm & 7 == 0) && (imm >= 0) && (imm < 0x8000), + } + } + + fn location_to_reg( + &mut self, + sz: Size, + src: Location, + temps: &mut Vec, + allow_imm: ImmType, + read_val: bool, + wanted: Option, + ) -> Location { + match src { + Location::GPR(_) | Location::SIMD(_) => src, + Location::Imm8(val) => { + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) + } else { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + 
temps.push(tmp.clone()); + tmp + }; + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } + } + } + Location::Imm32(val) => { + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) + } else { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); + Location::GPR(tmp) + } + } + } + Location::Imm64(val) => { + if allow_imm == ImmType::NoneXzr && val == 0 { + Location::GPR(GPR::XzrSp) + } else { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; + self.assembler.emit_mov_imm(Location::GPR(tmp), val as u64); + Location::GPR(tmp) + } + } + } + Location::Memory(reg, val) => { + let tmp = if wanted.is_some() { + wanted.unwrap() + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + tmp + }; + if read_val { + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::GPR(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(sz, Location::GPR(tmp), reg, val); + } else { + if reg == tmp { + unreachable!(); + } + self.assembler + .emit_mov_imm(Location::GPR(tmp), (val as i64) as u64); + self.assembler.emit_ldr( + sz, + Location::GPR(tmp), + Location::Memory2(reg, tmp, Multiplier::One, 0), + ); + } + } + Location::GPR(tmp) + } + _ => panic!("singlepass can't emit location_to_reg {:?} {:?}", sz, src), + } + } + fn location_to_neon( + &mut self, + sz: Size, + src: Location, + temps: &mut 
Vec, + allow_imm: ImmType, + read_val: bool, + ) -> Location { + match src { + Location::SIMD(_) => src, + Location::GPR(_) => { + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + if read_val { + self.assembler.emit_mov(sz, src, Location::SIMD(tmp)); + } + Location::SIMD(tmp) + } + Location::Imm8(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); + Location::SIMD(tmp) + } + } + Location::Imm32(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler + .emit_mov_imm(Location::GPR(gpr), (val as i64) as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); + Location::SIMD(tmp) + } + } + Location::Imm64(val) => { + if self.compatible_imm(val as i64, allow_imm) { + src + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov_imm(Location::GPR(gpr), val as u64); + self.assembler + .emit_mov(sz, Location::GPR(gpr), Location::SIMD(tmp)); + self.release_gpr(gpr); + Location::SIMD(tmp) + } + } + Location::Memory(reg, val) => { + let tmp = self.acquire_temp_simd().unwrap(); + temps.push(tmp.clone()); + if read_val { + let offsize = if sz == Size::S32 { + ImmType::OffsetWord + } else { + ImmType::OffsetDWord + }; + if self.compatible_imm(val as i64, offsize) { + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory(reg, val as _), + ); + } else if self.compatible_imm(val as i64, ImmType::UnscaledOffset) { + 
self.assembler.emit_ldur(sz, Location::SIMD(tmp), reg, val); + } else { + let gpr = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(gpr), (val as i64) as u64); + self.assembler.emit_ldr( + sz, + Location::SIMD(tmp), + Location::Memory2(reg, gpr, Multiplier::One, 0), + ); + self.release_gpr(gpr); + } + } + Location::SIMD(tmp) + } + _ => panic!("singlepass can't emit location_to_neon {:?} {:?}", sz, src), + } + } + + fn emit_relaxed_binop( + &mut self, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + putback: bool, + ) { + let mut temps = vec![]; + let src_imm = if putback { + ImmType::None + } else { + ImmType::Bits12 + }; + let src = self.location_to_reg(sz, src, &mut temps, src_imm, true, None); + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, !putback, None); + op(&mut self.assembler, sz, src, dest); + if dst != dest && putback { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_binop_neon( + &mut self, + op: fn(&mut Assembler, Size, Location, Location), + sz: Size, + src: Location, + dst: Location, + putback: bool, + ) { + let mut temps = vec![]; + let src = self.location_to_neon(sz, src, &mut temps, ImmType::None, true); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None, !putback); + op(&mut self.assembler, sz, src, dest); + if dst != dest && putback { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_simd(r); + } + } + fn emit_relaxed_binop3( + &mut self, + op: fn(&mut Assembler, Size, Location, Location, Location), + sz: Size, + src1: Location, + src2: Location, + dst: Location, + allow_imm: ImmType, + ) { + let mut temps = vec![]; + let src1 = self.location_to_reg(sz, src1, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(sz, src2, &mut temps, allow_imm, true, None); + let dest = self.location_to_reg(sz, dst, &mut temps, 
ImmType::None, false, None); + op(&mut self.assembler, sz, src1, src2, dest); + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_binop3_neon( + &mut self, + op: fn(&mut Assembler, Size, Location, Location, Location), + sz: Size, + src1: Location, + src2: Location, + dst: Location, + allow_imm: ImmType, + ) { + let mut temps = vec![]; + let src1 = self.location_to_neon(sz, src1, &mut temps, ImmType::None, true); + let src2 = self.location_to_neon(sz, src2, &mut temps, allow_imm, true); + let dest = self.location_to_neon(sz, dst, &mut temps, ImmType::None, false); + op(&mut self.assembler, sz, src1, src2, dest); + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_simd(r); + } + } + fn emit_relaxed_ldr64(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetDWord) { + self.assembler.emit_ldr(Size::S64, dest, src); + } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(Size::S64, dest, addr, offset); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldr( + Size::S64, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr32(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetWord) { + 
self.assembler.emit_ldr(Size::S32, dest, src); + } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { + self.assembler.emit_ldur(Size::S32, dest, addr, offset); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldr( + Size::S32, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr32s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetWord) { + self.assembler.emit_ldrsw(Size::S64, dest, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldrsw( + Size::S64, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr16(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_ldrh(Size::S32, dest, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldrh( + Size::S32, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if 
dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr16s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_ldrsh(sz, dest, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldrsh( + sz, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr8(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler.emit_ldrb(Size::S32, dest, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldrb( + Size::S32, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_ldr8s(&mut self, sz: Size, dst: Location, src: Location) { + let mut temps = vec![]; + let dest = self.location_to_reg(sz, dst, &mut temps, ImmType::None, false, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler.emit_ldrsb(sz, dest, src); + } else { + let tmp = 
self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_ldrsb( + sz, + dest, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + if dst != dest { + self.move_location(sz, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_str64(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetDWord) { + self.assembler.emit_str(Size::S64, dst, src); + } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { + self.assembler.emit_stur(Size::S64, dst, addr, offset); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_str( + Size::S64, + dst, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => panic!("singlepass can't emit str64 {:?} {:?}", dst, src), + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_str32(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetWord) { + self.assembler.emit_str(Size::S32, dst, src); + } else if self.compatible_imm(offset as i64, ImmType::UnscaledOffset) { + self.assembler.emit_stur(Size::S32, dst, addr, offset); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_str( + Size::S32, + dst, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => 
unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_str16(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetHWord) { + self.assembler.emit_strh(Size::S32, dst, src); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_strh( + Size::S32, + dst, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } + fn emit_relaxed_str8(&mut self, dst: Location, src: Location) { + let mut temps = vec![]; + let dst = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::NoneXzr, true, None); + match src { + Location::Memory(addr, offset) => { + if self.compatible_imm(offset as i64, ImmType::OffsetByte) { + self.assembler + .emit_strb(Size::S32, dst, Location::Memory(addr, offset)); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_strb( + Size::S32, + dst, + Location::Memory2(addr, tmp, Multiplier::One, 0), + ); + temps.push(tmp); + } + } + _ => unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } + /// I64 comparison of `loc_a` and `loc_b`; the outcome of condition `c` is written to `ret` (a GPR or memory location) as a 32-bit value.
+ fn emit_cmpop_i64_dynamic_b( + &mut self, + c: Condition, + loc_a: Location, + loc_b: Location, + ret: Location, + ) { + match ret { + Location::GPR(_) => { + self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, ret, c); + } + Location::Memory(_, _) => { + let tmp = self.acquire_temp_gpr().unwrap(); + self.emit_relaxed_cmp(Size::S64, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, Location::GPR(tmp), c); + self.move_location(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); + } + _ => { + unreachable!(); + } + } + } + /// I32 comparison of `loc_a` and `loc_b`; the outcome of condition `c` is written to `ret` (a GPR or memory location) as a 32-bit value. + fn emit_cmpop_i32_dynamic_b( + &mut self, + c: Condition, + loc_a: Location, + loc_b: Location, + ret: Location, + ) { + match ret { + Location::GPR(_) => { + self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, ret, c); + } + Location::Memory(_, _) => { + let tmp = self.acquire_temp_gpr().unwrap(); + self.emit_relaxed_cmp(Size::S32, loc_b, loc_a); + self.assembler.emit_cset(Size::S32, Location::GPR(tmp), c); + self.move_location(Size::S32, Location::GPR(tmp), ret); + self.release_gpr(tmp); + } + _ => { + unreachable!(); + } + } + } + + fn memory_op( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + check_alignment: bool, + value_size: usize, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + cb: F, + ) { + let tmp_addr = self.acquire_temp_gpr().unwrap(); + + // Reusing `tmp_addr` for temporary indirection here, since it's not used before the last reference to `{base,bound}_loc`. + let (base_loc, bound_loc) = if imported_memories { + // Imported memories require one level of indirection.
+ self.emit_relaxed_binop( + Assembler::emit_mov, + Size::S64, + Location::Memory(self.get_vmctx_reg(), offset), + Location::GPR(tmp_addr), + true, + ); + (Location::Memory(tmp_addr, 0), Location::Memory(tmp_addr, 8)) + } else { + ( + Location::Memory(self.get_vmctx_reg(), offset), + Location::Memory(self.get_vmctx_reg(), offset + 8), + ) + }; + + let tmp_base = self.acquire_temp_gpr().unwrap(); + let tmp_bound = self.acquire_temp_gpr().unwrap(); + + // Load base into temporary register. + self.emit_relaxed_ldr64(Size::S64, Location::GPR(tmp_base), base_loc); + + // Load bound into temporary register, if needed. + if need_check { + self.emit_relaxed_ldr64(Size::S64, Location::GPR(tmp_bound), bound_loc); + + // Wasm -> Effective. + // Assuming we never underflow - should always be true on Linux/macOS and Windows >=8, + // since the first page from 0x0 to 0x1000 is not accepted by mmap. + self.assembler.emit_add( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp_base), + Location::GPR(tmp_bound), + ); + if self.compatible_imm(value_size as _, ImmType::Bits12) { + self.assembler.emit_sub( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp_bound), + Location::Imm32(value_size as _), + ); + } else { + let tmp2 = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp2), value_size as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(tmp_bound), + Location::GPR(tmp2), + Location::GPR(tmp_bound), + ); + self.release_gpr(tmp2); + } + } + + // Load effective address. + // `base_loc` and `bound_loc` becomes INVALID after this line, because `tmp_addr` + // might be reused. + self.move_location(Size::S32, addr, Location::GPR(tmp_addr)); + + // Add offset to memory address. 
+ if memarg.offset != 0 { + if self.compatible_imm(memarg.offset as _, ImmType::Bits12) { + self.assembler.emit_adds( + Size::S32, + Location::Imm32(memarg.offset), + Location::GPR(tmp_addr), + Location::GPR(tmp_addr), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), memarg.offset as _); + self.assembler.emit_adds( + Size::S32, + Location::GPR(tmp_addr), + Location::GPR(tmp), + Location::GPR(tmp_addr), + ); + self.release_gpr(tmp); + } + + // Trap if offset calculation overflowed. + self.assembler + .emit_bcond_label(Condition::Cs, heap_access_oob); + } + + // Wasm linear memory -> real memory + self.assembler.emit_add( + Size::S64, + Location::GPR(tmp_base), + Location::GPR(tmp_addr), + Location::GPR(tmp_addr), + ); + + if need_check { + // Trap if the end address of the requested area is above that of the linear memory. + self.assembler + .emit_cmp(Size::S64, Location::GPR(tmp_bound), Location::GPR(tmp_addr)); + + // `tmp_bound` is inclusive. So trap only if `tmp_addr > tmp_bound`. 
+ self.assembler + .emit_bcond_label(Condition::Hi, heap_access_oob); + } + + self.release_gpr(tmp_bound); + self.release_gpr(tmp_base); + + let align = memarg.align; + if check_alignment && align != 1 { + self.assembler.emit_tst( + Size::S64, + Location::Imm32((align - 1).into()), + Location::GPR(tmp_addr), + ); + self.assembler + .emit_bcond_label(Condition::Ne, heap_access_oob); + } + let begin = self.assembler.get_offset().0; + cb(self, tmp_addr); + let end = self.assembler.get_offset().0; + self.mark_address_range_with_trap_code(TrapCode::HeapAccessOutOfBounds, begin, end); + + self.release_gpr(tmp_addr); + } + + /*fn emit_compare_and_swap( + &mut self, + _loc: Location, + _target: Location, + _ret: Location, + _memarg: &MemoryImmediate, + _value_size: usize, + _memory_sz: Size, + _stack_sz: Size, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + _cb: F, + ) { + unimplemented!(); + }*/ + + fn offset_is_ok(&self, size: Size, offset: i32) -> bool { + if offset < 0 { + return false; + } + let shift = match size { + Size::S8 => 0, + Size::S16 => 1, + Size::S32 => 2, + Size::S64 => 3, + }; + if offset >= 0x1000 << shift { + return false; + } + if (offset & ((1 << shift) - 1)) != 0 { + return false; + } + return true; + } + + fn emit_push(&mut self, sz: Size, src: Location) { + match (sz, src) { + (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => { + let offset = if self.pushed { + 0 + } else { + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(16), + Location::GPR(GPR::XzrSp), + ); + 8 + }; + self.assembler.emit_stur(Size::S64, src, GPR::XzrSp, offset); + self.pushed = !self.pushed; + } + (Size::S64, _) => { + let mut temps = vec![]; + let src = self.location_to_reg(sz, src, &mut temps, ImmType::None, true, None); + let offset = if self.pushed { + 0 + } else { + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(16), + 
Location::GPR(GPR::XzrSp), + ); + 8 + }; + self.assembler.emit_stur(Size::S64, src, GPR::XzrSp, offset); + self.pushed = !self.pushed; + for r in temps { + self.release_gpr(r); + } + } + _ => panic!("singlepass can't emit PUSH {:?} {:?}", sz, src), + } + } + fn emit_double_push(&mut self, sz: Size, src1: Location, src2: Location) { + if !self.pushed { + match (sz, src1, src2) { + (Size::S64, Location::GPR(_), Location::GPR(_)) => { + self.assembler + .emit_stpdb(Size::S64, src1, src2, GPR::XzrSp, 16); + } + _ => { + self.emit_push(sz, src1); + self.emit_push(sz, src2); + } + } + } else { + self.emit_push(sz, src1); + self.emit_push(sz, src2); + } + } + fn emit_pop(&mut self, sz: Size, dst: Location) { + match (sz, dst) { + (Size::S64, Location::GPR(_)) | (Size::S64, Location::SIMD(_)) => { + let offset = if self.pushed { 8 } else { 0 }; + self.assembler.emit_ldur(Size::S64, dst, GPR::XzrSp, offset); + if self.pushed { + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(16), + Location::GPR(GPR::XzrSp), + ); + } + self.pushed = !self.pushed; + } + _ => panic!("singlepass can't emit POP {:?} {:?}", sz, dst), + } + } + fn emit_double_pop(&mut self, sz: Size, dst1: Location, dst2: Location) { + if !self.pushed { + match (sz, dst1, dst2) { + (Size::S64, Location::GPR(_), Location::GPR(_)) => { + self.assembler + .emit_ldpia(Size::S64, dst1, dst2, GPR::XzrSp, 16); + } + _ => { + self.emit_pop(sz, dst2); + self.emit_pop(sz, dst1); + } + } + } else { + self.emit_pop(sz, dst2); + self.emit_pop(sz, dst1); + } + } + + fn set_default_nan(&mut self, temps: &mut Vec) -> GPR { + // temporarily set the DN (Default NaN) bit in FPCR; the previous FPCR value is returned so the caller can restore it + let old_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(old_fpcr.clone()); + self.assembler.emit_read_fpcr(old_fpcr); + let new_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(new_fpcr.clone()); + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler + .emit_mov(Size::S32,
Location::Imm32(1), Location::GPR(tmp)); + self.assembler + .emit_mov(Size::S64, Location::GPR(old_fpcr), Location::GPR(new_fpcr)); + // DN is bit 25 of FPCR + self.assembler.emit_bfi( + Size::S64, + Location::GPR(tmp), + 25, + 1, + Location::GPR(new_fpcr), + ); + self.assembler.emit_write_fpcr(new_fpcr); + old_fpcr + } + fn set_trap_enabled(&mut self, temps: &mut Vec) -> GPR { + // temporarly set FPCR to DefaultNan + let old_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(old_fpcr.clone()); + self.assembler.emit_read_fpcr(old_fpcr); + let new_fpcr = self.acquire_temp_gpr().unwrap(); + temps.push(new_fpcr.clone()); + self.assembler + .emit_mov(Size::S64, Location::GPR(old_fpcr), Location::GPR(new_fpcr)); + // IOE is bit 8 of FPCR + self.assembler + .emit_bfc(Size::S64, 8, 1, Location::GPR(new_fpcr)); + self.assembler.emit_write_fpcr(new_fpcr); + old_fpcr + } + fn restore_fpcr(&mut self, old_fpcr: GPR) { + self.assembler.emit_write_fpcr(old_fpcr); + } + + fn reset_exception_fpsr(&mut self) { + // reset exception count in FPSR + let fpsr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_read_fpsr(fpsr); + // IOC is 0 + self.assembler + .emit_bfc(Size::S64, 0, 1, Location::GPR(fpsr)); + self.assembler.emit_write_fpsr(fpsr); + self.release_gpr(fpsr); + } + fn read_fpsr(&mut self) -> GPR { + let fpsr = self.acquire_temp_gpr().unwrap(); + self.assembler.emit_read_fpsr(fpsr); + fpsr + } + + fn trap_float_convertion_errors( + &mut self, + old_fpcr: GPR, + sz: Size, + f: Location, + temps: &mut Vec, + ) { + let trap_badconv = self.assembler.get_label(); + let end = self.assembler.get_label(); + + let fpsr = self.read_fpsr(); + temps.push(fpsr.clone()); + // no trap, than all good + self.assembler + .emit_tbz_label(Size::S32, Location::GPR(fpsr), 0, end); + // now need to check if it's overflow or NaN + self.assembler + .emit_bfc(Size::S64, 0, 4, Location::GPR(fpsr)); + self.restore_fpcr(old_fpcr); + self.assembler.emit_fcmp(sz, f, f); + 
self.assembler.emit_bcond_label(Condition::Vs, trap_badconv); + // fallthru: trap_overflow + let offset = self.assembler.get_offset().0; + self.trap_table + .offset_to_code + .insert(offset, TrapCode::IntegerOverflow); + self.emit_illegal_op(); + self.mark_instruction_address_end(offset); + + self.emit_label(trap_badconv); + let offset = self.assembler.get_offset().0; + self.trap_table + .offset_to_code + .insert(offset, TrapCode::BadConversionToInteger); + self.emit_illegal_op(); + self.mark_instruction_address_end(offset); + + self.emit_label(end); + self.restore_fpcr(old_fpcr); + } +} + +impl Machine for MachineARM64 { + type GPR = GPR; + type SIMD = NEON; + fn assembler_get_offset(&self) -> Offset { + self.assembler.get_offset() + } + fn index_from_gpr(&self, x: GPR) -> RegisterIndex { + RegisterIndex(x as usize) + } + fn index_from_simd(&self, x: NEON) -> RegisterIndex { + RegisterIndex(x as usize + 32) + } + + fn get_vmctx_reg(&self) -> GPR { + GPR::X28 + } + + fn get_used_gprs(&self) -> Vec { + self.used_gprs.iter().cloned().collect() + } + + fn get_used_simd(&self) -> Vec { + self.used_simd.iter().cloned().collect() + } + + fn pick_gpr(&self) -> Option { + use GPR::*; + static REGS: &[GPR] = &[X9, X10, X11, X12, X13, X14, X15]; + for r in REGS { + if !self.used_gprs.contains(r) { + return Some(*r); + } + } + None + } + + // Picks an unused general purpose register for internal temporary use. 
+ fn pick_temp_gpr(&self) -> Option { + use GPR::*; + static REGS: &[GPR] = &[X8, X7, X6, X5, X4, X3, X2, X1]; + for r in REGS { + if !self.used_gprs.contains(r) { + return Some(*r); + } + } + None + } + + fn acquire_temp_gpr(&mut self) -> Option { + let gpr = self.pick_temp_gpr(); + if let Some(x) = gpr { + self.used_gprs.insert(x); + } + gpr + } + + fn release_gpr(&mut self, gpr: GPR) { + assert!(self.used_gprs.remove(&gpr)); + } + + fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR { + assert!(!self.used_gprs.contains(&gpr)); + self.used_gprs.insert(gpr); + gpr + } + + fn reserve_gpr(&mut self, gpr: GPR) { + self.used_gprs.insert(gpr); + } + + fn push_used_gpr(&mut self) -> usize { + let used_gprs = self.get_used_gprs(); + if used_gprs.len() % 2 == 1 { + self.emit_push(Size::S64, Location::GPR(GPR::XzrSp)); + } + for r in used_gprs.iter() { + self.emit_push(Size::S64, Location::GPR(*r)); + } + ((used_gprs.len() + 1) / 2) * 16 + } + fn pop_used_gpr(&mut self) { + let used_gprs = self.get_used_gprs(); + for r in used_gprs.iter().rev() { + self.emit_pop(Size::S64, Location::GPR(*r)); + } + if used_gprs.len() % 2 == 1 { + self.emit_pop(Size::S64, Location::GPR(GPR::XzrSp)); + } + } + + // Picks an unused NEON register. + fn pick_simd(&self) -> Option { + use NEON::*; + static REGS: &[NEON] = &[V8, V9, V10, V11, V12]; + for r in REGS { + if !self.used_simd.contains(r) { + return Some(*r); + } + } + None + } + + // Picks an unused NEON register for internal temporary use. + fn pick_temp_simd(&self) -> Option { + use NEON::*; + static REGS: &[NEON] = &[V0, V1, V2, V3, V4, V5, V6, V7]; + for r in REGS { + if !self.used_simd.contains(r) { + return Some(*r); + } + } + None + } + + // Acquires a temporary NEON register. 
+ fn acquire_temp_simd(&mut self) -> Option { + let simd = self.pick_temp_simd(); + if let Some(x) = simd { + self.used_simd.insert(x); + } + simd + } + + fn reserve_simd(&mut self, simd: NEON) { + self.used_simd.insert(simd); + } + + // Releases a temporary NEON register. + fn release_simd(&mut self, simd: NEON) { + assert_eq!(self.used_simd.remove(&simd), true); + } + + fn push_used_simd(&mut self) -> usize { + let used_neons = self.get_used_simd(); + let stack_adjust = if used_neons.len() & 1 == 1 { + (used_neons.len() * 8) as u32 + 8 + } else { + (used_neons.len() * 8) as u32 + }; + self.adjust_stack(stack_adjust); + + for (i, r) in used_neons.iter().enumerate() { + self.assembler.emit_str( + Size::S64, + Location::SIMD(*r), + Location::Memory(GPR::XzrSp, (i * 8) as i32), + ); + } + stack_adjust as usize + } + fn pop_used_simd(&mut self) { + let used_neons = self.get_used_simd(); + for (i, r) in used_neons.iter().enumerate() { + self.assembler.emit_ldr( + Size::S64, + Location::SIMD(*r), + Location::Memory(GPR::XzrSp, (i * 8) as i32), + ); + } + let stack_adjust = if used_neons.len() & 1 == 1 { + (used_neons.len() * 8) as u32 + 8 + } else { + (used_neons.len() * 8) as u32 + }; + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm32(stack_adjust as _), + Location::GPR(GPR::XzrSp), + ); + } + + /// Set the source location of the Wasm to the given offset. + fn set_srcloc(&mut self, offset: u32) { + self.src_loc = offset; + } + /// Marks each address in the code range emitted by `f` with the trap code `code`. + fn mark_address_range_with_trap_code(&mut self, code: TrapCode, begin: usize, end: usize) { + for i in begin..end { + self.trap_table.offset_to_code.insert(i, code); + } + self.mark_instruction_address_end(begin); + } + + /// Marks one address as trappable with trap code `code`. 
+ fn mark_address_with_trap_code(&mut self, code: TrapCode) { + let offset = self.assembler.get_offset().0; + self.trap_table.offset_to_code.insert(offset, code); + self.mark_instruction_address_end(offset); + } + /// Marks the instruction as trappable with trap code `code`. return "begin" offset + fn mark_instruction_with_trap_code(&mut self, code: TrapCode) -> usize { + let offset = self.assembler.get_offset().0; + self.trap_table.offset_to_code.insert(offset, code); + offset + } + /// Pushes the instruction to the address map, calculating the offset from a + /// provided beginning address. + fn mark_instruction_address_end(&mut self, begin: usize) { + self.instructions_address_map.push(InstructionAddressMap { + srcloc: SourceLoc::new(self.src_loc), + code_offset: begin, + code_len: self.assembler.get_offset().0 - begin, + }); + } + + /// Insert a StackOverflow (at offset 0) + fn insert_stackoverflow(&mut self) { + let offset = 0; + self.trap_table + .offset_to_code + .insert(offset, TrapCode::StackOverflow); + self.mark_instruction_address_end(offset); + } + + /// Get all current TrapInformation + fn collect_trap_information(&self) -> Vec { + self.trap_table + .offset_to_code + .clone() + .into_iter() + .map(|(offset, code)| TrapInformation { + code_offset: offset as u32, + trap_code: code, + }) + .collect() + } + + fn instructions_address_map(&self) -> Vec { + self.instructions_address_map.clone() + } + + // Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example) + fn round_stack_adjust(&self, value: usize) -> usize { + if value & 0xf != 0 { + ((value >> 4) + 1) << 4 + } else { + value + } + } + + // Memory location for a local on the stack + fn local_on_stack(&mut self, stack_offset: i32) -> Location { + Location::Memory(GPR::X29, -stack_offset) + } + + // Adjust stack for locals + fn adjust_stack(&mut self, delta_stack_offset: u32) { + let delta = if self.compatible_imm(delta_stack_offset as _, ImmType::Bits12) { + 
Location::Imm32(delta_stack_offset as _) + } else { + let tmp = GPR::X17; + self.assembler + .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + Location::GPR(tmp) + }; + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::XzrSp), + delta, + Location::GPR(GPR::XzrSp), + ); + } + // restore stack + fn restore_stack(&mut self, delta_stack_offset: u32) { + let delta = if self.compatible_imm(delta_stack_offset as _, ImmType::Bits12) { + Location::Imm32(delta_stack_offset as _) + } else { + let tmp = GPR::X17; + self.assembler + .emit_mov_imm(Location::GPR(tmp), delta_stack_offset as u64); + Location::GPR(tmp) + }; + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + delta, + Location::GPR(GPR::XzrSp), + ); + } + fn pop_stack_locals(&mut self, delta_stack_offset: u32) { + let real_delta = if delta_stack_offset & 15 != 0 { + delta_stack_offset + 8 + } else { + delta_stack_offset + }; + let delta = if self.compatible_imm(real_delta as i64, ImmType::Bits12) { + Location::Imm32(real_delta as _) + } else { + let tmp = GPR::X17; + self.assembler + .emit_mov_imm(Location::GPR(tmp), real_delta as u64); + Location::GPR(tmp) + }; + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + delta, + Location::GPR(GPR::XzrSp), + ); + } + // push a value on the stack for a native call + fn push_location_for_native(&mut self, loc: Location) { + match loc { + Location::Imm64(_) => { + self.move_location(Size::S64, loc, Location::GPR(GPR::X17)); + self.emit_push(Size::S64, Location::GPR(GPR::X17)); + } + _ => self.emit_push(Size::S64, loc), + } + } + + // Zero a location that is 32bits + fn zero_location(&mut self, size: Size, location: Location) { + self.move_location(size, Location::GPR(GPR::XzrSp), location); + } + + // GPR Reg used for local pointer on the stack + fn local_pointer(&self) -> GPR { + GPR::X29 + } + + // Determine whether a local should be allocated on the stack. 
+ fn is_local_on_stack(&self, idx: usize) -> bool { + idx > 7 + } + + // Determine a local's location. + fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location { + // Use callee-saved registers for the first locals. + match idx { + 0 => Location::GPR(GPR::X19), + 1 => Location::GPR(GPR::X20), + 2 => Location::GPR(GPR::X21), + 3 => Location::GPR(GPR::X22), + 4 => Location::GPR(GPR::X23), + 5 => Location::GPR(GPR::X24), + 6 => Location::GPR(GPR::X25), + 7 => Location::GPR(GPR::X26), + _ => Location::Memory(GPR::X29, -(((idx - 7) * 8 + callee_saved_regs_size) as i32)), + } + } + // Move a local to the stack + fn move_local(&mut self, stack_offset: i32, location: Location) { + if stack_offset < 256 { + self.assembler + .emit_stur(Size::S64, location, GPR::X29, -stack_offset); + } else { + let tmp = GPR::X17; + self.assembler + .emit_mov_imm(Location::GPR(tmp), (stack_offset as i64) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X29), + Location::GPR(tmp), + Location::GPR(tmp), + ); + self.assembler + .emit_str(Size::S64, location, Location::GPR(tmp)); + } + } + + // List of register to save, depending on the CallingConvention + fn list_to_save(&self, _calling_convention: CallingConvention) -> Vec { + vec![] + } + + // Get param location, MUST be called in order! 
+ fn get_param_location( + &self, + idx: usize, + sz: Size, + stack_args: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::AppleAarch64 => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let sz = match sz { + Size::S8 => 0, + Size::S16 => 1, + Size::S32 => 2, + Size::S64 => 3, + }; + // align first + if sz > 1 { + if *stack_args & !((1 << sz) - 1) != 0 { + *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1); + } + } + let loc = Location::Memory(GPR::XzrSp, *stack_args as i32); + *stack_args += 1 << sz; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let loc = Location::Memory(GPR::XzrSp, *stack_args as i32); + *stack_args += 8; + loc + } + }, + } + } + // Get call param location, MUST be called in order! 
+ fn get_call_param_location( + &self, + idx: usize, + sz: Size, + stack_args: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::AppleAarch64 => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let sz = match sz { + Size::S8 => 0, + Size::S16 => 1, + Size::S32 => 2, + Size::S64 => 3, + }; + // align first + if sz > 1 { + if *stack_args & !((1 << sz) - 1) != 0 { + *stack_args = (*stack_args + ((1 << sz) - 1)) & !((1 << sz) - 1); + } + } + let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32); + *stack_args += 1 << sz; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => { + let loc = Location::Memory(GPR::X29, 16 * 2 + *stack_args as i32); + *stack_args += 8; + loc + } + }, + } + } + // Get simple param location, Will not be accurate for Apple calling convention on "stack" arguments + fn get_simple_param_location( + &self, + idx: usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + _ => match idx { + 0 => Location::GPR(GPR::X0), + 1 => Location::GPR(GPR::X1), + 2 => Location::GPR(GPR::X2), + 3 => Location::GPR(GPR::X3), + 4 => Location::GPR(GPR::X4), + 5 => Location::GPR(GPR::X5), + 6 => Location::GPR(GPR::X6), + 7 => Location::GPR(GPR::X7), + _ => Location::Memory(GPR::X29, (16 * 2 + (idx - 8) * 8) as i32), + }, + } + } + // move a location to another + fn move_location(&mut self, size: Size, source: Location, dest: Location) { + match source { + Location::GPR(_) | Location::SIMD(_) => match 
dest { + Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest), + Location::Memory(addr, offs) => { + if self.offset_is_ok(size, offs) { + self.assembler.emit_str(size, source, dest); + } else if self.compatible_imm(offs as i64, ImmType::UnscaledOffset) { + self.assembler.emit_stur(size, source, addr, offs); + } else { + let tmp = GPR::X17; + if offs < 0 { + self.assembler + .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } else { + self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } + self.assembler + .emit_str(size, source, Location::Memory(tmp, 0)); + } + } + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Imm8(_) => match dest { + Location::GPR(_) => self.assembler.emit_mov(size, source, dest), + Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Imm32(val) => match dest { + Location::GPR(_) => self.assembler.emit_mov_imm(dest, val as u64), + Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Imm64(val) => match dest { + Location::GPR(_) => self.assembler.emit_mov_imm(dest, val), + 
Location::Memory(_, _) => match size { + Size::S64 => self.emit_relaxed_str64(source, dest), + Size::S32 => self.emit_relaxed_str32(source, dest), + Size::S16 => self.emit_relaxed_str16(source, dest), + Size::S8 => self.emit_relaxed_str8(source, dest), + }, + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + }, + Location::Memory(addr, offs) => match dest { + Location::GPR(_) | Location::SIMD(_) => { + if self.offset_is_ok(size, offs) { + self.assembler.emit_ldr(size, dest, source); + } else if offs > -256 && offs < 256 { + self.assembler.emit_ldur(size, dest, addr, offs); + } else { + let tmp = GPR::X17; + if offs < 0 { + self.assembler + .emit_mov_imm(Location::GPR(tmp), (-offs) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } else { + self.assembler.emit_mov_imm(Location::GPR(tmp), offs as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(addr), + Location::GPR(tmp), + Location::GPR(tmp), + ); + } + self.assembler + .emit_ldr(size, dest, Location::Memory(tmp, 0)); + } + } + _ => { + let mut temps = vec![]; + let src = + self.location_to_reg(size, source, &mut temps, ImmType::None, true, None); + self.move_location(size, src, dest); + for r in temps { + self.release_gpr(r); + } + } + }, + _ => panic!( + "singlepass can't emit move_location {:?} {:?} => {:?}", + size, source, dest + ), + } + } + // move a location to another + fn move_location_extend( + &mut self, + size_val: Size, + signed: bool, + source: Location, + size_op: Size, + dest: Location, + ) { + if size_op != Size::S64 { + unreachable!(); + } + let mut temps = vec![]; + let dst = self.location_to_reg(size_op, dest, &mut temps, ImmType::None, false, None); + let src = match (size_val, signed, source) { + (Size::S64, _, _) => source, + (Size::S32, false, Location::GPR(_)) => { + self.assembler.emit_mov(size_val, source, dst); + dst + } + (Size::S32, true, 
Location::GPR(_)) => { + self.assembler.emit_sxtw(size_val, source, dst); + dst + } + (Size::S32, false, Location::Memory(_, _)) => { + self.emit_relaxed_ldr32(size_op, dst, source); + dst + } + (Size::S32, true, Location::Memory(_, _)) => { + self.emit_relaxed_ldr32s(size_op, dst, source); + dst + } + _ => panic!( + "singlepass can't emit move_location_extend {:?} {:?} {:?} => {:?} {:?}", + size_val, signed, source, size_op, dest + ), + }; + if src != dst { + self.move_location(size_op, src, dst); + } + if dst != dest { + self.move_location(size_op, dst, dest); + } + for r in temps { + self.release_gpr(r); + } + } + fn load_address(&mut self, _size: Size, _reg: Location, _mem: Location) { + unimplemented!(); + } + // Init the stack loc counter + fn init_stack_loc(&mut self, init_stack_loc_cnt: u64, last_stack_loc: Location) { + let label = self.assembler.get_label(); + let mut temps = vec![]; + let dest = self.acquire_temp_gpr().unwrap(); + temps.push(dest.clone()); + let cnt = self.location_to_reg( + Size::S64, + Location::Imm64(init_stack_loc_cnt), + &mut temps, + ImmType::None, + true, + None, + ); + let dest = match last_stack_loc { + Location::GPR(_) => unreachable!(), + Location::SIMD(_) => unreachable!(), + Location::Memory(reg, offset) => { + if offset < 0 { + let offset = (-offset) as u32; + if self.compatible_imm(offset as i64, ImmType::Bits12) { + self.assembler.emit_sub( + Size::S64, + Location::GPR(reg), + Location::Imm32(offset), + Location::GPR(dest), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(reg), + Location::GPR(tmp), + Location::GPR(dest), + ); + temps.push(tmp); + } + dest + } else { + let offset = offset as u32; + if self.compatible_imm(offset as i64, ImmType::Bits12) { + self.assembler.emit_add( + Size::S64, + Location::GPR(reg), + Location::Imm32(offset), + Location::GPR(dest), + ); + } else 
{ + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), (offset as i64) as u64); + self.assembler.emit_add( + Size::S64, + Location::GPR(reg), + Location::GPR(tmp), + Location::GPR(dest), + ); + temps.push(tmp); + } + dest + } + } + _ => panic!("singlepass can't emit init_stack_loc {:?}", last_stack_loc), + }; + self.assembler.emit_label(label); + self.assembler + .emit_stria(Size::S64, Location::GPR(GPR::XzrSp), dest, 8); + self.assembler + .emit_sub(Size::S64, cnt, Location::Imm8(1), cnt); + self.assembler.emit_cbnz_label(Size::S64, cnt, label); + for r in temps { + self.release_gpr(r); + } + } + // Restore save_area + fn restore_saved_area(&mut self, saved_area_offset: i32) { + let real_delta = if saved_area_offset & 15 != 0 { + self.pushed = true; + saved_area_offset + 8 + } else { + self.pushed = false; + saved_area_offset + }; + if self.compatible_imm(real_delta as _, ImmType::Bits12) { + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X29), + Location::Imm32(real_delta as _), + Location::GPR(GPR::XzrSp), + ); + } else { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), real_delta as u64); + self.assembler.emit_sub( + Size::S64, + Location::GPR(GPR::X29), + Location::GPR(tmp), + Location::GPR(GPR::XzrSp), + ); + self.release_gpr(tmp); + } + } + // Pop a location + fn pop_location(&mut self, location: Location) { + self.emit_pop(Size::S64, location); + } + // Create a new `MachineState` with default values. 
+ fn new_machine_state(&self) -> MachineState { + new_machine_state() + } + + // assembler finalize + fn assembler_finalize(self) -> Vec { + self.assembler.finalize().unwrap() + } + + fn get_offset(&self) -> Offset { + self.assembler.get_offset() + } + + fn finalize_function(&mut self) { + self.assembler.finalize_function(); + } + + fn emit_function_prolog(&mut self) { + self.emit_double_push(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); // save LR too + self.emit_double_push(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28)); + // cannot use mov, because XSP is XZR there. Need to use ADD with #0 + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::XzrSp), + Location::Imm8(0), + Location::GPR(GPR::X29), + ); + } + + fn emit_function_epilog(&mut self) { + // cannot use mov, because XSP is XZR there. Need to use ADD with #0 + self.assembler.emit_add( + Size::S64, + Location::GPR(GPR::X29), + Location::Imm8(0), + Location::GPR(GPR::XzrSp), + ); + self.pushed = false; // SP is restored, consider it aligned + self.emit_double_pop(Size::S64, Location::GPR(GPR::X27), Location::GPR(GPR::X28)); + self.emit_double_pop(Size::S64, Location::GPR(GPR::X29), Location::GPR(GPR::X30)); + } + + fn emit_function_return_value(&mut self, ty: WpType, canonicalize: bool, loc: Location) { + if canonicalize { + self.canonicalize_nan( + match ty { + WpType::F32 => Size::S32, + WpType::F64 => Size::S64, + _ => unreachable!(), + }, + loc, + Location::GPR(GPR::X0), + ); + } else { + self.emit_relaxed_mov(Size::S64, loc, Location::GPR(GPR::X0)); + } + } + + fn emit_function_return_float(&mut self) { + self.assembler + .emit_mov(Size::S64, Location::GPR(GPR::X0), Location::SIMD(NEON::V0)); + } + + fn arch_supports_canonicalize_nan(&self) -> bool { + self.assembler.arch_supports_canonicalize_nan() + } + fn canonicalize_nan(&mut self, sz: Size, input: Location, output: Location) { + let mut tempn = vec![]; + let mut temps = vec![]; + let old_fpcr = 
self.set_default_nan(&mut temps); + // use FMAX (input, intput) => output to automaticaly normalize the NaN + match (sz, input, output) { + (Size::S32, Location::SIMD(_), Location::SIMD(_)) => { + self.assembler.emit_fmax(sz, input, input, output); + } + (Size::S64, Location::SIMD(_), Location::SIMD(_)) => { + self.assembler.emit_fmax(sz, input, input, output); + } + (Size::S32, Location::SIMD(_), _) | (Size::S64, Location::SIMD(_), _) => { + let tmp = self.location_to_neon(sz, output, &mut tempn, ImmType::None, false); + self.assembler.emit_fmax(sz, input, input, tmp); + self.move_location(sz, tmp, output); + } + _ => panic!( + "singlepass can't emit canonicalize_nan {:?} {:?} {:?}", + sz, input, output + ), + } + + self.restore_fpcr(old_fpcr); + for r in temps { + self.release_gpr(r); + } + for r in tempn { + self.release_simd(r); + } + } + + fn emit_illegal_op(&mut self) { + self.assembler.emit_udf(); + } + fn get_label(&mut self) -> Label { + self.assembler.new_dynamic_label() + } + fn emit_label(&mut self, label: Label) { + self.assembler.emit_label(label); + } + fn get_grp_for_call(&self) -> GPR { + GPR::X27 + } + fn emit_call_register(&mut self, reg: GPR) { + self.assembler.emit_call_register(reg); + } + fn emit_call_label(&mut self, label: Label) { + self.assembler.emit_call_label(label); + } + fn get_gpr_for_ret(&self) -> GPR { + GPR::X0 + } + fn get_simd_for_ret(&self) -> NEON { + NEON::V0 + } + + fn arch_requires_indirect_call_trampoline(&self) -> bool { + self.assembler.arch_requires_indirect_call_trampoline() + } + + fn arch_emit_indirect_call_with_trampoline(&mut self, location: Location) { + self.assembler + .arch_emit_indirect_call_with_trampoline(location); + } + + fn emit_debug_breakpoint(&mut self) { + self.assembler.emit_brk(); + } + + fn emit_call_location(&mut self, location: Location) { + let mut temps = vec![]; + let loc = self.location_to_reg( + Size::S64, + location, + &mut temps, + ImmType::None, + true, + Some(GPR::X27), + ); + match loc 
{ + Location::GPR(reg) => self.assembler.emit_call_register(reg), + _ => unreachable!(), + } + for r in temps { + self.release_gpr(r); + } + } + + fn location_address(&mut self, _size: Size, _source: Location, _dest: Location) { + unimplemented!(); + } + // logic + fn location_and(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { + unimplemented!(); + } + fn location_xor(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { + unimplemented!(); + } + fn location_or(&mut self, _size: Size, _source: Location, _dest: Location, _flags: bool) { + unimplemented!(); + } + fn location_test(&mut self, _size: Size, _source: Location, _dest: Location) { + unimplemented!(); + } + // math + fn location_add(&mut self, size: Size, source: Location, dest: Location, flags: bool) { + let mut temps = vec![]; + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits12, true, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, true, None); + if flags { + self.assembler.emit_adds(size, dst, src, dst); + } else { + self.assembler.emit_add(size, dst, src, dst); + } + if dst != dest { + self.move_location(size, dst, dest); + } + for r in temps { + self.release_gpr(r); + } + } + fn location_sub(&mut self, size: Size, source: Location, dest: Location, flags: bool) { + let mut temps = vec![]; + let src = self.location_to_reg(size, source, &mut temps, ImmType::Bits12, true, None); + let dst = self.location_to_reg(size, dest, &mut temps, ImmType::None, true, None); + if flags { + self.assembler.emit_subs(size, dst, src, dst); + } else { + self.assembler.emit_sub(size, dst, src, dst); + } + if dst != dest { + self.move_location(size, dst, dest); + } + for r in temps { + self.release_gpr(r); + } + } + fn location_cmp(&mut self, size: Size, source: Location, dest: Location) { + self.emit_relaxed_binop(Assembler::emit_cmp, size, source, dest, false); + } + fn jmp_unconditionnal(&mut self, label: Label) { + 
self.assembler.emit_b_label(label); + } + fn jmp_on_equal(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Eq, label); + } + fn jmp_on_different(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Ne, label); + } + fn jmp_on_above(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Hi, label); + } + fn jmp_on_aboveequal(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Cs, label); + } + fn jmp_on_belowequal(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Ls, label); + } + fn jmp_on_overflow(&mut self, label: Label) { + self.assembler.emit_bcond_label(Condition::Cs, label); + } + + // jmp table + fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) { + let tmp1 = self.acquire_temp_gpr().unwrap(); + let tmp2 = self.acquire_temp_gpr().unwrap(); + + self.assembler.emit_load_label(tmp1, label); + self.move_location(Size::S32, cond, Location::GPR(tmp2)); + + self.assembler.emit_add_lsl( + Size::S64, + Location::GPR(tmp1), + Location::GPR(tmp2), + 2, + Location::GPR(tmp2), + ); + self.assembler.emit_b_register(tmp2); + self.release_gpr(tmp2); + self.release_gpr(tmp1); + } + + fn align_for_loop(&mut self) { + // noting to do on ARM64 + } + + fn emit_ret(&mut self) { + self.assembler.emit_ret(); + } + + fn emit_push(&mut self, size: Size, loc: Location) { + self.emit_push(size, loc); + } + fn emit_pop(&mut self, size: Size, loc: Location) { + self.emit_pop(size, loc); + } + + fn emit_memory_fence(&mut self) { + self.assembler.emit_dmb(); + } + + fn location_neg( + &mut self, + _size_val: Size, // size of src + _signed: bool, + _source: Location, + _size_op: Size, + _dest: Location, + ) { + unimplemented!(); + } + + fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) { + let tmp = self.acquire_temp_gpr().unwrap(); + self.assembler + .emit_mov_imm(Location::GPR(tmp), imm32 as u64); + self.assembler.emit_mul( + size, + Location::GPR(gpr), + 
Location::GPR(tmp), + Location::GPR(gpr), + ); + self.release_gpr(tmp); + } + + // relaxed binop based... + fn emit_relaxed_mov(&mut self, sz: Size, src: Location, dst: Location) { + self.emit_relaxed_binop(Assembler::emit_mov, sz, src, dst, true); + } + fn emit_relaxed_cmp(&mut self, sz: Size, src: Location, dst: Location) { + self.emit_relaxed_binop(Assembler::emit_cmp, sz, src, dst, false); + } + fn emit_relaxed_zero_extension( + &mut self, + _sz_src: Size, + _src: Location, + _sz_dst: Size, + _dst: Location, + ) { + unimplemented!(); + } + fn emit_relaxed_sign_extension( + &mut self, + sz_src: Size, + src: Location, + sz_dst: Size, + dst: Location, + ) { + match (src, dst) { + (Location::Memory(_, _), Location::GPR(_)) => match sz_src { + Size::S8 => self.emit_relaxed_ldr8s(sz_dst, dst, src), + Size::S16 => self.emit_relaxed_ldr16s(sz_dst, dst, src), + Size::S32 => self.emit_relaxed_ldr32s(sz_dst, dst, src), + _ => unreachable!(), + }, + _ => { + let mut temps = vec![]; + let src = self.location_to_reg(sz_src, src, &mut temps, ImmType::None, true, None); + let dest = + self.location_to_reg(sz_dst, dst, &mut temps, ImmType::None, false, None); + match sz_src { + Size::S8 => self.assembler.emit_sxtb(sz_dst, src, dest), + Size::S16 => self.assembler.emit_sxth(sz_dst, src, dest), + Size::S32 => self.assembler.emit_sxtw(sz_dst, src, dest), + _ => unreachable!(), + }; + if dst != dest { + self.move_location(sz_dst, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + } + } + + fn emit_binop_add32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_add, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Bits12, + ); + } + fn emit_binop_sub32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_sub, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Bits12, + ); + } + fn emit_binop_mul32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { 
+ self.emit_relaxed_binop3( + Assembler::emit_mul, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::None, + ); + } + fn emit_binop_udiv32( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); + + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S32, src1, src2, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset + } + fn emit_binop_sdiv32( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); + + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let label_nooverflow = self.assembler.get_label(); + let tmp = self.location_to_reg( + Size::S32, + Location::Imm32(0x80000000), + &mut temps, + ImmType::None, + true, + None, + ); + self.assembler.emit_cmp(Size::S32, tmp, src1); + self.assembler + .emit_bcond_label(Condition::Ne, label_nooverflow); + self.assembler.emit_movn(Size::S32, tmp, 0); + self.assembler.emit_cmp(Size::S32, tmp, src2); + self.assembler + .emit_bcond_label(Condition::Eq, integer_overflow); + let offset = 
self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_label(label_nooverflow); + self.assembler.emit_sdiv(Size::S32, src1, src2, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset + } + fn emit_binop_urem32( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); + let dest = if dest == src1 || dest == src2 { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, dest, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + dest + }; + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S32, src1, src2, dest); + // unsigned remainder : src1 - (src1/src2)*src2 + self.assembler.emit_msub(Size::S32, dest, src2, src1, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset + } + fn emit_binop_srem32( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S32, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); + let dest = if dest == src1 || dest == src2 { + let tmp = 
self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, dest, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + dest + }; + self.assembler + .emit_cbz_label(Size::S32, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_sdiv(Size::S32, src1, src2, dest); + // unsigned remainder : src1 - (src1/src2)*src2 + self.assembler.emit_msub(Size::S32, dest, src2, src1, dest); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset + } + fn emit_binop_and32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_and, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); + } + fn emit_binop_or32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_or, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); + } + fn emit_binop_xor32(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_eor, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Logical32, + ); + } + fn i32_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ge, loc_a, loc_b, ret); + } + fn i32_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Gt, loc_a, loc_b, ret); + } + fn i32_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Le, loc_a, loc_b, ret); + } + fn i32_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Lt, loc_a, loc_b, ret); + } + fn i32_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Cs, loc_a, loc_b, ret); + } + fn 
i32_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Hi, loc_a, loc_b, ret); + } + fn i32_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ls, loc_a, loc_b, ret); + } + fn i32_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Cc, loc_a, loc_b, ret); + } + fn i32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Ne, loc_a, loc_b, ret); + } + fn i32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i32_dynamic_b(Condition::Eq, loc_a, loc_b, ret); + } + fn i32_clz(&mut self, src: Location, dst: Location) { + self.emit_relaxed_binop(Assembler::emit_clz, Size::S32, src, dst, true); + } + fn i32_ctz(&mut self, src: Location, dst: Location) { + let mut temps = vec![]; + let src = self.location_to_reg(Size::S32, src, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, dst, &mut temps, ImmType::None, false, None); + self.assembler.emit_rbit(Size::S32, src, dest); + self.assembler.emit_clz(Size::S32, dest, dest); + if dst != dest { + self.move_location(Size::S32, dest, dst); + } + for r in temps { + self.release_gpr(r); + } + } + fn i32_popcnt(&mut self, loc: Location, ret: Location) { + // no opcode for that. 
+ // 2 solutions: using NEON CNT, that count bits per Byte, or using clz with some shift and loop + let mut temps = vec![]; + let src = self.location_to_reg(Size::S32, loc, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); + let src = if src == loc { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + self.assembler.emit_mov(Size::S32, src, Location::GPR(tmp)); + Location::GPR(tmp) + } else { + src + }; + let tmp = { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + Location::GPR(tmp) + }; + let label_loop = self.assembler.get_label(); + let label_exit = self.assembler.get_label(); + self.assembler + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest); // 0 => dest + self.assembler.emit_cbz_label(Size::S32, src, label_exit); // src==0, exit + self.assembler.emit_label(label_loop); // loop: + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest); // inc dest + self.assembler.emit_clz(Size::S32, src, tmp); // clz src => tmp + self.assembler + .emit_add(Size::S32, tmp, Location::Imm8(1), tmp); // inc tmp + self.assembler.emit_lsl(Size::S32, src, tmp, src); // src << tmp => src + self.assembler.emit_cbnz_label(Size::S32, src, label_loop); // if src!=0 goto loop + self.assembler.emit_label(label_exit); + if ret != dest { + self.move_location(Size::S32, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + } + fn i32_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsl, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); + } + fn i32_shr(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsr, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); + } + fn i32_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_asr, + 
Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); + } + fn i32_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + let mut temps = vec![]; + let src2 = match loc_b { + Location::Imm8(imm) => Location::Imm8(32 - (imm & 31)), + Location::Imm32(imm) => Location::Imm8(32 - (imm & 31) as u8), + Location::Imm64(imm) => Location::Imm8(32 - (imm & 31) as u8), + _ => { + let tmp1 = self.location_to_reg( + Size::S32, + Location::Imm32(32), + &mut temps, + ImmType::None, + true, + None, + ); + let tmp2 = + self.location_to_reg(Size::S32, loc_b, &mut temps, ImmType::None, true, None); + self.assembler.emit_sub(Size::S32, tmp1, tmp2, tmp1); + tmp1 + } + }; + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S32, + loc_a, + src2, + ret, + ImmType::Shift32No0, + ); + for r in temps { + self.release_gpr(r); + } + } + fn i32_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S32, + loc_a, + loc_b, + ret, + ImmType::Shift32No0, + ); + } + fn i32_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 4, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr32(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i32_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr8(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i32_load_8s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: 
i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 1, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr8s(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i32_load_16u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr16(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i32_load_16s( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 2, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr16s(Size::S32, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i32_atomic_load( + &mut self, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_load_8u( + &mut self, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_atomic_load_16u( + &mut self, + _addr: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + fn i32_save( + &mut self, + target_value: Location, + memarg: &MemoryImmediate, + target_addr: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + target_addr, + memarg, + false, + 4, + 
need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str32(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    /// Emits an 8-bit store of `target_value` to the wasm address `target_addr`.
+    ///
+    /// The address is resolved by `memory_op`; the closure then stores with
+    /// `emit_relaxed_str8` at offset 0 of the register it is handed.
+    fn i32_save_8(
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            // Access width in bytes, as passed by i32_load_8u and i64_save_8.
+            // The previous value of 4 described a full-word store
+            // (presumably rejecting valid stores near the end of memory).
+            1,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str8(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    /// Emits a 16-bit store of `target_value` to the wasm address `target_addr`.
+    ///
+    /// The address is resolved by `memory_op`; the closure then stores with
+    /// `emit_relaxed_str16` at offset 0 of the register it is handed.
+    fn i32_save_16(
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            // Access width in bytes, as passed by i32_load_16u (was 4).
+            2,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str16(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    /// Atomic 32-bit store: not implemented in this backend yet; panics via `unimplemented!()`.
+    fn i32_atomic_save(
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    /// Atomic 8-bit store: not implemented in this backend yet; panics via `unimplemented!()`.
+    fn i32_atomic_save_8(
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    /// Atomic 16-bit store: not implemented in this backend yet; panics via `unimplemented!()`.
+    fn i32_atomic_save_16(
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    // i32 atomic Add with i32
+    // Not implemented in this backend yet; panics via `unimplemented!()`.
+    fn i32_atomic_add(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    // i32 atomic Add
with u8 + fn i32_atomic_add_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Add with u16 + fn i32_atomic_add_16u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with i32 + fn i32_atomic_sub( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with u8 + fn i32_atomic_sub_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Sub with u16 + fn i32_atomic_sub_16u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with i32 + fn i32_atomic_and( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with u8 + fn i32_atomic_and_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic And with u16 + fn i32_atomic_and_16u( + &mut self, + _loc: Location, + _target: Location, + 
_memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Or with i32 + fn i32_atomic_or( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Or with u8 + fn i32_atomic_or_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Or with u16 + fn i32_atomic_or_16u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Xor with i32 + fn i32_atomic_xor( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Xor with u8 + fn i32_atomic_xor_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Xor with u16 + fn i32_atomic_xor_16u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with i32 + fn i32_atomic_xchg( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: 
bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with u8 + fn i32_atomic_xchg_8u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with u16 + fn i32_atomic_xchg_16u( + &mut self, + _loc: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with i32 + fn i32_atomic_cmpxchg( + &mut self, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with u8 + fn i32_atomic_cmpxchg_8u( + &mut self, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + // i32 atomic Exchange with u16 + fn i32_atomic_cmpxchg_16u( + &mut self, + _new: Location, + _cmp: Location, + _target: Location, + _memarg: &MemoryImmediate, + _ret: Location, + _need_check: bool, + _imported_memories: bool, + _offset: i32, + _heap_access_oob: Label, + ) { + unimplemented!(); + } + + fn move_with_reloc( + &mut self, + reloc_target: RelocationTarget, + relocations: &mut Vec, + ) { + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw0, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movz(Location::GPR(GPR::X27), 0); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw1, + 
reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 16); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw2, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 32); + let reloc_at = self.assembler.get_offset().0; + relocations.push(Relocation { + kind: RelocationKind::Arm64Movw3, + reloc_target, + offset: reloc_at as u32, + addend: 0, + }); + self.assembler.emit_movk(Location::GPR(GPR::X27), 0, 48); + } + + fn emit_binop_add64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_add, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Bits12, + ); + } + fn emit_binop_sub64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_sub, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Bits12, + ); + } + fn emit_binop_mul64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_mul, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::None, + ); + } + fn emit_binop_udiv64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); + + self.assembler + .emit_cbz_label(Size::S64, src2, integer_division_by_zero); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_udiv(Size::S64, src1, src2, dest); + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in temps { + 
self.release_gpr(r); + } + offset + } + fn emit_binop_sdiv64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); + + self.assembler + .emit_cbz_label(Size::S64, src2, integer_division_by_zero); + let label_nooverflow = self.assembler.get_label(); + let tmp = self.location_to_reg( + Size::S64, + Location::Imm64(0x8000000000000000), + &mut temps, + ImmType::None, + true, + None, + ); + self.assembler.emit_cmp(Size::S64, tmp, src1); + self.assembler + .emit_bcond_label(Condition::Ne, label_nooverflow); + self.assembler.emit_movn(Size::S64, tmp, 0); + self.assembler.emit_cmp(Size::S64, tmp, src2); + self.assembler + .emit_bcond_label(Condition::Eq, integer_overflow); + let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow); + self.assembler.emit_label(label_nooverflow); + self.assembler.emit_sdiv(Size::S64, src1, src2, dest); + if ret != dest { + self.move_location(Size::S64, dest, ret); + } + for r in temps { + self.release_gpr(r); + } + offset + } + fn emit_binop_urem64( + &mut self, + loc_a: Location, + loc_b: Location, + ret: Location, + integer_division_by_zero: Label, + _integer_overflow: Label, + ) -> usize { + let mut temps = vec![]; + let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None); + let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None); + let dest = if dest == src1 || dest == src2 { + let tmp = self.acquire_temp_gpr().unwrap(); + temps.push(tmp.clone()); + 
self.assembler.emit_mov(Size::S64, dest, Location::GPR(tmp)); // 64-bit copy, as in emit_binop_srem64 (was Size::S32)
+            Location::GPR(tmp)
+        } else {
+            dest
+        };
+        self.assembler
+            .emit_cbz_label(Size::S64, src2, integer_division_by_zero);
+        let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
+        self.assembler.emit_udiv(Size::S64, src1, src2, dest);
+        // unsigned remainder : src1 - (src1/src2)*src2
+        self.assembler.emit_msub(Size::S64, dest, src2, src1, dest);
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+        offset
+    }
+    /// Emits a 64-bit signed remainder of `loc_a` by `loc_b` into `ret`.
+    ///
+    /// A zero divisor branches to `integer_division_by_zero`; `_integer_overflow`
+    /// is unused. Returns the value produced by
+    /// `mark_instruction_with_trap_code`, taken just before the `sdiv`.
+    fn emit_binop_srem64(
+        &mut self,
+        loc_a: Location,
+        loc_b: Location,
+        ret: Location,
+        integer_division_by_zero: Label,
+        _integer_overflow: Label,
+    ) -> usize {
+        let mut temps = vec![];
+        let src1 = self.location_to_reg(Size::S64, loc_a, &mut temps, ImmType::None, true, None);
+        let src2 = self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None);
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        // The quotient must not overwrite an operand: `msub` below still reads
+        // both `src1` and `src2`, so switch to a scratch register on aliasing.
+        let dest = if dest == src1 || dest == src2 {
+            let tmp = self.acquire_temp_gpr().unwrap();
+            temps.push(tmp);
+            self.assembler.emit_mov(Size::S64, dest, Location::GPR(tmp));
+            Location::GPR(tmp)
+        } else {
+            dest
+        };
+        self.assembler
+            .emit_cbz_label(Size::S64, src2, integer_division_by_zero);
+        let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
+        self.assembler.emit_sdiv(Size::S64, src1, src2, dest);
+        // signed remainder : src1 - (src1/src2)*src2
+        self.assembler.emit_msub(Size::S64, dest, src2, src1, dest);
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+        offset
+    }
+    /// Emits a 64-bit bitwise AND of `loc_a` and `loc_b` into `ret` through
+    /// `emit_relaxed_binop3`, with immediates classified as `ImmType::Logical64`.
+    fn emit_binop_and64(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_binop3(
+            Assembler::emit_and,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::Logical64,
+        );
+    }
+    fn emit_binop_or64(&mut self, loc_a: Location, loc_b: Location, ret: Location)
{ + self.emit_relaxed_binop3( + Assembler::emit_or, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Logical64, + ); + } + fn emit_binop_xor64(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_eor, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Logical64, + ); + } + fn i64_cmp_ge_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Ge, loc_a, loc_b, ret); + } + fn i64_cmp_gt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Gt, loc_a, loc_b, ret); + } + fn i64_cmp_le_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Le, loc_a, loc_b, ret); + } + fn i64_cmp_lt_s(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Lt, loc_a, loc_b, ret); + } + fn i64_cmp_ge_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Cs, loc_a, loc_b, ret); + } + fn i64_cmp_gt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Hi, loc_a, loc_b, ret); + } + fn i64_cmp_le_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Ls, loc_a, loc_b, ret); + } + fn i64_cmp_lt_u(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Cc, loc_a, loc_b, ret); + } + fn i64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Ne, loc_a, loc_b, ret); + } + fn i64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_cmpop_i64_dynamic_b(Condition::Eq, loc_a, loc_b, ret); + } + fn i64_clz(&mut self, src: Location, dst: Location) { + self.emit_relaxed_binop(Assembler::emit_clz, Size::S64, src, dst, true); + } + fn i64_ctz(&mut self, src: 
Location, dst: Location) {
+        let mut temps = vec![];
+        let src = self.location_to_reg(Size::S64, src, &mut temps, ImmType::None, true, None);
+        let dest = self.location_to_reg(Size::S64, dst, &mut temps, ImmType::None, false, None);
+        // Trailing zeros of src == leading zeros of the bit-reversed value.
+        self.assembler.emit_rbit(Size::S64, src, dest);
+        self.assembler.emit_clz(Size::S64, dest, dest);
+        if dst != dest {
+            self.move_location(Size::S64, dest, dst);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    /// Emits a loop that counts the set bits of the 64-bit value at `loc` into `ret`.
+    ///
+    /// Each iteration increments the counter, then shifts the remaining value
+    /// left past its highest set bit; the loop ends when the value reaches zero.
+    fn i64_popcnt(&mut self, loc: Location, ret: Location) {
+        let mut temps = vec![];
+        let src = self.location_to_reg(Size::S64, loc, &mut temps, ImmType::None, true, None);
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        // The loop below shifts `src` in place, so work on a scratch copy when
+        // `location_to_reg` handed back `loc` itself.
+        let src = if src == loc {
+            let tmp = self.acquire_temp_gpr().unwrap();
+            temps.push(tmp);
+            self.assembler.emit_mov(Size::S64, src, Location::GPR(tmp));
+            Location::GPR(tmp)
+        } else {
+            src
+        };
+        // Scratch register receiving the leading-zero count on every iteration.
+        let tmp = {
+            let tmp = self.acquire_temp_gpr().unwrap();
+            temps.push(tmp);
+            Location::GPR(tmp)
+        };
+        let label_loop = self.assembler.get_label();
+        let label_exit = self.assembler.get_label();
+        // The counter never exceeds 64, so it is kept with 32-bit operations.
+        self.assembler
+            .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest); // 0 => dest
+        self.assembler.emit_cbz_label(Size::S64, src, label_exit); // src==0, exit
+        self.assembler.emit_label(label_loop); // loop:
+        self.assembler
+            .emit_add(Size::S32, dest, Location::Imm8(1), dest); // inc dest
+        self.assembler.emit_clz(Size::S64, src, tmp); // clz src => tmp
+        // Drop the leading zeros and then the topmost set bit in two steps.
+        // A single register shift by clz+1 can reach 64 (src == 1); the A64
+        // variable shift takes the amount modulo the register width, so that
+        // shift would be by 0, src would never change and the loop would not end.
+        self.assembler.emit_lsl(Size::S64, src, tmp, src); // src << tmp => src
+        self.assembler.emit_add(Size::S64, src, src, src); // src + src (== src << 1) => src
+        self.assembler.emit_cbnz_label(Size::S64, src, label_loop); // if src!=0 goto loop
+        self.assembler.emit_label(label_exit);
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    /// Emits a 64-bit `lsl` of `loc_a` by `loc_b` into `ret` through
+    /// `emit_relaxed_binop3`, with immediates classified as `ImmType::Shift64No0`.
+    fn i64_shl(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        self.emit_relaxed_binop3(
+            Assembler::emit_lsl,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::Shift64No0,
+        );
+    }
+    fn i64_shr(&mut self, loc_a:
Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_lsr, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); + } + fn i64_sar(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_asr, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); + } + fn i64_rol(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + // there is no ROL on ARM64. We use ROR with 64-value instead + let mut temps = vec![]; + let src2 = match loc_b { + Location::Imm8(imm) => Location::Imm8(64 - (imm & 63)), + Location::Imm32(imm) => Location::Imm8(64 - (imm & 63) as u8), + Location::Imm64(imm) => Location::Imm8(64 - (imm & 63) as u8), + _ => { + let tmp1 = self.location_to_reg( + Size::S64, + Location::Imm32(64), + &mut temps, + ImmType::None, + true, + None, + ); + let tmp2 = + self.location_to_reg(Size::S64, loc_b, &mut temps, ImmType::None, true, None); + self.assembler.emit_sub(Size::S64, tmp1, tmp2, tmp1); + tmp1 + } + }; + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S64, + loc_a, + src2, + ret, + ImmType::Shift64No0, + ); + for r in temps { + self.release_gpr(r); + } + } + fn i64_ror(&mut self, loc_a: Location, loc_b: Location, ret: Location) { + self.emit_relaxed_binop3( + Assembler::emit_ror, + Size::S64, + loc_a, + loc_b, + ret, + ImmType::Shift64No0, + ); + } + fn i64_load( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + self.memory_op( + addr, + memarg, + false, + 8, + need_check, + imported_memories, + offset, + heap_access_oob, + |this, addr| { + this.emit_relaxed_ldr64(Size::S64, ret, Location::Memory(addr, 0)); + }, + ); + } + fn i64_load_8u( + &mut self, + addr: Location, + memarg: &MemoryImmediate, + ret: Location, + need_check: bool, + imported_memories: bool, + offset: i32, + heap_access_oob: Label, + ) { + 
self.memory_op(
+            addr,
+            memarg,
+            false,
+            1,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr8(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_load_8s( // 1-byte load into a 64-bit result via the `ldr8s` helper (presumably sign-extending — confirm in helper)
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false, // NOTE(review): meaning of this flag is not visible in this chunk
+            1, // presumably the access width in bytes — confirm in memory_op
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr8s(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_load_16u( // 2-byte load into a 64-bit result via the `ldr16` helper (presumably zero-extending — confirm)
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false,
+            2,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr16(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_load_16s( // 2-byte load via the `ldr16s` helper (presumably sign-extending — confirm)
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false,
+            2,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr16s(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_load_32u( // 4-byte load via the `ldr32` helper (presumably zero-extending — confirm)
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false,
+            4,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr32(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_load_32s(
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+
heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false,
+            4,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_ldr32s(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_atomic_load( // atomic 64-bit load: not implemented in this backend yet
+        &mut self,
+        _addr: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_load_8u( // atomic 8-bit load: not implemented yet
+        &mut self,
+        _addr: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_load_16u( // atomic 16-bit load: not implemented yet
+        &mut self,
+        _addr: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_load_32u( // atomic 32-bit load: not implemented yet
+        &mut self,
+        _addr: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_save( // 64-bit store of `target_value`; `memory_op` is handed a closure that emits the store
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false, // NOTE(review): meaning of this flag is not visible in this chunk
+            8, // presumably the access width in bytes — confirm in memory_op
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str64(target_value, Location::Memory(addr, 0)); // store at the address register supplied by memory_op, displacement 0
+            },
+        );
+    }
+    fn i64_save_8( // store through the `str8` helper, width argument 1
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            1,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str8(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn
i64_save_16(
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            2,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str16(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_save_32( // store through the `str32` helper, width argument 4
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            target_addr,
+            memarg,
+            false, // NOTE(review): meaning of this flag is not visible in this chunk
+            4, // presumably the access width in bytes — confirm in memory_op
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.emit_relaxed_str32(target_value, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn i64_atomic_save( // atomic 64-bit store: not implemented in this backend yet
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_save_8( // atomic 8-bit store: not implemented yet
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_save_16( // atomic 16-bit store: not implemented yet
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    fn i64_atomic_save_32( // atomic 32-bit store: not implemented yet
+        &mut self,
+        _value: Location,
+        _memarg: &MemoryImmediate,
+        _target_addr: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Add with i64 (not implemented yet: the body is a stub)
+    fn i64_atomic_add(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+
_heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    // i64 atomic Add with u8 (not implemented yet)
+    fn i64_atomic_add_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Add with u16 (not implemented yet)
+    fn i64_atomic_add_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Add with u32 (not implemented yet)
+    fn i64_atomic_add_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Sub with i64 (not implemented yet)
+    fn i64_atomic_sub(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Sub with u8 (not implemented yet)
+    fn i64_atomic_sub_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Sub with u16 (not implemented yet)
+    fn i64_atomic_sub_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Sub with u32 (not implemented yet)
+    fn i64_atomic_sub_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic And with
i64
+    fn i64_atomic_and(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic And with u8 (not implemented yet)
+    fn i64_atomic_and_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic And with u16 (not implemented yet)
+    fn i64_atomic_and_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic And with u32 (not implemented yet)
+    fn i64_atomic_and_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Or with i64 (not implemented yet)
+    fn i64_atomic_or(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Or with u8 (not implemented yet)
+    fn i64_atomic_or_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Or with u16 (not implemented yet)
+    fn i64_atomic_or_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Or with u32 (not implemented yet)
+    fn i64_atomic_or_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg:
&MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    // i64 atomic xor with i64 (not implemented yet)
+    fn i64_atomic_xor(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic xor with u8 (not implemented yet)
+    fn i64_atomic_xor_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic xor with u16 (not implemented yet)
+    fn i64_atomic_xor_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic xor with u32 (not implemented yet)
+    fn i64_atomic_xor_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Exchange with i64 (not implemented yet)
+    fn i64_atomic_xchg(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Exchange with u8 (not implemented yet)
+    fn i64_atomic_xchg_8u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Exchange with u16 (not implemented yet)
+    fn i64_atomic_xchg_16u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+
_imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!();
+    }
+    // i64 atomic Exchange with u32 (not implemented yet)
+    fn i64_atomic_xchg_32u(
+        &mut self,
+        _loc: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Compare and Exchange with i64 (not implemented yet)
+    fn i64_atomic_cmpxchg(
+        &mut self,
+        _new: Location,
+        _cmp: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Compare and Exchange with u8 (not implemented yet)
+    fn i64_atomic_cmpxchg_8u(
+        &mut self,
+        _new: Location,
+        _cmp: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Compare and Exchange with u16 (not implemented yet)
+    fn i64_atomic_cmpxchg_16u(
+        &mut self,
+        _new: Location,
+        _cmp: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+    // i64 atomic Compare and Exchange with u32 (not implemented yet)
+    fn i64_atomic_cmpxchg_32u(
+        &mut self,
+        _new: Location,
+        _cmp: Location,
+        _target: Location,
+        _memarg: &MemoryImmediate,
+        _ret: Location,
+        _need_check: bool,
+        _imported_memories: bool,
+        _offset: i32,
+        _heap_access_oob: Label,
+    ) {
+        unimplemented!(); // stub: panics if reached
+    }
+
+    fn f32_load( // 32-bit float load; `memory_op` is handed a closure that emits the load
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false, // NOTE(review): meaning of this flag is not visible in this chunk
+            4, // presumably the access width in bytes — confirm in memory_op
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.assembler // NOTE(review): raw emit_ldr here, while the integer loads go through emit_relaxed_ldr*; confirm `ret` can never be a non-register location
+                    .emit_ldr(Size::S32, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn
f32_save(
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        canonicalize: bool,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        let canonicalize = canonicalize && self.arch_supports_canonicalize_nan(); // canonicalize only when requested AND the backend reports support
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            4,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                if !canonicalize {
+                    this.emit_relaxed_str32(target_value, Location::Memory(addr, 0)); // plain 32-bit store
+                } else {
+                    this.canonicalize_nan(Size::S32, target_value, Location::Memory(addr, 0)); // helper is given the memory slot as its destination
+                }
+            },
+        );
+    }
+    fn f64_load( // 64-bit float load; `memory_op` is handed a closure that emits the load
+        &mut self,
+        addr: Location,
+        memarg: &MemoryImmediate,
+        ret: Location,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        self.memory_op(
+            addr,
+            memarg,
+            false, // NOTE(review): meaning of this flag is not visible in this chunk
+            8, // presumably the access width in bytes — confirm in memory_op
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                this.assembler // NOTE(review): raw emit_ldr here, while i64_load goes through emit_relaxed_ldr64; confirm `ret` can never be a non-register location
+                    .emit_ldr(Size::S64, ret, Location::Memory(addr, 0));
+            },
+        );
+    }
+    fn f64_save( // 64-bit float store, optionally canonicalizing NaN (same shape as f32_save)
+        &mut self,
+        target_value: Location,
+        memarg: &MemoryImmediate,
+        target_addr: Location,
+        canonicalize: bool,
+        need_check: bool,
+        imported_memories: bool,
+        offset: i32,
+        heap_access_oob: Label,
+    ) {
+        let canonicalize = canonicalize && self.arch_supports_canonicalize_nan();
+        self.memory_op(
+            target_addr,
+            memarg,
+            false,
+            8,
+            need_check,
+            imported_memories,
+            offset,
+            heap_access_oob,
+            |this, addr| {
+                if !canonicalize {
+                    this.emit_relaxed_str64(target_value, Location::Memory(addr, 0));
+                } else {
+                    this.canonicalize_nan(Size::S64, target_value, Location::Memory(addr, 0));
+                }
+            },
+        );
+    }
+
+    fn convert_f64_i64(&mut self, loc: Location, signed: bool, ret: Location) { // integer source (GPR) converted into an f64 destination (NEON register)
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_reg(Size::S64, loc, &mut gprs, ImmType::NoneXzr, true, None);
+        let dest = self.location_to_neon(Size::S64, ret, &mut neons, ImmType::None, false);
+        if signed {
+
self.assembler.emit_scvtf(Size::S64, src, Size::S64, dest);
+        } else {
+            self.assembler.emit_ucvtf(Size::S64, src, Size::S64, dest);
+        }
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_f64_i32(&mut self, loc: Location, signed: bool, ret: Location) { // 32-bit integer source converted into an f64 destination
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_reg(Size::S32, loc, &mut gprs, ImmType::NoneXzr, true, None); // source operand as a GPR location; temporaries are recorded in `gprs`
+        let dest = self.location_to_neon(Size::S64, ret, &mut neons, ImmType::None, false); // destination as a NEON location; temporaries are recorded in `neons`
+        if signed {
+            self.assembler.emit_scvtf(Size::S32, src, Size::S64, dest); // SCVTF: signed integer to floating point
+        } else {
+            self.assembler.emit_ucvtf(Size::S32, src, Size::S64, dest); // UCVTF: unsigned integer to floating point
+        }
+        if ret != dest { // result was produced in a stand-in register: copy it to the requested location
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_f32_i64(&mut self, loc: Location, signed: bool, ret: Location) { // 64-bit integer source converted into an f32 destination
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_reg(Size::S64, loc, &mut gprs, ImmType::NoneXzr, true, None);
+        let dest = self.location_to_neon(Size::S32, ret, &mut neons, ImmType::None, false);
+        if signed {
+            self.assembler.emit_scvtf(Size::S64, src, Size::S32, dest);
+        } else {
+            self.assembler.emit_ucvtf(Size::S64, src, Size::S32, dest);
+        }
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_f32_i32(&mut self, loc: Location, signed: bool, ret: Location) { // 32-bit integer source converted into an f32 destination
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_reg(Size::S32, loc, &mut gprs, ImmType::NoneXzr, true, None);
+        let dest = self.location_to_neon(Size::S32, ret, &mut neons, ImmType::None, false);
+        if signed {
+            self.assembler.emit_scvtf(Size::S32, src, Size::S32, dest);
+        } else {
+            self.assembler.emit_ucvtf(Size::S32, src, Size::S32, dest);
+        }
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_i64_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { // f64 source (NEON) converted into a 64-bit integer destination (GPR)
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_neon(Size::S64, loc, &mut neons, ImmType::None, true);
+        let dest = self.location_to_reg(Size::S64, ret, &mut gprs, ImmType::None, false, None);
+        let old_fpcr = if !sat { // non-saturating form: prepare floating-point exception state before converting
+            self.reset_exception_fpsr();
+            self.set_trap_enabled(&mut gprs) // presumably returns the register holding the previous FPCR value — confirm
+        } else {
+            GPR::XzrSp // placeholder; `old_fpcr` is only read in the `!sat` branch below
+        };
+        if signed {
+            self.assembler.emit_fcvtzs(Size::S64, src, Size::S64, dest); // FCVTZS: float to signed integer, rounding toward zero
+        } else {
+            self.assembler.emit_fcvtzu(Size::S64, src, Size::S64, dest); // FCVTZU: float to unsigned integer, rounding toward zero
+        }
+        if !sat {
+            self.trap_float_convertion_errors(old_fpcr, Size::S64, src, &mut gprs); // helper is given the saved FPCR register and the source operand
+        }
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_i32_f64(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { // f64 source converted into a 32-bit integer destination
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_neon(Size::S64, loc, &mut neons, ImmType::None, true);
+        let dest = self.location_to_reg(Size::S32, ret, &mut gprs, ImmType::None, false, None);
+        let old_fpcr = if !sat {
+            self.reset_exception_fpsr();
+            self.set_trap_enabled(&mut gprs)
+        } else {
+            GPR::XzrSp
+        };
+        if signed {
+            self.assembler.emit_fcvtzs(Size::S64, src, Size::S32, dest);
+        } else {
+            self.assembler.emit_fcvtzu(Size::S64, src, Size::S32, dest);
+        }
+        if !sat {
+            self.trap_float_convertion_errors(old_fpcr, Size::S64, src, &mut gprs);
+        }
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_i64_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) {
+        let mut gprs = vec![];
+        let mut neons = vec![];
+
let src = self.location_to_neon(Size::S32, loc, &mut neons, ImmType::None, true);
+        let dest = self.location_to_reg(Size::S64, ret, &mut gprs, ImmType::None, false, None);
+        let old_fpcr = if !sat {
+            self.reset_exception_fpsr();
+            self.set_trap_enabled(&mut gprs)
+        } else {
+            GPR::XzrSp
+        };
+        if signed {
+            self.assembler.emit_fcvtzs(Size::S32, src, Size::S64, dest);
+        } else {
+            self.assembler.emit_fcvtzu(Size::S32, src, Size::S64, dest);
+        }
+        if !sat {
+            self.trap_float_convertion_errors(old_fpcr, Size::S32, src, &mut gprs);
+        }
+        if ret != dest {
+            self.move_location(Size::S64, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_i32_f32(&mut self, loc: Location, ret: Location, signed: bool, sat: bool) { // f32 source (NEON) converted into a 32-bit integer destination (GPR)
+        let mut gprs = vec![];
+        let mut neons = vec![];
+        let src = self.location_to_neon(Size::S32, loc, &mut neons, ImmType::None, true);
+        let dest = self.location_to_reg(Size::S32, ret, &mut gprs, ImmType::None, false, None);
+        let old_fpcr = if !sat { // non-saturating form: prepare floating-point exception state before converting
+            self.reset_exception_fpsr();
+            self.set_trap_enabled(&mut gprs) // presumably returns the register holding the previous FPCR value — confirm
+        } else {
+            GPR::XzrSp // placeholder; `old_fpcr` is only read in the `!sat` branch below
+        };
+        if signed {
+            self.assembler.emit_fcvtzs(Size::S32, src, Size::S32, dest); // FCVTZS: float to signed integer, rounding toward zero
+        } else {
+            self.assembler.emit_fcvtzu(Size::S32, src, Size::S32, dest); // FCVTZU: float to unsigned integer, rounding toward zero
+        }
+        if !sat {
+            self.trap_float_convertion_errors(old_fpcr, Size::S32, src, &mut gprs);
+        }
+        if ret != dest { // result was produced in a stand-in register: copy it to the requested location
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in gprs {
+            self.release_gpr(r);
+        }
+        for r in neons {
+            self.release_simd(r);
+        }
+    }
+    fn convert_f64_f32(&mut self, loc: Location, ret: Location) { // float-to-float conversion through `emit_fcvt`, called with Size::S32
+        self.emit_relaxed_binop_neon(Assembler::emit_fcvt, Size::S32, loc, ret, true); // NOTE(review): whether the size names the source or the destination width is decided in the helper — confirm
+    }
+    fn convert_f32_f64(&mut self, loc: Location, ret: Location) { // float-to-float conversion through `emit_fcvt`, called with Size::S64
+        self.emit_relaxed_binop_neon(Assembler::emit_fcvt, Size::S64, loc, ret, true);
+    }
+    fn f64_neg(&mut self, loc: Location, ret: Location) { // f64 negation through `emit_fneg`
+        self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S64, loc, ret, true);
+    }
+    fn
f64_abs(&mut self, loc: Location, ret: Location) {
+        let tmp = self.acquire_temp_gpr().unwrap();
+
+        self.move_location(Size::S64, loc, Location::GPR(tmp));
+        self.assembler.emit_and(
+            Size::S64,
+            Location::GPR(tmp),
+            Location::Imm64(0x7fffffffffffffffu64), // mask keeps bits 0..=62 and clears bit 63, the sign bit of the f64 bit pattern
+            Location::GPR(tmp),
+        );
+        self.move_location(Size::S64, Location::GPR(tmp), ret);
+
+        self.release_gpr(tmp);
+    }
+    fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) { // tmp1 = (tmp1 without bit 63) | (bit 63 of tmp2); tmp2 is overwritten too
+        self.assembler.emit_and(
+            Size::S64,
+            Location::GPR(tmp1),
+            Location::Imm64(0x7fffffffffffffffu64), // keep everything but the top bit of tmp1
+            Location::GPR(tmp1),
+        );
+
+        self.assembler.emit_and(
+            Size::S64,
+            Location::GPR(tmp2),
+            Location::Imm64(0x8000000000000000u64), // keep only the top bit of tmp2
+            Location::GPR(tmp2),
+        );
+
+        self.assembler.emit_or(
+            Size::S64,
+            Location::GPR(tmp1),
+            Location::GPR(tmp2),
+            Location::GPR(tmp1),
+        );
+    }
+    fn f64_sqrt(&mut self, loc: Location, ret: Location) { // square root through `emit_fsqrt`
+        self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S64, loc, ret, true);
+    }
+    fn f64_trunc(&mut self, loc: Location, ret: Location) { // FRINTZ: round to integral, toward zero
+        self.emit_relaxed_binop_neon(Assembler::emit_frintz, Size::S64, loc, ret, true);
+    }
+    fn f64_ceil(&mut self, loc: Location, ret: Location) { // FRINTP: round to integral, toward plus infinity
+        self.emit_relaxed_binop_neon(Assembler::emit_frintp, Size::S64, loc, ret, true);
+    }
+    fn f64_floor(&mut self, loc: Location, ret: Location) { // FRINTM: round to integral, toward minus infinity
+        self.emit_relaxed_binop_neon(Assembler::emit_frintm, Size::S64, loc, ret, true);
+    }
+    fn f64_nearest(&mut self, loc: Location, ret: Location) { // FRINTN: round to integral, to nearest with ties to even
+        self.emit_relaxed_binop_neon(Assembler::emit_frintn, Size::S64, loc, ret, true);
+    }
+    fn f64_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a >= b is evaluated as b <= a: note the swapped operands below
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ls); // dest = 1 when the LS condition holds, else 0; per the A64 FCMP flag results LS does not hold for unordered (NaN) operands
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a > b is evaluated as b < a: note the swapped operands below
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_b, loc_a, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Cc); // dest = 1 when carry is clear; per the A64 FCMP flag results carry is set for unordered (NaN) operands
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a <= b: compare in source order, read the LS condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ls);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a < b: compare in source order, read the CC condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Cc);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a != b: compare, read the NE condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ne);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a == b: compare, read the EQ condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S64, ret, &mut
temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S64, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Eq);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 minimum through `emit_fmin`, with FPCR adjusted around the operation
+        let mut temps = vec![];
+        let old_fpcr = self.set_default_nan(&mut temps); // presumably enables default-NaN mode and returns the previous FPCR — confirm in helper
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmin,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+        self.restore_fpcr(old_fpcr); // put the saved FPCR value back before releasing the temporaries
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 maximum through `emit_fmax`, same FPCR save/restore as f64_min
+        let mut temps = vec![];
+        let old_fpcr = self.set_default_nan(&mut temps);
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmax,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+        self.restore_fpcr(old_fpcr);
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f64_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 addition through `emit_fadd`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fadd,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f64_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 subtraction through `emit_fsub`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fsub,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f64_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 multiplication through `emit_fmul`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmul,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f64_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f64 division through `emit_fdiv`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fdiv,
+            Size::S64,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f32_neg(&mut self, loc: Location, ret: Location) { // f32 negation through `emit_fneg`
+        self.emit_relaxed_binop_neon(Assembler::emit_fneg, Size::S32, loc, ret, true);
+    }
+    fn f32_abs(&mut self, loc: Location, ret: Location) {
+        let tmp =
self.acquire_temp_gpr().unwrap();
+        self.move_location(Size::S32, loc, Location::GPR(tmp));
+        self.assembler.emit_and(
+            Size::S32,
+            Location::GPR(tmp),
+            Location::Imm32(0x7fffffffu32), // mask keeps bits 0..=30 and clears bit 31, the sign bit of the f32 bit pattern
+            Location::GPR(tmp),
+        );
+        self.move_location(Size::S32, Location::GPR(tmp), ret);
+        self.release_gpr(tmp);
+    }
+    fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) { // tmp1 = (tmp1 without bit 31) | (bit 31 of tmp2); tmp2 is overwritten too
+        self.assembler.emit_and(
+            Size::S32,
+            Location::GPR(tmp1),
+            Location::Imm32(0x7fffffffu32), // keep everything but the top bit of tmp1
+            Location::GPR(tmp1),
+        );
+        self.assembler.emit_and(
+            Size::S32,
+            Location::GPR(tmp2),
+            Location::Imm32(0x80000000u32), // keep only the top bit of tmp2
+            Location::GPR(tmp2),
+        );
+        self.assembler.emit_or(
+            Size::S32,
+            Location::GPR(tmp1),
+            Location::GPR(tmp2),
+            Location::GPR(tmp1),
+        );
+    }
+    fn f32_sqrt(&mut self, loc: Location, ret: Location) { // square root through `emit_fsqrt`
+        self.emit_relaxed_binop_neon(Assembler::emit_fsqrt, Size::S32, loc, ret, true);
+    }
+    fn f32_trunc(&mut self, loc: Location, ret: Location) { // FRINTZ: round to integral, toward zero
+        self.emit_relaxed_binop_neon(Assembler::emit_frintz, Size::S32, loc, ret, true);
+    }
+    fn f32_ceil(&mut self, loc: Location, ret: Location) { // FRINTP: round to integral, toward plus infinity
+        self.emit_relaxed_binop_neon(Assembler::emit_frintp, Size::S32, loc, ret, true);
+    }
+    fn f32_floor(&mut self, loc: Location, ret: Location) { // FRINTM: round to integral, toward minus infinity
+        self.emit_relaxed_binop_neon(Assembler::emit_frintm, Size::S32, loc, ret, true);
+    }
+    fn f32_nearest(&mut self, loc: Location, ret: Location) { // FRINTN: round to integral, to nearest with ties to even
+        self.emit_relaxed_binop_neon(Assembler::emit_frintn, Size::S32, loc, ret, true);
+    }
+    fn f32_cmp_ge(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a >= b is evaluated as b <= a: note the swapped operands below
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ls); // dest = 1 when the LS condition holds, else 0; per the A64 FCMP flag results LS does not hold for unordered (NaN) operands
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_cmp_gt(&mut self, loc_a: Location, loc_b: Location, ret: Location) {
+        let mut
temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_b, loc_a, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Cc);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_cmp_le(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a <= b: compare in source order, read the LS condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None); // register that receives the 0/1 result; temporaries are recorded in `temps`
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ls); // per the A64 FCMP flag results LS does not hold for unordered (NaN) operands
+        if ret != dest { // result was produced in a stand-in register: copy it to the requested location
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_cmp_lt(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a < b: compare in source order, read the CC condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Cc); // per the A64 FCMP flag results carry is set for unordered (NaN) operands
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_cmp_ne(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a != b: compare, read the NE condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp, Size::S32, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Ne);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_cmp_eq(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // a == b: compare, read the EQ condition
+        let mut temps = vec![];
+        let dest = self.location_to_reg(Size::S32, ret, &mut temps, ImmType::None, false, None);
+        self.emit_relaxed_binop_neon(Assembler::emit_fcmp,
Size::S32, loc_a, loc_b, false);
+        self.assembler.emit_cset(Size::S32, dest, Condition::Eq);
+        if ret != dest {
+            self.move_location(Size::S32, dest, ret);
+        }
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_min(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 minimum through `emit_fmin`, with FPCR adjusted around the operation
+        let mut temps = vec![];
+        let old_fpcr = self.set_default_nan(&mut temps); // presumably enables default-NaN mode and returns the previous FPCR — confirm in helper
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmin,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+        self.restore_fpcr(old_fpcr); // put the saved FPCR value back before releasing the temporaries
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_max(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 maximum through `emit_fmax`, same FPCR save/restore as f32_min
+        let mut temps = vec![];
+        let old_fpcr = self.set_default_nan(&mut temps);
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmax,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+        self.restore_fpcr(old_fpcr);
+        for r in temps {
+            self.release_gpr(r);
+        }
+    }
+    fn f32_add(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 addition through `emit_fadd`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fadd,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f32_sub(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 subtraction through `emit_fsub`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fsub,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f32_mul(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 multiplication through `emit_fmul`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fmul,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+    fn f32_div(&mut self, loc_a: Location, loc_b: Location, ret: Location) { // f32 division through `emit_fdiv`
+        self.emit_relaxed_binop3_neon(
+            Assembler::emit_fdiv,
+            Size::S32,
+            loc_a,
+            loc_b,
+            ret,
+            ImmType::None,
+        );
+    }
+
+    fn gen_std_trampoline( // thin wrapper: forwards to the free function `gen_std_trampoline_arm64`
+        &self,
+        sig: &FunctionType,
+        calling_convention: CallingConvention,
+    ) -> FunctionBody {
+        gen_std_trampoline_arm64(sig, calling_convention)
+    }
+    // Generates dynamic import function call trampoline for a function type.
+ fn gen_std_dynamic_import_trampoline( + &self, + vmoffsets: &VMOffsets, + sig: &FunctionType, + calling_convention: CallingConvention, + ) -> FunctionBody { + gen_std_dynamic_import_trampoline_arm64(vmoffsets, sig, calling_convention) + } + // Singlepass calls import functions through a trampoline. + fn gen_import_call_trampoline( + &self, + vmoffsets: &VMOffsets, + index: FunctionIndex, + sig: &FunctionType, + calling_convention: CallingConvention, + ) -> CustomSection { + gen_import_call_trampoline_arm64(vmoffsets, index, sig, calling_convention) + } +} diff --git a/lib/compiler-singlepass/src/machine_x64.rs b/lib/compiler-singlepass/src/machine_x64.rs index e2e13a4c7d3..2d1c0f5f50d 100644 --- a/lib/compiler-singlepass/src/machine_x64.rs +++ b/lib/compiler-singlepass/src/machine_x64.rs @@ -1,8 +1,7 @@ use crate::common_decl::*; use crate::emitter_x64::*; use crate::location::Location as AbstractLocation; -use crate::machine::Machine; -use crate::machine::{MemoryImmediate, TrapTable}; +use crate::machine::*; use crate::x64_decl::new_machine_state; use crate::x64_decl::{ArgumentRegisterAllocator, X64Register, GPR, XMM}; use dynasmrt::{x64::X64Relocation, VecAssembler}; @@ -1627,11 +1626,12 @@ impl Machine for MachineX86_64 { self.used_gprs.insert(gpr); } - fn push_used_gpr(&mut self) { + fn push_used_gpr(&mut self) -> usize { let used_gprs = self.get_used_gprs(); for r in used_gprs.iter() { self.assembler.emit_push(Size::S64, Location::GPR(*r)); } + used_gprs.len() * 8 } fn pop_used_gpr(&mut self) { let used_gprs = self.get_used_gprs(); @@ -1682,7 +1682,7 @@ impl Machine for MachineX86_64 { assert_eq!(self.used_simd.remove(&simd), true); } - fn push_used_simd(&mut self) { + fn push_used_simd(&mut self) -> usize { let used_xmms = self.get_used_simd(); self.adjust_stack((used_xmms.len() * 8) as u32); @@ -1693,6 +1693,8 @@ impl Machine for MachineX86_64 { Location::Memory(GPR::RSP, (i * 8) as i32), ); } + + used_xmms.len() * 8 } fn pop_used_simd(&mut self) { let 
used_xmms = self.get_used_simd(); @@ -1775,6 +1777,11 @@ impl Machine for MachineX86_64 { Location::Memory(GPR::RBP, -stack_offset) } + // Return a rounded stack adjustment value (must be a multiple of 16 bytes on ARM64, for example) + fn round_stack_adjust(&self, value: usize) -> usize { + value + } + // Adjust stack for locals fn adjust_stack(&mut self, delta_stack_offset: u32) { self.assembler.emit_sub( @@ -1791,11 +1798,6 @@ impl Machine for MachineX86_64 { Location::GPR(GPR::RSP), ); } - fn push_callee_saved(&mut self) {} - fn pop_callee_saved(&mut self) { - self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R14)); - self.assembler.emit_pop(Size::S64, Location::GPR(GPR::R15)); - } fn pop_stack_locals(&mut self, delta_stack_offset: u32) { self.assembler.emit_add( Size::S64, @@ -1881,14 +1883,80 @@ impl Machine for MachineX86_64 { } // Get param location - fn get_param_location(&self, idx: usize, calling_convention: CallingConvention) -> Location { + fn get_param_location( + &self, + idx: usize, + _sz: Size, + stack_location: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { + CallingConvention::WindowsFastcall => match idx { + 0 => Location::GPR(GPR::RCX), + 1 => Location::GPR(GPR::RDX), + 2 => Location::GPR(GPR::R8), + 3 => Location::GPR(GPR::R9), + _ => { + let loc = Location::Memory(GPR::RSP, *stack_location as i32); + *stack_location += 8; + loc + } + }, + _ => match idx { + 0 => Location::GPR(GPR::RDI), + 1 => Location::GPR(GPR::RSI), + 2 => Location::GPR(GPR::RDX), + 3 => Location::GPR(GPR::RCX), + 4 => Location::GPR(GPR::R8), + 5 => Location::GPR(GPR::R9), + _ => { + let loc = Location::Memory(GPR::RSP, *stack_location as i32); + *stack_location += 8; + loc + } + }, + } + } + // Get call param location + fn get_call_param_location( + &self, + idx: usize, + _sz: Size, + _stack_location: &mut usize, + calling_convention: CallingConvention, + ) -> Location { + match calling_convention { +
CallingConvention::WindowsFastcall => match idx { + 0 => Location::GPR(GPR::RCX), + 1 => Location::GPR(GPR::RDX), + 2 => Location::GPR(GPR::R8), + 3 => Location::GPR(GPR::R9), + _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32), + }, + _ => match idx { + 0 => Location::GPR(GPR::RDI), + 1 => Location::GPR(GPR::RSI), + 2 => Location::GPR(GPR::RDX), + 3 => Location::GPR(GPR::RCX), + 4 => Location::GPR(GPR::R8), + 5 => Location::GPR(GPR::R9), + _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32), + }, + } + } + // Get simple param location + fn get_simple_param_location( + &self, + idx: usize, + calling_convention: CallingConvention, + ) -> Location { match calling_convention { CallingConvention::WindowsFastcall => match idx { 0 => Location::GPR(GPR::RCX), 1 => Location::GPR(GPR::RDX), 2 => Location::GPR(GPR::R8), 3 => Location::GPR(GPR::R9), - _ => Location::Memory(GPR::RBP, (16 + 32 + (idx - 4) * 8) as i32), + _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32), }, _ => match idx { 0 => Location::GPR(GPR::RDI), @@ -1912,9 +1980,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), dest); } _ => unreachable!(), }, @@ -1923,9 +1991,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), 
dest); } _ => unreachable!(), }, @@ -1934,9 +2002,9 @@ impl Machine for MachineX86_64 { self.assembler.emit_mov(size, source, dest); } Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { - self.assembler - .emit_mov(size, source, Location::GPR(GPR::RAX)); - self.assembler.emit_mov(size, Location::GPR(GPR::RAX), dest); + let tmp = self.pick_temp_gpr().unwrap(); + self.assembler.emit_mov(size, source, Location::GPR(tmp)); + self.assembler.emit_mov(size, Location::GPR(tmp), dest); } _ => unreachable!(), }, @@ -1955,21 +2023,35 @@ impl Machine for MachineX86_64 { size_op: Size, dest: Location, ) { + let dst = match dest { + Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { + Location::GPR(self.acquire_temp_gpr().unwrap()) + } + Location::GPR(_) | Location::SIMD(_) => dest, + _ => unreachable!(), + }; match source { Location::GPR(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => { match size_val { - Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dest), + Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dst), Size::S16 | Size::S8 => { if signed { - self.assembler.emit_movsx(size_val, source, size_op, dest) + self.assembler.emit_movsx(size_val, source, size_op, dst) } else { - self.assembler.emit_movzx(size_val, source, size_op, dest) + self.assembler.emit_movzx(size_val, source, size_op, dst) } } } } _ => unreachable!(), } + if dst != dest { + self.assembler.emit_mov(size_op, dst, dest); + match dst { + Location::GPR(x) => self.release_gpr(x), + _ => unreachable!(), + }; + } } fn load_address(&mut self, size: Size, reg: Location, mem: Location) { match reg { @@ -2147,6 +2229,10 @@ impl Machine for MachineX86_64 { .arch_emit_indirect_call_with_trampoline(location); } + fn emit_debug_breakpoint(&mut self) { + self.assembler.emit_bkpt(); + } + fn emit_call_location(&mut self, location: Location) { self.assembler.emit_call_location(location); } @@ -2316,6 +2402,7 @@ impl Machine for MachineX86_64 { loc_b: 
Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2338,6 +2425,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2359,6 +2447,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -2381,6 +2470,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. let normal_path = self.assembler.get_label(); @@ -3753,6 +3843,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3775,6 +3866,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3796,6 +3888,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. self.assembler @@ -3818,6 +3911,7 @@ impl Machine for MachineX86_64 { loc_b: Location, ret: Location, integer_division_by_zero: Label, + _integer_overflow: Label, ) -> usize { // We assume that RAX and RDX are temporary registers here. let normal_path = self.assembler.get_label(); @@ -6534,7 +6628,9 @@ impl Machine for MachineX86_64 { // Calculate stack offset. 
let mut stack_offset: u32 = 0; for (i, _param) in sig.params().iter().enumerate() { - if let Location::Memory(_, _) = self.get_param_location(1 + i, calling_convention) { + if let Location::Memory(_, _) = + self.get_simple_param_location(1 + i, calling_convention) + { stack_offset += 8; } } @@ -6562,12 +6658,12 @@ impl Machine for MachineX86_64 { // Arguments a.emit_mov( Size::S64, - self.get_param_location(1, calling_convention), + self.get_simple_param_location(1, calling_convention), Location::GPR(GPR::R15), ); // func_ptr a.emit_mov( Size::S64, - self.get_param_location(2, calling_convention), + self.get_simple_param_location(2, calling_convention), Location::GPR(GPR::R14), ); // args_rets @@ -6577,7 +6673,7 @@ impl Machine for MachineX86_64 { let mut n_stack_args: usize = 0; for (i, _param) in sig.params().iter().enumerate() { let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i] - let dst_loc = self.get_param_location(1 + i, calling_convention); + let dst_loc = self.get_simple_param_location(1 + i, calling_convention); match dst_loc { Location::GPR(_) => { @@ -6922,42 +7018,3 @@ impl Machine for MachineX86_64 { } } } - -// Constants for the bounds of truncation operations. These are the least or -// greatest exact floats in either f32 or f64 representation less-than (for -// least) or greater-than (for greatest) the i32 or i64 or u32 or u64 -// min (for least) or max (for greatest), when rounding towards zero. - -/// Greatest Exact Float (32 bits) less-than i32::MIN when rounding towards zero. -const GEF32_LT_I32_MIN: f32 = -2147483904.0; -/// Least Exact Float (32 bits) greater-than i32::MAX when rounding towards zero. -const LEF32_GT_I32_MAX: f32 = 2147483648.0; -/// Greatest Exact Float (32 bits) less-than i64::MIN when rounding towards zero. -const GEF32_LT_I64_MIN: f32 = -9223373136366403584.0; -/// Least Exact Float (32 bits) greater-than i64::MAX when rounding towards zero. 
-const LEF32_GT_I64_MAX: f32 = 9223372036854775808.0; -/// Greatest Exact Float (32 bits) less-than u32::MIN when rounding towards zero. -const GEF32_LT_U32_MIN: f32 = -1.0; -/// Least Exact Float (32 bits) greater-than u32::MAX when rounding towards zero. -const LEF32_GT_U32_MAX: f32 = 4294967296.0; -/// Greatest Exact Float (32 bits) less-than u64::MIN when rounding towards zero. -const GEF32_LT_U64_MIN: f32 = -1.0; -/// Least Exact Float (32 bits) greater-than u64::MAX when rounding towards zero. -const LEF32_GT_U64_MAX: f32 = 18446744073709551616.0; - -/// Greatest Exact Float (64 bits) less-than i32::MIN when rounding towards zero. -const GEF64_LT_I32_MIN: f64 = -2147483649.0; -/// Least Exact Float (64 bits) greater-than i32::MAX when rounding towards zero. -const LEF64_GT_I32_MAX: f64 = 2147483648.0; -/// Greatest Exact Float (64 bits) less-than i64::MIN when rounding towards zero. -const GEF64_LT_I64_MIN: f64 = -9223372036854777856.0; -/// Least Exact Float (64 bits) greater-than i64::MAX when rounding towards zero. -const LEF64_GT_I64_MAX: f64 = 9223372036854775808.0; -/// Greatest Exact Float (64 bits) less-than u32::MIN when rounding towards zero. -const GEF64_LT_U32_MIN: f64 = -1.0; -/// Least Exact Float (64 bits) greater-than u32::MAX when rounding towards zero. -const LEF64_GT_U32_MAX: f64 = 4294967296.0; -/// Greatest Exact Float (64 bits) less-than u64::MIN when rounding towards zero. -const GEF64_LT_U64_MIN: f64 = -1.0; -/// Least Exact Float (64 bits) greater-than u64::MAX when rounding towards zero. -const LEF64_GT_U64_MAX: f64 = 18446744073709551616.0; diff --git a/lib/compiler-singlepass/src/x64_decl.rs b/lib/compiler-singlepass/src/x64_decl.rs index 96590527562..14f63ff5fe2 100644 --- a/lib/compiler-singlepass/src/x64_decl.rs +++ b/lib/compiler-singlepass/src/x64_decl.rs @@ -166,35 +166,6 @@ impl CombinedRegister for X64Register { _ => return None, }) } - - /// Returns the instruction prefix for `movq %this_reg, ?(%rsp)`. 
- /// - /// To build an instruction, append the memory location as a 32-bit - /// offset to the stack pointer to this prefix. - fn _prefix_mov_to_stack(&self) -> Option<&'static [u8]> { - Some(match *self { - X64Register::GPR(gpr) => match gpr { - GPR::RDI => &[0x48, 0x89, 0xbc, 0x24], - GPR::RSI => &[0x48, 0x89, 0xb4, 0x24], - GPR::RDX => &[0x48, 0x89, 0x94, 0x24], - GPR::RCX => &[0x48, 0x89, 0x8c, 0x24], - GPR::R8 => &[0x4c, 0x89, 0x84, 0x24], - GPR::R9 => &[0x4c, 0x89, 0x8c, 0x24], - _ => return None, - }, - X64Register::XMM(xmm) => match xmm { - XMM::XMM0 => &[0x66, 0x0f, 0xd6, 0x84, 0x24], - XMM::XMM1 => &[0x66, 0x0f, 0xd6, 0x8c, 0x24], - XMM::XMM2 => &[0x66, 0x0f, 0xd6, 0x94, 0x24], - XMM::XMM3 => &[0x66, 0x0f, 0xd6, 0x9c, 0x24], - XMM::XMM4 => &[0x66, 0x0f, 0xd6, 0xa4, 0x24], - XMM::XMM5 => &[0x66, 0x0f, 0xd6, 0xac, 0x24], - XMM::XMM6 => &[0x66, 0x0f, 0xd6, 0xb4, 0x24], - XMM::XMM7 => &[0x66, 0x0f, 0xd6, 0xbc, 0x24], - _ => return None, - }, - }) - } } /// An allocator that allocates registers for function arguments according to the System V ABI. diff --git a/tests/ignores.txt b/tests/ignores.txt index 5e1dd68bfee..bb1eed19183 100644 --- a/tests/ignores.txt +++ b/tests/ignores.txt @@ -68,6 +68,10 @@ cranelift spec::simd::simd_int_to_int_extend # Windows doesn't overcommit and fails to allocate 4GB of memory windows wasmer::max_size_of_memory +# Some AARCH64 CPUs have an issue with segfaults when writing 64 bits across a page border, where the first 32 bits might be written. +aarch64+linux spec::align +aarch64+linux spec::memory_trap + # Frontends ## WASI