From afe050fac1e81765c341eb9132a5123141f7c844 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 13 Dec 2024 17:22:30 -0800 Subject: [PATCH] Avoid dealing with big-endian vectors * Change wasm `global`s to store `v128` in little-endian format. * Change pulley stack loads/stores to work with vectors in little-endian format. prtest:full --- .../codegen/src/isa/pulley_shared/abi.rs | 16 ++++++++++++-- .../codegen/src/isa/pulley_shared/lower.isle | 3 +++ .../src/translate/code_translator.rs | 10 +++++++++ .../wasmtime/src/runtime/externals/global.rs | 4 ++-- .../wasmtime/src/runtime/trampoline/global.rs | 2 +- crates/wasmtime/src/runtime/vm/vmcontext.rs | 22 ++++++++++++------- crates/wast-util/src/lib.rs | 2 -- 7 files changed, 44 insertions(+), 15 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 292e8b680ac3..66b1cc113dc2 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -160,11 +160,23 @@ where } fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { - Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()).into() + let mut flags = MemFlags::trusted(); + // Stack loads/stores of vectors always use little-endianness to avoid + // implementing a byte-swap of vectors on big-endian platforms. + if ty.is_vector() { + flags.set_endianness(ir::Endianness::Little); + } + Inst::gen_load(into_reg, mem.into(), ty, flags).into() } fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { - Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()).into() + let mut flags = MemFlags::trusted(); + // Stack loads/stores of vectors always use little-endianness to avoid + // implementing a byte-swap of vectors on big-endian platforms. 
+ if ty.is_vector() { + flags.set_endianness(ir::Endianness::Little); + } + Inst::gen_store(mem.into(), from_reg, ty, flags).into() } fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self::I { diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index a97d26bc7589..8927345a19f9 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -548,6 +548,9 @@ (rule (lower (has_type $I64 (bitcast _flags val @ (value_type $F64)))) (pulley_bitcast_int_from_float_64 val)) +(rule 1 (lower (has_type (ty_vec128 _) (bitcast _flags val @ (value_type (ty_vec128 _))))) + val) + ;;;; Rules for `fcvt_to_{u,s}int` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I32 (fcvt_to_uint val @ (value_type $F32)))) diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index 75efa4748383..6a687f41634a 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -177,6 +177,12 @@ pub fn translate_operator( GlobalVariable::Memory { gv, offset, ty } => { let addr = builder.ins().global_value(environ.pointer_type(), gv); let mut flags = ir::MemFlags::trusted(); + // Store vector globals in little-endian format to avoid + // byte swaps on big-endian platforms since at-rest vectors + // should already be in little-endian format anyway. + if ty.is_vector() { + flags.set_endianness(ir::Endianness::Little); + } // Put globals in the "table" abstract heap category as well. 
flags.set_alias_region(Some(ir::AliasRegion::Table)); builder.ins().load(ty, flags, addr, offset) @@ -191,6 +197,10 @@ pub fn translate_operator( GlobalVariable::Memory { gv, offset, ty } => { let addr = builder.ins().global_value(environ.pointer_type(), gv); let mut flags = ir::MemFlags::trusted(); + // Like `global.get`, store globals in little-endian format. + if ty.is_vector() { + flags.set_endianness(ir::Endianness::Little); + } // Put globals in the "table" abstract heap category as well. flags.set_alias_region(Some(ir::AliasRegion::Table)); let mut val = state.pop1(); diff --git a/crates/wasmtime/src/runtime/externals/global.rs b/crates/wasmtime/src/runtime/externals/global.rs index 3d53742f169e..22ca0ab7cad5 100644 --- a/crates/wasmtime/src/runtime/externals/global.rs +++ b/crates/wasmtime/src/runtime/externals/global.rs @@ -114,7 +114,7 @@ impl Global { ValType::I64 => Val::from(*definition.as_i64()), ValType::F32 => Val::F32(*definition.as_u32()), ValType::F64 => Val::F64(*definition.as_u64()), - ValType::V128 => Val::V128((*definition.as_u128()).into()), + ValType::V128 => Val::V128(definition.get_u128().into()), ValType::Ref(ref_ty) => { let reference: Ref = match ref_ty.heap_type() { HeapType::Func | HeapType::ConcreteFunc(_) => { @@ -187,7 +187,7 @@ impl Global { Val::I64(i) => *definition.as_i64_mut() = i, Val::F32(f) => *definition.as_u32_mut() = f, Val::F64(f) => *definition.as_u64_mut() = f, - Val::V128(i) => *definition.as_u128_mut() = i.into(), + Val::V128(i) => definition.set_u128(i.into()), Val::FuncRef(f) => { *definition.as_func_ref_mut() = f.map_or(ptr::null_mut(), |f| { f.vm_func_ref(&mut store).as_ptr().cast() diff --git a/crates/wasmtime/src/runtime/trampoline/global.rs b/crates/wasmtime/src/runtime/trampoline/global.rs index 62612b74b8a0..d7ecd7b87d4b 100644 --- a/crates/wasmtime/src/runtime/trampoline/global.rs +++ b/crates/wasmtime/src/runtime/trampoline/global.rs @@ -34,7 +34,7 @@ pub fn generate_global_export( Val::I64(x) => 
*global.as_i64_mut() = x, Val::F32(x) => *global.as_f32_bits_mut() = x, Val::F64(x) => *global.as_f64_bits_mut() = x, - Val::V128(x) => *global.as_u128_mut() = x.into(), + Val::V128(x) => global.set_u128(x.into()), Val::FuncRef(f) => { *global.as_func_ref_mut() = f.map_or(ptr::null_mut(), |f| f.vm_func_ref(&mut store).as_ptr()); diff --git a/crates/wasmtime/src/runtime/vm/vmcontext.rs b/crates/wasmtime/src/runtime/vm/vmcontext.rs index e59b80ee5b8a..450cfde37a0b 100644 --- a/crates/wasmtime/src/runtime/vm/vmcontext.rs +++ b/crates/wasmtime/src/runtime/vm/vmcontext.rs @@ -447,7 +447,7 @@ impl VMGlobalDefinition { WasmValType::I64 => *global.as_i64_mut() = raw.get_i64(), WasmValType::F32 => *global.as_f32_bits_mut() = raw.get_f32(), WasmValType::F64 => *global.as_f64_bits_mut() = raw.get_f64(), - WasmValType::V128 => *global.as_u128_mut() = raw.get_v128(), + WasmValType::V128 => global.set_u128(raw.get_v128()), WasmValType::Ref(r) => match r.heap_type.top() { WasmHeapTopType::Extern => { let r = VMGcRef::from_raw_u32(raw.get_externref()); @@ -478,7 +478,7 @@ impl VMGlobalDefinition { WasmValType::I64 => ValRaw::i64(*self.as_i64()), WasmValType::F32 => ValRaw::f32(*self.as_f32_bits()), WasmValType::F64 => ValRaw::f64(*self.as_f64_bits()), - WasmValType::V128 => ValRaw::v128(*self.as_u128()), + WasmValType::V128 => ValRaw::v128(self.get_u128()), WasmValType::Ref(r) => match r.heap_type.top() { WasmHeapTopType::Extern => ValRaw::externref(match self.as_gc_ref() { Some(r) => store.gc_store_mut()?.clone_gc_ref(r).as_raw_u32(), @@ -575,14 +575,20 @@ impl VMGlobalDefinition { &mut *(self.storage.as_mut().as_mut_ptr().cast::()) } - /// Return a reference to the value as an u128. - pub unsafe fn as_u128(&self) -> &u128 { - &*(self.storage.as_ref().as_ptr().cast::()) + /// Gets the underlying 128-bit vector value. + // + // Note that vectors are stored in little-endian format while other types + // are stored in native-endian format. 
+ pub unsafe fn get_u128(&self) -> u128 { + u128::from_le(*(self.storage.as_ref().as_ptr().cast::<u128>())) } - /// Return a mutable reference to the value as an u128. - pub unsafe fn as_u128_mut(&mut self) -> &mut u128 { - &mut *(self.storage.as_mut().as_mut_ptr().cast::<u128>()) + /// Sets the 128-bit vector value. + // + // Note that vectors are stored in little-endian format while other types + // are stored in native-endian format. + pub unsafe fn set_u128(&mut self, val: u128) { + *self.storage.as_mut().as_mut_ptr().cast::<u128>() = val.to_le(); } /// Return a reference to the value as u128 bits. diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 7c99c6eaaa83..63c0c84573f3 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -427,10 +427,8 @@ impl WastTest { "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast", "spec_testsuite/proposals/threads/atomic.wast", "spec_testsuite/simd_align.wast", - "spec_testsuite/simd_bit_shift.wast", "spec_testsuite/simd_bitwise.wast", "spec_testsuite/simd_boolean.wast", - "spec_testsuite/simd_const.wast", "spec_testsuite/simd_conversions.wast", "spec_testsuite/simd_f32x4.wast", "spec_testsuite/simd_f32x4_arith.wast",