From f0a1a041105419cda0d742475a433eafa6045c38 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Thu, 21 Mar 2024 10:04:37 -0700 Subject: [PATCH] Fix non-clang build (probably) Fix some mistakes --- src/mono/mono/metadata/metadata.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/metadata/metadata.c b/src/mono/mono/metadata/metadata.c index 28ed8923cff82f..b4e2f4d2774e2e 100644 --- a/src/mono/mono/metadata/metadata.c +++ b/src/mono/mono/metadata/metadata.c @@ -1495,7 +1495,9 @@ mono_metadata_decode_row_col_raw (const MonoTableInfo *t, int idx, guint col) return 0; } -static MONO_ALWAYS_INLINE guint32 +// This will inline into mono_metadata_decode_value_simd on targets that can't use +// the simd version of the algorithm. +MONO_ALWAYS_INLINE static guint32 decode_value_scalar (const guint8 *ptr, const guint8 **new_ptr) { guint32 result; @@ -1519,9 +1521,13 @@ decode_value_scalar (const guint8 *ptr, const guint8 **new_ptr) ptr += 5; } + if (new_ptr) + *new_ptr = ptr; + return result; } +// This is meant to be wrapped by our existing decode_value functions, and inline into them. MONO_ALWAYS_INLINE guint32 mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) { @@ -1531,7 +1537,7 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) typedef guint8 v64_u1 __attribute__ ((vector_size (8))); typedef guint32 v64_u4 __attribute__ ((vector_size (8))); - // this will generate vectorized code on x64 and wasm as long as SIMD is enabled + // this will generate vectorized code on x64 and wasm as long as SIMD is enabled at build time. // if simd isn't enabled, it generates fairly adequate scalar code. // *(bytes *)ptr and *(guint32 *)ptr by themselves don't force an i32 load of // ptr in either x64 or wasm clang, so this is the only way to prefetch all the bytes @@ -1540,13 +1546,15 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) // we could overrun the source buffer by up to 7 bytes, but doing that on wasm is // safe unless we're decoding from the absolute end of memory. // we pad all buffers by 16 bytes in mono_wasm_load_bytes_into_heap, so we're fine - // clang happily upsizes these 8-byte vectors into 16-byte ones for wasm, and uses 8-byte + // clang happily upsizes these 8-byte vectors into 16-byte ones for wasm, and uses 8-byte-wide // insns on x64 as appropriate. armv8 looks fine too, albeit a little weird union { v64_u1 b; v64_u4 i; } v; - v.b = *(v64_u1 *)ptr; + // ideally we would load 5 bytes, but it won't use a vector load then + // memcpy instead of a regular pointer dereference, to say 'this is unaligned' + memcpy(&v, ptr, sizeof(v)); // mask and shift two bits so we can have a 4-element jump table in wasm guint8 flags = (v.b[0] & (0x80u | 0x40u)) >> 6; switch (flags) { @@ -1571,8 +1579,13 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) // i don't know why the default case is necessary here, but without it the jump table has 5 entries. default: // (b * 0x80) != 0, and (b & 0x40) != 0 + // for some reason on wasm the 'v.b[0]' load generates an '& 255', + // even if we cache it in a guint8 local if (v.b[0] == 0xFFu) { // v.b = { ptr[4], ptr[3], ptr[2], ptr[1] } + // on x64 this generates kind of gross code, i.e. + // pshufd, pshufhw, pshufd, pshuflw, packuswb + // but on wasm it's fine v.b = __builtin_shufflevector( v.b, v.b, 4, 3, 2, 1, -1, -1, -1, -1 @@ -1582,12 +1595,19 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) ptr += 5; } else { // v.b = { ptr[3], ptr[2], ptr[1], ptr[0] } +#ifdef USE_BSWAP + // generates much smaller x64 assembly, but terrible wasm + // interestingly, clang for arm automatically does this to the shuffle below + result = __builtin_bswap32(v.i[0]) & 0x1FFFFFFFu; +#else + // on x64 unlike the above this generates a single pshuflw + pack v.b = __builtin_shufflevector( v.b, v.b, 3, 2, 1, 0, -1, -1, -1, -1 ); // result = v.b[0..3] where v.b[0] &= 0x1F result = v.i[0] & 0x1FFFFFFFu; +#endif ptr += 4; } break; @@ -1598,7 +1618,7 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr) return result; #else - return decode_value_scalar (ptr, rptr); + return decode_value_scalar (ptr, new_ptr); #endif }