From 01c86cec363e173e1b9d939da2e5aca098b2558c Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Thu, 5 Sep 2024 20:40:59 +0200 Subject: [PATCH] Support WebAssembly Relaxed SIMD (#126) --- .cargo/config.toml | 2 +- .github/workflows/main.yml | 13 +++++++++---- Cargo.toml | 4 ++-- src/wide/f32x4_t.rs | 10 ++++++++++ src/wide/f32x8_t.rs | 16 +++++++++++----- src/wide/i32x4_t.rs | 2 ++ 6 files changed, 35 insertions(+), 12 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 339b227..efffee6 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,2 @@ -[target.wasm32-wasi] +[target.wasm32-wasip1] runner = "wasmtime run --dir ." diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1646867..65859f4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,7 +66,7 @@ jobs: uses: dtolnay/rust-toolchain@master with: toolchain: stable - target: wasm32-wasi + target: wasm32-wasip1 - name: Install wasmtime run: | @@ -74,15 +74,20 @@ jobs: echo "$HOME/.wasmtime/bin" >> $GITHUB_PATH - name: Build with minimal features (no_std) - run: cargo build --target wasm32-wasi --verbose --no-default-features --features no-std-float + run: cargo build --target wasm32-wasip1 --verbose --no-default-features --features no-std-float - name: Run tests without SIMD - run: cargo test --target wasm32-wasi --verbose --no-default-features --features png-format + run: cargo test --target wasm32-wasip1 --verbose --no-default-features --features png-format - name: Run tests with SIMD128 env: RUSTFLAGS: -Ctarget-feature=+simd128,+bulk-memory,+nontrapping-fptoint,+sign-ext - run: cargo test --target wasm32-wasi + run: cargo test --target wasm32-wasip1 + + - name: Run tests with Relaxed SIMD + env: + RUSTFLAGS: -Ctarget-feature=+simd128,+relaxed-simd,+bulk-memory,+nontrapping-fptoint,+sign-ext + run: cargo test --target wasm32-wasip1 aarch64: runs-on: ubuntu-20.04 diff --git a/Cargo.toml b/Cargo.toml index 8e3f7c6..b645a4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,8 +31,8 @@ default = ["std", "simd", "png-format"] std = ["tiny-skia-path/std"] no-std-float = ["tiny-skia-path/no-std-float"] -# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128) -# and AArch64 (Neon). +# Enables SIMD instructions on x86 (from SSE up to AVX2), WebAssembly (SIMD128, +# Relaxed SIMD) and AArch64 (Neon). # Has no effect on other targets. Present mainly for testing. simd = [] diff --git a/src/wide/f32x4_t.rs b/src/wide/f32x4_t.rs index 21d5140..5985203 100644 --- a/src/wide/f32x4_t.rs +++ b/src/wide/f32x4_t.rs @@ -90,6 +90,8 @@ impl f32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(unsafe { _mm_max_ps(self.0, rhs.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + Self(f32x4_relaxed_max(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { Self(f32x4_pmax(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { @@ -111,6 +113,8 @@ impl f32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse2"))] { Self(unsafe { _mm_min_ps(self.0, rhs.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + Self(f32x4_relaxed_min(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { Self(f32x4_pmin(self.0, rhs.0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { @@ -245,6 +249,8 @@ impl f32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { Self(unsafe { _mm_blendv_ps(f.0, t.0, self.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + Self(i32x4_relaxed_laneselect(t.0, f.0, self.0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { Self(v128_bitselect(t.0, f.0, self.0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { @@ -302,6 +308,8 @@ impl f32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse2"))] { i32x4(unsafe { _mm_cvtps_epi32(self.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + i32x4(i32x4_relaxed_trunc_f32x4(self.round().0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { i32x4(i32x4_trunc_sat_f32x4(self.round().0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { @@ -325,6 +333,8 @@ impl f32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse2"))] { i32x4(unsafe { _mm_cvttps_epi32(self.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + i32x4(i32x4_relaxed_trunc_f32x4(self.0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { i32x4(i32x4_trunc_sat_f32x4(self.0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] { diff --git a/src/wide/f32x8_t.rs b/src/wide/f32x8_t.rs index df79279..231af49 100644 --- a/src/wide/f32x8_t.rs +++ b/src/wide/f32x8_t.rs @@ -43,11 +43,17 @@ impl f32x8 { } pub fn floor(self) -> Self { - let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8()); - roundtrip - - roundtrip - .cmp_gt(self) - .blend(f32x8::splat(1.0), f32x8::default()) + cfg_if::cfg_if! { + if #[cfg(all(feature = "simd", target_feature = "simd128"))] { + Self(self.0.floor(), self.1.floor()) + } else { + let roundtrip: f32x8 = cast(self.trunc_int().to_f32x8()); + roundtrip + - roundtrip + .cmp_gt(self) + .blend(f32x8::splat(1.0), f32x8::default()) + } + } } pub fn fract(self) -> Self { diff --git a/src/wide/i32x4_t.rs b/src/wide/i32x4_t.rs index fb77a0f..810454d 100644 --- a/src/wide/i32x4_t.rs +++ b/src/wide/i32x4_t.rs @@ -56,6 +56,8 @@ impl i32x4 { cfg_if::cfg_if! { if #[cfg(all(feature = "simd", target_feature = "sse4.1"))] { Self(unsafe { _mm_blendv_epi8(f.0, t.0, self.0) }) + } else if #[cfg(all(feature = "simd", target_feature = "relaxed-simd"))] { + Self(i32x4_relaxed_laneselect(t.0, f.0, self.0)) } else if #[cfg(all(feature = "simd", target_feature = "simd128"))] { Self(v128_bitselect(t.0, f.0, self.0)) } else if #[cfg(all(feature = "simd", target_arch = "aarch64", target_feature = "neon"))] {