diff --git a/ci/docker/wasm32-wasi/Dockerfile b/ci/docker/wasm32-wasi/Dockerfile
index eca3f61c70..392294ed48 100644
--- a/ci/docker/wasm32-wasi/Dockerfile
+++ b/ci/docker/wasm32-wasi/Dockerfile
@@ -7,8 +7,20 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \
   xz-utils \
   clang
 
-RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.22.1/wasmtime-v0.22.1-x86_64-linux.tar.xz | tar xJf -
-ENV PATH=$PATH:/wasmtime-v0.22.1-x86_64-linux
+#ENV WASMER_DIR="/.wasmer"
+
+#RUN curl https://get.wasmer.io -sSfL | sh
+
+#ENV PATH="$PATH:/.wasmer:/.wasmer/bin"
+
+#ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmer run \
+#  --llvm \
+#  --enable-simd \
+#  --mapdir .::/checkout/target/wasm32-wasi/release/deps \
+#  --"
+
+RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/dev/wasmtime-dev-x86_64-linux.tar.xz | tar xJf -
+ENV PATH=$PATH:/wasmtime-dev-x86_64-linux
 
 ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \
   --enable-simd \
diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index bb8e238a91..db19c2e78e 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -190,6 +190,9 @@ extern "C" {
     fn llvm_widen_low_i32x4_u(a: i16x8) -> i32x4;
     #[link_name = "llvm.wasm.widen.high.unsigned.v4i32.v8i16"]
     fn llvm_widen_high_i32x4_u(a: i16x8) -> i32x4;
+
+    //#[link_name = "llvm.wasm.bitmask.v2i64"]
+    //fn llvm_bitmask_i64x2(a: i64x2) -> i32;
 }
 
 /// Loads a `v128` vector from the given heap address.
@@ -1416,7 +1419,7 @@ pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 {
 
 /// Lane-wise wrapping absolute value.
 #[inline]
-// #[cfg_attr(test, assert_instr(i8x16.abs))] // FIXME support not in our LLVM yet
+#[cfg_attr(test, assert_instr(i8x16.abs))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn i8x16_abs(a: v128) -> v128 {
     let a = transmute::<_, i8x16>(a);
@@ -1452,14 +1455,14 @@ pub unsafe fn i8x16_all_true(a: v128) -> i32 {
     llvm_i8x16_all_true(a.as_i8x16())
 }
 
-// FIXME: not available in our LLVM yet
-// /// Extracts the high bit for each lane in `a` and produce a scalar mask with
-// /// all bits concatenated.
-// #[inline]
-// #[cfg_attr(test, assert_instr(i8x16.all_true))]
-// pub unsafe fn i8x16_bitmask(a: v128) -> i32 {
-//     llvm_bitmask_i8x16(transmute(a))
-// }
+/// Extracts the high bit for each lane in `a` and produce a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.bitmask))]
+#[target_feature(enable = "simd128")]
+pub unsafe fn i8x16_bitmask(a: v128) -> i32 {
+    llvm_bitmask_i8x16(transmute(a))
+}
 
 /// Converts two input vectors into a smaller lane vector by narrowing each
 /// lane.
@@ -1626,7 +1629,7 @@ pub unsafe fn i8x16_avgr_u(a: v128, b: v128) -> v128 {
 
 /// Lane-wise wrapping absolute value.
 #[inline]
-// #[cfg_attr(test, assert_instr(i16x8.abs))] // FIXME support not in our LLVM yet
+#[cfg_attr(test, assert_instr(i16x8.abs))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn i16x8_abs(a: v128) -> v128 {
     let a = transmute::<_, i16x8>(a);
@@ -1662,14 +1665,14 @@ pub unsafe fn i16x8_all_true(a: v128) -> i32 {
     llvm_i16x8_all_true(a.as_i16x8())
 }
 
-// FIXME: not available in our LLVM yet
-// /// Extracts the high bit for each lane in `a` and produce a scalar mask with
-// /// all bits concatenated.
-// #[inline]
-// #[cfg_attr(test, assert_instr(i16x8.all_true))]
-// pub unsafe fn i16x8_bitmask(a: v128) -> i32 {
-//     llvm_bitmask_i16x8(transmute(a))
-// }
+/// Extracts the high bit for each lane in `a` and produce a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.bitmask))]
+#[target_feature(enable = "simd128")]
+pub unsafe fn i16x8_bitmask(a: v128) -> i32 {
+    llvm_bitmask_i16x8(transmute(a))
+}
 
 /// Converts two input vectors into a smaller lane vector by narrowing each
 /// lane.
@@ -1877,7 +1880,7 @@ pub unsafe fn i16x8_avgr_u(a: v128, b: v128) -> v128 {
 
 /// Lane-wise wrapping absolute value.
 #[inline]
-// #[cfg_attr(test, assert_instr(i32x4.abs))] // FIXME support not in our LLVM yet
+#[cfg_attr(test, assert_instr(i32x4.abs))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn i32x4_abs(a: v128) -> v128 {
     let a = transmute::<_, i32x4>(a);
@@ -1913,14 +1916,14 @@ pub unsafe fn i32x4_all_true(a: v128) -> i32 {
     llvm_i32x4_all_true(a.as_i32x4())
 }
 
-// FIXME: not available in our LLVM yet
-// /// Extracts the high bit for each lane in `a` and produce a scalar mask with
-// /// all bits concatenated.
-// #[inline]
-// #[cfg_attr(test, assert_instr(i32x4.all_true))]
-// pub unsafe fn i32x4_bitmask(a: v128) -> i32 {
-//     llvm_bitmask_i32x4(transmute(a))
-// }
+/// Extracts the high bit for each lane in `a` and produce a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.bitmask))]
+#[target_feature(enable = "simd128")]
+pub unsafe fn i32x4_bitmask(a: v128) -> i32 {
+    llvm_bitmask_i32x4(transmute(a))
+}
 
 /// Converts low half of the smaller lane vector to a larger lane
 /// vector, sign extended.
@@ -2119,12 +2122,21 @@ pub unsafe fn i64x2_sub(a: v128, b: v128) -> v128 {
 
 /// Multiplies two 128-bit vectors as if they were two packed two 64-bit integers.
 #[inline]
-// #[cfg_attr(test, assert_instr(i64x2.mul))] // FIXME: not present in our LLVM
+#[cfg_attr(test, assert_instr(i64x2.mul))]
 #[target_feature(enable = "simd128")]
 pub unsafe fn i64x2_mul(a: v128, b: v128) -> v128 {
     transmute(simd_mul(a.as_i64x2(), b.as_i64x2()))
 }
 
+/*/// Extracts the high bit for each lane in `a` and produce a scalar mask with
+/// all bits concatenated.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.bitmask))]
+#[target_feature(enable = "simd128")]
+pub unsafe fn i64x2_bitmask(a: v128) -> i32 {
+    llvm_bitmask_i64x2(transmute(a))
+}*/
+
 /// Calculates the absolute value of each lane of a 128-bit vector interpreted
 /// as four 32-bit floating point numbers.
 #[inline]
@@ -2806,6 +2818,46 @@ pub mod tests {
         }
     }
 
+    #[test]
+    fn test_bitmask_ops() {
+        unsafe {
+            let a: [u32; 4] = [u32::MAX, 0, u32::MAX, 0];
+            let b: [u32; 4] = [u32::MAX; 4];
+            let c: [u32; 4] = [0; 4];
+
+            let vec_a: v128 = transmute(a);
+            let vec_b: v128 = transmute(b);
+            let vec_c: v128 = transmute(c);
+
+            let r: i32 = i8x16_bitmask(vec_a);
+            assert_eq!(r, 0b1111000011110000);
+            let r: i32 = i16x8_bitmask(vec_a);
+            assert_eq!(r, 0b11001100);
+            let r: i32 = i32x4_bitmask(vec_a);
+            assert_eq!(r, 0b1010);
+            /*let r: i32 = i64x2_bitmask(vec_a);
+            assert_eq!(r, 0b11);*/
+
+            let r: i32 = i8x16_bitmask(vec_b);
+            assert_eq!(r, 0xFFFF);
+            let r: i32 = i16x8_bitmask(vec_b);
+            assert_eq!(r, 0xFF);
+            let r: i32 = i32x4_bitmask(vec_b);
+            assert_eq!(r, 0xF);
+            /*let r: i32 = i64x2_bitmask(vec_b);
+            assert_eq!(r, 0b11);*/
+
+            let r: i32 = i8x16_bitmask(vec_c);
+            assert_eq!(r, 0);
+            let r: i32 = i16x8_bitmask(vec_c);
+            assert_eq!(r, 0);
+            let r: i32 = i32x4_bitmask(vec_c);
+            assert_eq!(r, 0);
+            /*let r: i32 = i64x2_bitmask(vec_c);
+            assert_eq!(r, 0);*/
+        }
+    }
+
     macro_rules! test_bool_red {
          ([$test_id:ident, $any:ident, $all:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => {
              #[test]