Canonicalize NaNs.

With WebAssembly/component-model#279, component-model floating-point types have a single NaN value. Canonicalization isn't required, and may be omitted as an optimization, but users shouldn't depend on NaN bitpatterns being preserved. To help users avoid depending on NaN bitpatterns being preserved as they propagate through component-model values, canonicalize NaN values in Wave.
sunfishcode · Jan 4, 2024 · 90a263d · 90a263d
1 parent 2643913
commit 90a263d
Show file tree

Hide file tree

Showing 7 changed files with 174 additions and 9 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -58,3 +58,19 @@ pub fn to_string(val: &impl WasmValue) -> Result<String, writer::WriterError> {
     Writer::new(&mut buf).write_value(val)?;
     Ok(String::from_utf8(buf).unwrap_or_else(|err| panic!("invalid UTF-8: {err:?}")))
 }
+
+fn canonicalize_nan32(val: f32) -> f32 {
+    if val.is_nan() {
+        f32::from_bits(0x7fc00000)
+    } else {
+        val
+    }
+}
+
+fn canonicalize_nan64(val: f64) -> f64 {
+    if val.is_nan() {
+        f64::from_bits(0x7ff8000000000000)
+    } else {
+        val
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
@@ -719,6 +719,7 @@ impl ParserError {
 #[cfg(test)]
 mod tests {
     use crate::value::{Type, Value};
+    use crate::{canonicalize_nan32, canonicalize_nan64};
 
     use super::*;
 
@@ -738,11 +739,11 @@ mod tests {
             ("2", Val::U64(2)),
             ("1.1", Val::Float32(1.1)),
             ("-1.1e+10", Val::Float32(-1.1e+10)),
-            ("nan", Val::Float32(f32::NAN)),
+            ("nan", Val::Float32(canonicalize_nan32(f32::NAN))),
             ("inf", Val::Float32(f32::INFINITY)),
             ("-inf", Val::Float32(f32::NEG_INFINITY)),
             ("1.1e-123", Val::Float64(1.1e-123)),
-            ("nan", Val::Float64(f64::NAN)),
+            ("nan", Val::Float64(canonicalize_nan64(f64::NAN))),
             ("inf", Val::Float64(f64::INFINITY)),
             ("-inf", Val::Float64(f64::NEG_INFINITY)),
             ("'x'", Val::Char('x')),

diff --git a/src/val.rs b/src/val.rs
@@ -72,12 +72,22 @@ pub trait WasmValue: Clone + Sized {
         unimplemented!()
     }
     /// Returns a new WasmValue of the given type.
+    ///
+    /// The Rust `f32` type has many distinct NaN bitpatterns, however the
+    /// component-model `float32` type only has a single NaN value, so this
+    /// function does not preserve NaN bitpatterns.
+    ///
     /// # Panics
     /// Panics if the type is not implemented (the trait default).
     fn make_float32(val: f32) -> Self {
         unimplemented!()
     }
     /// Returns a new WasmValue of the given type.
+    ///
+    /// The Rust `f64` type has many distinct NaN bitpatterns, however the
+    /// component-model `float64` type only has a single NaN value, so this
+    /// function does not preserve NaN bitpatterns.
+    ///
     /// # Panics
     /// Panics if the type is not implemented (the trait default).
     fn make_float64(val: f64) -> Self {
@@ -214,12 +224,22 @@ pub trait WasmValue: Clone + Sized {
         unimplemented!()
     }
     /// Returns the underlying value of the WasmValue, panicking if it's the wrong type.
+    ///
+    /// The Rust `f32` type has many distinct NaN bitpatterns, however the
+    /// component-model `float32` type only has a single NaN value, so this
+    /// function does not preserve NaN bitpatterns.
+    ///
     /// # Panics
     /// Panics if `self` is not of the right type.
     fn unwrap_float32(&self) -> f32 {
         unimplemented!()
     }
     /// Returns the underlying value of the WasmValue, panicking if it's the wrong type.
+    ///
+    /// The Rust `f64` type has many distinct NaN bitpatterns, however the
+    /// component-model `float64` type only has a single NaN value, so this
+    /// function does not preserve NaN bitpatterns.
+    ///
     /// # Panics
     /// Panics if `self` is not of the right type.
     fn unwrap_float64(&self) -> f64 {

diff --git a/src/value/mod.rs b/src/value/mod.rs
@@ -18,6 +18,7 @@ use self::ty::{
     EnumType, FlagsType, ListType, OptionType, RecordType, ResultType, TupleType, TypeEnum,
     VariantType,
 };
+use crate::{canonicalize_nan32, canonicalize_nan64};
 use crate::{ty::maybe_unwrap, val::unwrap_val, WasmType, WasmValue};
 
 pub use func::FuncType;
@@ -165,11 +166,19 @@ impl WasmValue for Value {
         (U16, u16, make_u16, unwrap_u16),
         (U32, u32, make_u32, unwrap_u32),
         (U64, u64, make_u64, unwrap_u64),
-        (Float32, f32, make_float32, unwrap_float32),
-        (Float64, f64, make_float64, unwrap_float64),
         (Char, char, make_char, unwrap_char)
     );
 
+    fn make_float32(val: f32) -> Self {
+        let val = canonicalize_nan32(val);
+        Self(ValueEnum::Float32(val))
+    }
+
+    fn make_float64(val: f64) -> Self {
+        let val = canonicalize_nan64(val);
+        Self(ValueEnum::Float64(val))
+    }
+
     fn make_string(val: std::borrow::Cow<str>) -> Self {
         Self(ValueEnum::String(val.into()))
     }
@@ -303,6 +312,16 @@ impl WasmValue for Value {
         Ok(Self(ValueEnum::Flags(Flags { ty, flags })))
     }
 
+    fn unwrap_float32(&self) -> f32 {
+        let val = *unwrap_val!(&self.0, ValueEnum::Float32, "float32");
+        canonicalize_nan32(val)
+    }
+
+    fn unwrap_float64(&self) -> f64 {
+        let val = *unwrap_val!(&self.0, ValueEnum::Float64, "float64");
+        canonicalize_nan64(val)
+    }
+
     fn unwrap_string(&self) -> std::borrow::Cow<str> {
         unwrap_val!(&self.0, ValueEnum::String, "string")
             .as_ref()

diff --git a/src/wasmtime/component.rs b/src/wasmtime/component.rs
@@ -3,6 +3,7 @@ use std::borrow::Cow;
 use wasmtime::component;
 
 use crate::{
+    canonicalize_nan32, canonicalize_nan64,
     fmt::DisplayFunc,
     func::WasmFunc,
     ty::{maybe_unwrap, WasmTypeKind},
@@ -121,11 +122,17 @@ impl WasmValue for component::Val {
         (U16, u16, make_u16, unwrap_u16),
         (U32, u32, make_u32, unwrap_u32),
         (U64, u64, make_u64, unwrap_u64),
-        (Float32, f32, make_float32, unwrap_float32),
-        (Float64, f64, make_float64, unwrap_float64),
         (Char, char, make_char, unwrap_char)
     );
 
+    fn make_float32(val: f32) -> Self {
+        let val = canonicalize_nan32(val);
+        Self::Float32(val)
+    }
+    fn make_float64(val: f64) -> Self {
+        let val = canonicalize_nan64(val);
+        Self::Float64(val)
+    }
     fn make_string(val: Cow<str>) -> Self {
         Self::String(val.into())
     }
@@ -170,6 +177,14 @@ impl WasmValue for component::Val {
             .new_val(&names.into_iter().collect::<Vec<_>>())
     }
 
+    fn unwrap_float32(&self) -> f32 {
+        let val = *unwrap_val!(self, Self::Float32, "float32");
+        canonicalize_nan32(val)
+    }
+    fn unwrap_float64(&self) -> f64 {
+        let val = *unwrap_val!(self, Self::Float64, "float64");
+        canonicalize_nan64(val)
+    }
     fn unwrap_string(&self) -> Cow<str> {
         unwrap_val!(self, Self::String, "string").as_ref().into()
     }

diff --git a/src/wasmtime/core.rs b/src/wasmtime/core.rs
@@ -1,6 +1,9 @@
 use std::borrow::Cow;
 
-use crate::{func::WasmFunc, ty::WasmTypeKind, val::unwrap_val, WasmType, WasmValue};
+use crate::{
+    canonicalize_nan32, canonicalize_nan64, func::WasmFunc, ty::WasmTypeKind, val::unwrap_val,
+    WasmType, WasmValue,
+};
 
 impl WasmType for wasmtime::ValType {
     fn kind(&self) -> WasmTypeKind {
@@ -38,9 +41,11 @@ impl WasmValue for wasmtime::Val {
         Self::I64(val)
     }
     fn make_float32(val: f32) -> Self {
+        let val = canonicalize_nan32(val);
         Self::F32(val.to_bits())
     }
     fn make_float64(val: f64) -> Self {
+        let val = canonicalize_nan64(val);
         Self::F64(val.to_bits())
     }
     fn make_tuple(
@@ -71,11 +76,13 @@ impl WasmValue for wasmtime::Val {
     }
 
     fn unwrap_float32(&self) -> f32 {
-        f32::from_bits(*unwrap_val!(self, Self::F32, "float32"))
+        let val = f32::from_bits(*unwrap_val!(self, Self::F32, "float32"));
+        canonicalize_nan32(val)
     }
 
     fn unwrap_float64(&self) -> f64 {
-        f64::from_bits(*unwrap_val!(self, Self::F64, "float64"))
+        let val = f64::from_bits(*unwrap_val!(self, Self::F64, "float64"));
+        canonicalize_nan64(val)
     }
 
     fn unwrap_tuple(&self) -> Box<dyn Iterator<Item = Cow<Self>> + '_> {

diff --git a/tests/nan.rs b/tests/nan.rs
@@ -0,0 +1,87 @@
+//! Test that NaN bitpatterns are not propagated through Wave values.
+//!
+//! The component-model floating-point types only have a single NaN value, to
+//! make it easier to exchange values with source languages and protocols where
+//! there is only one NaN value. To help users avoid depending on NaN bits being
+//! propagated, we canonicalize NaNs.
+
+use std::{f32, f64};
+
+use wasm_wave::WasmValue;
+
+#[test]
+fn nan() {
+    for bits in [
+        0,
+        i32::MIN as u32,
+        1.0_f32.to_bits(),
+        (-f32::consts::TAU).to_bits(),
+        0xffffffff,
+        0x7fff0f0f,
+        0x8f800000,
+        f32::NAN.to_bits(),
+    ] {
+        let val = f32::from_bits(bits);
+        let expected = if val.is_nan() { 0x7fc00000 } else { bits };
+
+        {
+            use wasm_wave::value::Value;
+            assert_eq!(
+                Value::make_float32(val).unwrap_float32().to_bits(),
+                expected
+            );
+        }
+
+        #[cfg(feature = "wasmtime")]
+        {
+            use wasmtime::component::Val;
+
+            let v = Val::make_float32(val);
+            match v {
+                Val::Float32(val) => assert_eq!(val.to_bits(), expected),
+                _ => unreachable!(),
+            }
+
+            assert_eq!(Val::Float32(val).unwrap_float32().to_bits(), expected);
+        }
+    }
+
+    for bits in [
+        0,
+        i64::MIN as u64,
+        1.0_f64.to_bits(),
+        (-f64::consts::TAU).to_bits(),
+        0xffffffffffffffff,
+        0x7fff0f0f0f0f0f0f,
+        0x8ff0000000000000,
+        f64::NAN.to_bits(),
+    ] {
+        let val = f64::from_bits(bits);
+        let expected = if val.is_nan() {
+            0x7ff8000000000000
+        } else {
+            bits
+        };
+
+        {
+            use wasm_wave::value::Value;
+            assert_eq!(
+                Value::make_float64(val).unwrap_float64().to_bits(),
+                expected
+            );
+        }
+
+        #[cfg(feature = "wasmtime")]
+        {
+            use wasmtime::component::Val;
+
+            let v = Val::make_float64(val);
+            match v {
+                Val::Float64(val) => assert_eq!(val.to_bits(), expected),
+                _ => unreachable!(),
+            }
+
+            assert_eq!(Val::Float64(val).unwrap_float64().to_bits(), expected);
+        }
+    }
+}