Merge pull request #415 from linebender/stroke-flatten-cpu

Port recent GPU stroke expansion changes in `flatten` to CPU version
linebender · Dec 6, 2023 · 3f24d8c · 3f24d8c
2 parents 9732854 + fe75059
commit 3f24d8c
Show file tree

Hide file tree

Showing 6 changed files with 741 additions and 124 deletions.
diff --git a/crates/encoding/src/lib.rs b/crates/encoding/src/lib.rs
@@ -16,7 +16,7 @@ mod glyph;
 mod glyph_cache;
 #[cfg(feature = "full")]
 mod image_cache;
-mod math;
+pub mod math;
 mod monoid;
 mod path;
 #[cfg(feature = "full")]

diff --git a/crates/encoding/src/math.rs b/crates/encoding/src/math.rs
@@ -83,7 +83,6 @@ pub fn point_to_f32(point: kurbo::Point) -> [f32; 2] {
 ///
 /// TODO: We should consider adopting <https://crates.io/crates/half> as a dependency since it nicely
 /// wraps native ARM and x86 instructions for floating-point conversion.
-#[allow(unused)] // for now
 pub(crate) fn f32_to_f16(val: f32) -> u16 {
     const INF_32: u32 = 255 << 23;
     const INF_16: u32 = 31 << 23;
@@ -123,9 +122,34 @@ pub(crate) fn f32_to_f16(val: f32) -> u16 {
     output | (sign >> 16) as u16
 }
 
+/// Converts a 16-bit precision IEEE-754 binary16 float to an `f32`.
+/// This implementation was adapted from Fabian Giesen's `half_to_float()`
+/// function which can be found at <https://gist.github.com/rygorous/2156668#file-gistfile1-cpp-L574>
+pub fn f16_to_f32(bits: u16) -> f32 {
+    let bits = bits as u32;
+    const MAGIC: u32 = 113 << 23;
+    const SHIFTED_EXP: u32 = 0x7c00 << 13; // exponent mask after shift
+
+    let mut o = (bits & 0x7fff) << 13; // exponent/mantissa bits
+    let exp = SHIFTED_EXP & o; // just the exponent
+    o += (127 - 15) << 23; // exponent adjust
+
+    // handle exponent special cases
+    if exp == SHIFTED_EXP {
+        // Inf/NaN?
+        o += (128 - 16) << 23; // extra exp adjust
+    } else if exp == 0 {
+        // Zero/Denormal?
+        o += 1 << 23; // extra exp adjust
+        o = (f32::from_bits(o) - f32::from_bits(MAGIC)).to_bits(); // normalize
+    }
+
+    f32::from_bits(o | ((bits & 0x8000) << 16)) // sign bit
+}
+
 #[cfg(test)]
 mod tests {
-    use super::f32_to_f16;
+    use super::{f16_to_f32, f32_to_f16};
 
     #[test]
     fn test_f32_to_f16_simple() {
@@ -145,31 +169,116 @@ mod tests {
     }
 
     #[test]
-    fn test_f32_to_16_inf() {
+    fn test_f32_to_f16_inf() {
         let input: f32 = f32::from_bits(0x7F800000u32);
         assert!(input.is_infinite());
         let output: u16 = f32_to_f16(input);
         assert_eq!(0x7C00, output);
     }
 
     #[test]
-    fn test_f32_to_16_exponent_rebias() {
+    fn test_f32_to_f16_exponent_rebias() {
         let input: f32 = 0.00003051758;
         let output: u16 = f32_to_f16(input);
         assert_eq!(0x0200, output); // 0.00003052
     }
 
     #[test]
-    fn test_f32_to_16_exponent_overflow() {
+    fn test_f32_to_f16_exponent_overflow() {
         let input: f32 = 1.701412e38;
         let output: u16 = f32_to_f16(input);
         assert_eq!(0x7C00, output); // +inf
     }
 
     #[test]
-    fn test_f32_to_16_exponent_overflow_neg_inf() {
+    fn test_f32_to_f16_exponent_overflow_neg_inf() {
         let input: f32 = -1.701412e38;
         let output: u16 = f32_to_f16(input);
         assert_eq!(0xFC00, output); // -inf
     }
+
+    #[test]
+    fn test_f16_to_f32_simple() {
+        let input: u16 = 0x4248u16;
+        let output: f32 = f16_to_f32(input);
+        assert_eq!(3.140625, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_inf() {
+        let input: u16 = 0x7C00;
+        let output = f16_to_f32(input);
+        assert!(output.is_infinite());
+    }
+
+    #[test]
+    fn test_f16_to_f32_neg_inf() {
+        let input: u16 = 0xFC00;
+        let output = f16_to_f32(input);
+        assert!(output.is_infinite());
+    }
+
+    #[test]
+    fn test_f16_to_f32_inf_roundtrip() {
+        let input: u16 = 0x7C00;
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_neg_inf_roundtrip() {
+        let input: u16 = 0xFC00;
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_nan() {
+        let input: u16 = 0x7C01;
+        let output = f16_to_f32(input);
+        assert!(output.is_nan());
+    }
+
+    #[test]
+    fn test_f16_to_f32_nan_roundtrip() {
+        let input: u16 = 0x7C01;
+        // Convert f16 -> f32 -> f16 -> f32 and check that the final f32 is still NaN.
+        let output = f16_to_f32(f32_to_f16(f16_to_f32(input)));
+        assert!(output.is_nan());
+    }
+
+    #[test]
+    fn test_f16_to_f32_large_pos_roundtrip() {
+        let input: u16 = 0x7BFF; // 65504.0
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_large_neg_roundtrip() {
+        let input: u16 = 0xFBFF; // -65504.0
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_small_pos_roundtrip() {
+        let input: u16 = 0x0001; // 5.97e-8
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_small_neg_roundtrip() {
+        let input: u16 = 0x8001; // -5.97e-8
+        let output = f32_to_f16(f16_to_f32(input));
+        assert_eq!(input, output);
+    }
+
+    #[test]
+    fn test_f16_to_f32_roundtrip() {
+        const EPS: f32 = 0.001;
+        let input: f32 = std::f32::consts::PI;
+        assert!((input - f16_to_f32(f32_to_f16(input))).abs() < EPS);
+    }
 }
diff --git a/crates/encoding/src/path.rs b/crates/encoding/src/path.rs
@@ -48,20 +48,22 @@ impl Style {
     pub const FLAGS_JOIN_BITS_BEVEL: u32 = 0;
     pub const FLAGS_JOIN_BITS_MITER: u32 = 0x1000_0000;
     pub const FLAGS_JOIN_BITS_ROUND: u32 = 0x2000_0000;
-
-    #[cfg(test)]
     pub const FLAGS_JOIN_MASK: u32 = 0x3000_0000;
 
     /// Encodings for cap style:
     ///    - 0b00 -> butt
     ///    - 0b01 -> square
     ///    - 0b10 -> round
-    pub const FLAGS_START_CAP_BITS_BUTT: u32 = 0;
-    pub const FLAGS_START_CAP_BITS_SQUARE: u32 = 0x0400_0000;
-    pub const FLAGS_START_CAP_BITS_ROUND: u32 = 0x0800_0000;
-    pub const FLAGS_END_CAP_BITS_BUTT: u32 = 0;
-    pub const FLAGS_END_CAP_BITS_SQUARE: u32 = 0x0100_0000;
-    pub const FLAGS_END_CAP_BITS_ROUND: u32 = 0x0200_0000;
+    pub const FLAGS_CAP_BITS_BUTT: u32 = 0;
+    pub const FLAGS_CAP_BITS_SQUARE: u32 = 0x0100_0000;
+    pub const FLAGS_CAP_BITS_ROUND: u32 = 0x0200_0000;
+
+    pub const FLAGS_START_CAP_BITS_BUTT: u32 = Self::FLAGS_CAP_BITS_BUTT << 2;
+    pub const FLAGS_START_CAP_BITS_SQUARE: u32 = Self::FLAGS_CAP_BITS_SQUARE << 2;
+    pub const FLAGS_START_CAP_BITS_ROUND: u32 = Self::FLAGS_CAP_BITS_ROUND << 2;
+    pub const FLAGS_END_CAP_BITS_BUTT: u32 = Self::FLAGS_CAP_BITS_BUTT;
+    pub const FLAGS_END_CAP_BITS_SQUARE: u32 = Self::FLAGS_CAP_BITS_SQUARE;
+    pub const FLAGS_END_CAP_BITS_ROUND: u32 = Self::FLAGS_CAP_BITS_ROUND;
 
     pub const FLAGS_START_CAP_MASK: u32 = 0x0C00_0000;
     pub const FLAGS_END_CAP_MASK: u32 = 0x0300_0000;
@@ -272,13 +274,13 @@ impl PathTag {
     /// Style setting.
     pub const STYLE: Self = Self(0x40);
 
+    /// Bit that marks a segment that is the end of a subpath.
+    pub const SUBPATH_END_BIT: u8 = 0x4;
+
     /// Bit for path segments that are represented as f32 values. If unset
     /// they are represented as i16.
     const F32_BIT: u8 = 0x8;
 
-    /// Bit that marks a segment that is the end of a subpath.
-    const SUBPATH_END_BIT: u8 = 0x4;
-
     /// Mask for bottom 3 bits that contain the [`PathSegmentType`].
     const SEGMENT_MASK: u8 = 0x3;
 

diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
@@ -297,17 +297,16 @@ fn flatten_arc(
     var p0 = transform_apply(transform, begin);
     var r = begin - center;
 
-    let EPS = 1e-9;
-    let tol = 0.5;
+    let MIN_THETA = 0.0001;
+    let tol = 0.1;
     let radius = max(tol, length(p0 - transform_apply(transform, center)));
-    let x = 1. - tol / radius;
-    let theta = acos(clamp(2. * x * x - 1., -1., 1.));
-    let MAX_LINES = 1000u;
-    let n_lines = select(min(MAX_LINES, u32(ceil(6.2831853 / theta))), MAX_LINES, theta <= EPS);
-
-    let th = angle / f32(n_lines);
-    let c = cos(th);
-    let s = sin(th);
+    let theta = max(MIN_THETA, 2. * acos(1. - tol / radius));
+
+    // Always output at least one line so that we always draw the chord.
+    let n_lines = max(1u, u32(ceil(angle / theta)));
+
+    let c = cos(theta);
+    let s = sin(theta);
     let rot = mat2x2(c, -s, s, c);
 
     let line_ix = atomicAdd(&bump.lines, n_lines);
@@ -580,7 +579,6 @@ fn output_two_lines_with_transform(
 
 struct NeighboringSegment {
     do_join: bool,
-    p0: vec2f,
 
     // Device-space start tangent vector
     tangent: vec2f,
@@ -593,9 +591,8 @@ fn read_neighboring_segment(ix: u32) -> NeighboringSegment {
     let is_closed = (tag.tag_byte & PATH_TAG_SEG_TYPE) == PATH_TAG_LINETO;
     let is_stroke_cap_marker = (tag.tag_byte & PATH_TAG_SUBPATH_END) != 0u;
     let do_join = !is_stroke_cap_marker || is_closed;
-    let p0 = pts.p0;
     let tangent = cubic_start_tangent(pts.p0, pts.p1, pts.p2, pts.p3);
-    return NeighboringSegment(do_join, p0, tangent);
+    return NeighboringSegment(do_join, tangent);
 }
 
 // `pathdata_base` is decoded once and reused by helpers above.