From 7893c210591d0d86a19ab57b5a582fc041565991 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald <connorwadefitzgerald@gmail.com> Date: Thu, 30 May 2024 16:53:34 -0400 Subject: [PATCH] [hal/vk] Rework Submission and Surface Synchronization (#5681) Fix two major synchronization issues in `wgpu_hal::vulkan`: - Properly order queue command buffer submissions. Due to Mesa bugs, two semaphores are required even though the Vulkan spec says that only one should be necessary. - Properly manage surface texture acquisition and presentation: - Acquiring a surface texture can return while the presentation engine is still displaying the texture. Applications must wait for a semaphore to be signaled before using the acquired texture. - Presenting a surface texture requires a semaphore to ensure that drawing is complete before presentation occurs. Co-authored-by: Jim Blandy <jimb@red-bean.com> --- wgpu-core/src/device/queue.rs | 32 ++++++++++++- wgpu-hal/src/lib.rs | 3 -- wgpu-hal/src/vulkan/device.rs | 67 +++++++++++++++++++++++++++ wgpu-hal/src/vulkan/mod.rs | 86 +++++++++++++++++++++++++++++++---- 4 files changed, 175 insertions(+), 13 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 833d6c2c953..53253c763e5 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1272,7 +1272,37 @@ impl Global { } } - if let Some(pending_execution) = pending_writes.pre_submit( + let refs = pending_writes + .pre_submit()? + .into_iter() + .chain( + active_executions + .iter() + .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), + ) + .collect::<Vec<_>>(); + + let mut submit_surface_textures = + SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + + for texture in &submit_surface_textures_owned { + submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. 
}) => raw.as_ref(), + _ => None, + }); + } + + unsafe { + queue + .raw + .as_ref() + .unwrap() + .submit(&refs, &submit_surface_textures, (fence, submit_index)) + .map_err(DeviceError::from)?; + } + + profiling::scope!("cleanup"); + if let Some(pending_execution) = pending_writes.post_submit( &device.command_allocator, device.raw(), queue.raw.as_ref().unwrap(), diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 706c369eb5c..57c82295822 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -956,9 +956,6 @@ pub trait Queue: WasmNotSendSync { /// - All calls to this function that include a given [`SurfaceTexture`][st] /// in `surface_textures` must use the same [`Fence`]. /// - /// - The [`Fence`] passed as `signal_fence.0` must remain alive until - /// all submissions that will signal it have completed. - /// /// [`Fence`]: Api::Fence /// [cb]: Api::CommandBuffer /// [ce]: Api::CommandEncoder diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 86bfa564424..8851ffa12cb 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2459,6 +2459,73 @@ impl super::DeviceShared { } } +impl super::DeviceShared { + pub(super) fn new_binary_semaphore(&self) -> Result<vk::Semaphore, crate::DeviceError> { + unsafe { + self.raw + .create_semaphore(&vk::SemaphoreCreateInfo::default(), None) + .map_err(crate::DeviceError::from) + } + } + + pub(super) fn wait_for_fence( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ns: u64, + ) -> Result<bool, crate::DeviceError> { + profiling::scope!("Device::wait"); + match *fence { + super::Fence::TimelineSemaphore(raw) => { + let semaphores = [raw]; + let values = [wait_value]; + let vk_info = vk::SemaphoreWaitInfo::builder() + .semaphores(&semaphores) + .values(&values); + let result = match self.extension_fns.timeline_semaphore { + Some(super::ExtensionFn::Extension(ref ext)) => unsafe { + ext.wait_semaphores(&vk_info, timeout_ns) + }, + Some(super::ExtensionFn::Promoted) => unsafe { 
self.raw.wait_semaphores(&vk_info, timeout_ns) + }, + None => unreachable!(), + }; + match result { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + super::Fence::FencePool { + last_completed, + ref active, + free: _, + } => { + if wait_value <= last_completed { + Ok(true) + } else { + match active.iter().find(|&&(value, _)| value >= wait_value) { + Some(&(_, raw)) => { + match unsafe { + self.raw.wait_for_fences(&[raw], true, timeout_ns) + } { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + None => { + log::error!("No signals reached value {}", wait_value); + Err(crate::DeviceError::Lost) + } + } + } + } + } + } +} + impl From<gpu_alloc::AllocationError> for crate::DeviceError { fn from(error: gpu_alloc::AllocationError) -> Self { use gpu_alloc::AllocationError as Ae; diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index f0d881614c3..00568eb83d3 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -31,14 +31,7 @@ mod conv; mod device; mod instance; -use std::{ - borrow::Borrow, - collections::HashSet, - ffi::{CStr, CString}, - fmt, mem, - num::NonZeroU32, - sync::Arc, -}; +use std::{borrow::Borrow, collections::HashSet, ffi::CStr, fmt, mem, num::NonZeroU32, sync::Arc}; use arrayvec::ArrayVec; use ash::{ext, khr, vk}; @@ -618,6 +611,81 @@ impl RelaySemaphores { } } +/// Semaphores for forcing queue submissions to run in order. +/// +/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are +/// ordered, then the first submission will finish on the GPU before the second +/// submission begins. To get this behavior on Vulkan we need to pass semaphores +/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution, +/// and to signal when their execution is done. +/// +/// Normally this can be done with a single semaphore, waited on and then +/// signalled for each submission. 
At any given time there's exactly one +/// submission that would signal the semaphore, and exactly one waiting on it, +/// as Vulkan requires. +/// +/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to +/// hang if we use a single semaphore. The workaround is to alternate between +/// two semaphores. The bug has been fixed in Mesa, but we should probably keep +/// the workaround until, say, Oct 2026. +/// +/// [`wgpu_hal::Queue`]: crate::Queue +/// [`submit`]: crate::Queue::submit +/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit +/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508 +#[derive(Clone)] +struct RelaySemaphores { + /// The semaphore the next submission should wait on before beginning + /// execution on the GPU. This is `None` for the first submission, which + /// should not wait on anything at all. + wait: Option<vk::Semaphore>, + + /// The semaphore the next submission should signal when it has finished + /// execution on the GPU. + signal: vk::Semaphore, +} + +impl RelaySemaphores { + fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> { + Ok(Self { + wait: None, + signal: device.new_binary_semaphore()?, + }) + } + + /// Advances the semaphores, returning the semaphores that should be used for a submission. + fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> { + let old = self.clone(); + + // Build the state for the next submission. + match self.wait { + None => { + // The `old` values describe the first submission to this queue. + // The second submission should wait on `old.signal`, and then + // signal a new semaphore which we'll create now. + self.wait = Some(old.signal); + self.signal = device.new_binary_semaphore()?; + } + Some(ref mut wait) => { + // What this submission signals, the next should wait. + mem::swap(wait, &mut self.signal); + } + }; + + Ok(old) + } + + /// Destroys the semaphores. 
+ unsafe fn destroy(&self, device: &ash::Device) { + unsafe { + if let Some(wait) = self.wait { + device.destroy_semaphore(wait, None); + } + device.destroy_semaphore(self.signal, None); + } + } +} + pub struct Queue { raw: vk::Queue, swapchain_fn: khr::swapchain::Device, @@ -1081,7 +1149,7 @@ impl crate::Queue for Queue { let swapchains = [ssc.raw]; let image_indices = [texture.index]; - let vk_info = vk::PresentInfoKHR::default() + let vk_info = vk::PresentInfoKHR::builder() .swapchains(&swapchains) .image_indices(&image_indices) .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());