From 7893c210591d0d86a19ab57b5a582fc041565991 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald <connorwadefitzgerald@gmail.com> Date: Thu, 30 May 2024 16:53:34 -0400 Subject: [PATCH] [hal/vk] Rework Submission and Surface Synchronization (#5681) Fix two major synchronization issues in `wgpu_hal::vulkan`: - Properly order queue command buffer submissions. Due to Mesa bugs, two semaphores are required even though the Vulkan spec says that only one should be necessary. - Properly manage surface texture acquisition and presentation: - Acquiring a surface texture can return while the presentation engine is still displaying the texture. Applications must wait for a semaphore to be signaled before using the acquired texture. - Presenting a surface texture requires a semaphore to ensure that drawing is complete before presentation occurs. Co-authored-by: Jim Blandy <jimb@red-bean.com> --- wgpu-core/src/device/queue.rs | 32 ++++++++++++- wgpu-hal/src/lib.rs | 3 -- wgpu-hal/src/vulkan/device.rs | 67 +++++++++++++++++++++++++++ wgpu-hal/src/vulkan/mod.rs | 86 +++++++++++++++++++++++++++++++---- 4 files changed, 175 insertions(+), 13 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 833d6c2c953..53253c763e5 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1272,7 +1272,37 @@ impl Global { } } - if let Some(pending_execution) = pending_writes.pre_submit( + let refs = pending_writes + .pre_submit()? + .into_iter() + .chain( + active_executions + .iter() + .flat_map(|pool_execution| pool_execution.cmd_buffers.iter()), + ) + .collect::<Vec<_>>(); + + let mut submit_surface_textures = + SmallVec::<[_; 2]>::with_capacity(submit_surface_textures_owned.len()); + + for texture in &submit_surface_textures_owned { + submit_surface_textures.extend(match texture.inner.get(&snatch_guard) { + Some(TextureInner::Surface { raw, .. 
}) => raw.as_ref(), + _ => None, + }); + } + + unsafe { + queue + .raw + .as_ref() + .unwrap() + .submit(&refs, &submit_surface_textures, (fence, submit_index)) + .map_err(DeviceError::from)?; + } + + profiling::scope!("cleanup"); + if let Some(pending_execution) = pending_writes.post_submit( &device.command_allocator, device.raw(), queue.raw.as_ref().unwrap(), diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 706c369eb5c..57c82295822 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -956,9 +956,6 @@ pub trait Queue: WasmNotSendSync { /// - All calls to this function that include a given [`SurfaceTexture`][st] /// in `surface_textures` must use the same [`Fence`]. /// - /// - The [`Fence`] passed as `signal_fence.0` must remain alive until - /// all submissions that will signal it have completed. - /// /// [`Fence`]: Api::Fence /// [cb]: Api::CommandBuffer /// [ce]: Api::CommandEncoder diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 86bfa564424..8851ffa12cb 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -2459,6 +2459,73 @@ impl super::DeviceShared { } } +impl super::DeviceShared { + pub(super) fn new_binary_semaphore(&self) -> Result<vk::Semaphore, crate::DeviceError> { + unsafe { + self.raw + .create_semaphore(&vk::SemaphoreCreateInfo::default(), None) + .map_err(crate::DeviceError::from) + } + } + + pub(super) fn wait_for_fence( + &self, + fence: &super::Fence, + wait_value: crate::FenceValue, + timeout_ns: u64, + ) -> Result<bool, crate::DeviceError> { + profiling::scope!("Device::wait"); + match *fence { + super::Fence::TimelineSemaphore(raw) => { + let semaphores = [raw]; + let values = [wait_value]; + let vk_info = vk::SemaphoreWaitInfo::builder() + .semaphores(&semaphores) + .values(&values); + let result = match self.extension_fns.timeline_semaphore { + Some(super::ExtensionFn::Extension(ref ext)) => unsafe { + ext.wait_semaphores(&vk_info, timeout_ns) + }, + Some(super::ExtensionFn::Promoted) => unsafe { 
self.raw.wait_semaphores(&vk_info, timeout_ns) + }, + None => unreachable!(), + }; + match result { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + super::Fence::FencePool { + last_completed, + ref active, + free: _, + } => { + if wait_value <= last_completed { + Ok(true) + } else { + match active.iter().find(|&&(value, _)| value >= wait_value) { + Some(&(_, raw)) => { + match unsafe { + self.raw.wait_for_fences(&[raw], true, timeout_ns) + } { + Ok(()) => Ok(true), + Err(vk::Result::TIMEOUT) => Ok(false), + Err(other) => Err(other.into()), + } + } + None => { + log::error!("No signals reached value {}", wait_value); + Err(crate::DeviceError::Lost) + } + } + } + } + } + } +} + impl From<gpu_alloc::AllocationError> for crate::DeviceError { fn from(error: gpu_alloc::AllocationError) -> Self { use gpu_alloc::AllocationError as Ae; diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index f0d881614c3..00568eb83d3 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -31,14 +31,7 @@ mod conv; mod device; mod instance; -use std::{ - borrow::Borrow, - collections::HashSet, - ffi::{CStr, CString}, - fmt, mem, - num::NonZeroU32, - sync::Arc, -}; +use std::{borrow::Borrow, collections::HashSet, ffi::CStr, fmt, mem, num::NonZeroU32, sync::Arc}; use arrayvec::ArrayVec; use ash::{ext, khr, vk}; @@ -618,6 +611,81 @@ impl RelaySemaphores { } } +/// Semaphores for forcing queue submissions to run in order. +/// +/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are +/// ordered, then the first submission will finish on the GPU before the second +/// submission begins. To get this behavior on Vulkan we need to pass semaphores +/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution, +/// and to signal when their execution is done. +/// +/// Normally this can be done with a single semaphore, waited on and then +/// signalled for each submission. 
At any given time there's exactly one +/// submission that would signal the semaphore, and exactly one waiting on it, +/// as Vulkan requires. +/// +/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to +/// hang if we use a single semaphore. The workaround is to alternate between +/// two semaphores. The bug has been fixed in Mesa, but we should probably keep +/// the workaround until, say, Oct 2026. +/// +/// [`wgpu_hal::Queue`]: crate::Queue +/// [`submit`]: crate::Queue::submit +/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit +/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508 +#[derive(Clone)] +struct RelaySemaphores { + /// The semaphore the next submission should wait on before beginning + /// execution on the GPU. This is `None` for the first submission, which + /// should not wait on anything at all. + wait: Option<vk::Semaphore>, + + /// The semaphore the next submission should signal when it has finished + /// execution on the GPU. + signal: vk::Semaphore, +} + +impl RelaySemaphores { + fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> { + Ok(Self { + wait: None, + signal: device.new_binary_semaphore()?, + }) + } + + /// Advances the semaphores, returning the semaphores that should be used for a submission. + fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> { + let old = self.clone(); + + // Build the state for the next submission. + match self.wait { + None => { + // The `old` values describe the first submission to this queue. + // The second submission should wait on `old.signal`, and then + // signal a new semaphore which we'll create now. + self.wait = Some(old.signal); + self.signal = device.new_binary_semaphore()?; + } + Some(ref mut wait) => { + // What this submission signals, the next should wait. + mem::swap(wait, &mut self.signal); + } + }; + + Ok(old) + } + + /// Destroys the semaphores. 
+ unsafe fn destroy(&self, device: &ash::Device) { + unsafe { + if let Some(wait) = self.wait { + device.destroy_semaphore(wait, None); + } + device.destroy_semaphore(self.signal, None); + } + } +} + pub struct Queue { raw: vk::Queue, swapchain_fn: khr::swapchain::Device, @@ -1081,7 +1149,7 @@ impl crate::Queue for Queue { let swapchains = [ssc.raw]; let image_indices = [texture.index]; - let vk_info = vk::PresentInfoKHR::default() + let vk_info = vk::PresentInfoKHR::builder() .swapchains(&swapchains) .image_indices(&image_indices) .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());