diff --git a/deno_webgpu/src/buffer.rs b/deno_webgpu/src/buffer.rs
index 45eab483d1..1eb331863e 100644
--- a/deno_webgpu/src/buffer.rs
+++ b/deno_webgpu/src/buffer.rs
@@ -128,7 +128,7 @@ pub async fn op_webgpu_buffer_get_map_async(
         {
             let state = state.borrow();
             let instance = state.borrow::();
-            gfx_select!(device => instance.device_poll(device, false)).unwrap();
+            gfx_select!(device => instance.device_poll(device, false, None)).unwrap();
         }
         tokio::time::sleep(Duration::from_millis(10)).await;
     }
diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs
index 524c859430..f05e66f56d 100644
--- a/player/src/bin/play.rs
+++ b/player/src/bin/play.rs
@@ -95,7 +95,7 @@ fn main() {
         }
 
         gfx_select!(device => global.device_stop_capture(device));
-        gfx_select!(device => global.device_poll(device, true)).unwrap();
+        gfx_select!(device => global.device_poll(device, true, None)).unwrap();
     }
     #[cfg(feature = "winit")]
     {
@@ -181,7 +181,7 @@ fn main() {
            },
            Event::LoopDestroyed => {
                log::info!("Closing");
-               gfx_select!(device => global.device_poll(device, true)).unwrap();
+               gfx_select!(device => global.device_poll(device, true, None)).unwrap();
            }
            _ => {}
        }
diff --git a/player/tests/test.rs b/player/tests/test.rs
index 6fa3395ae2..fd96f32173 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -120,7 +120,7 @@ impl Test<'_> {
         }
 
         println!("\t\t\tWaiting...");
-        wgc::gfx_select!(device => global.device_poll(device, true)).unwrap();
+        wgc::gfx_select!(device => global.device_poll(device, true, None)).unwrap();
 
         for expect in self.expectations {
             println!("\t\t\tChecking {}", expect.name);
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 8c6705276f..27f1708860 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -224,6 +224,8 @@ struct ActiveSubmission {
 pub enum WaitIdleError {
     #[error(transparent)]
     Device(#[from] DeviceError),
+    #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")]
+    WrongSubmissionIndex(id::QueueId, id::DeviceId),
     #[error("GPU got stuck :(")]
     StuckGpu,
 }
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 24fa72e571..495c62d299 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -440,6 +440,7 @@ impl Device {
         &'this self,
         hub: &Hub,
         force_wait: bool,
+        submission_index: Option,
         token: &mut Token<'token, Self>,
     ) -> Result<(UserClosures, bool), WaitIdleError> {
         profiling::scope!("maintain", "Device");
@@ -464,13 +465,20 @@ impl Device {
         life_tracker.triage_mapped(hub, token);
 
         let last_done_index = if force_wait {
-            let current_index = self.active_submission_index;
+            let index_to_wait_for = match submission_index {
+                Some(submission_index) => {
+                    // We don't need to check to see if the queue id matches
+                    // as we already checked this from inside the poll call.
+                    submission_index.index
+                }
+                None => self.active_submission_index,
+            };
             unsafe {
                 self.raw
-                    .wait(&self.fence, current_index, CLEANUP_WAIT_MS)
+                    .wait(&self.fence, index_to_wait_for, CLEANUP_WAIT_MS)
                     .map_err(DeviceError::from)?
             };
-            current_index
+            index_to_wait_for
         } else {
             unsafe {
                 self.raw
@@ -4968,15 +4976,25 @@ impl Global {
         &self,
         device_id: id::DeviceId,
         force_wait: bool,
+        submission_index: Option,
     ) -> Result {
         let (closures, queue_empty) = {
+            if let Some(submission_index) = submission_index {
+                if submission_index.queue_id != device_id {
+                    return Err(WaitIdleError::WrongSubmissionIndex(
+                        submission_index.queue_id,
+                        device_id,
+                    ));
+                }
+            }
+
             let hub = A::hub(self);
             let mut token = Token::root();
             let (device_guard, mut token) = hub.devices.read(&mut token);
             device_guard
                 .get(device_id)
                 .map_err(|_| DeviceError::Invalid)?
-                .maintain(hub, force_wait, &mut token)?
+                .maintain(hub, force_wait, submission_index, &mut token)?
         };
         unsafe {
             closures.fire();
         }
@@ -5004,7 +5022,7 @@ impl Global {
         let (device_guard, mut token) = hub.devices.read(&mut token);
 
         for (id, device) in device_guard.iter(A::VARIANT) {
-            let (cbs, queue_empty) = device.maintain(hub, force_wait, &mut token)?;
+            let (cbs, queue_empty) = device.maintain(hub, force_wait, None, &mut token)?;
             all_queue_empty = all_queue_empty && queue_empty;
 
             // If the device's own `RefCount` clone is the only one left, and
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index e76255f772..7246199c2b 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -12,7 +12,7 @@ use crate::{
     id,
     init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
     resource::{BufferAccessError, BufferMapState, TextureInner},
-    track, FastHashSet,
+    track, FastHashSet, SubmissionIndex,
 };
 
 use hal::{CommandEncoder as _, Device as _, Queue as _};
@@ -39,6 +39,13 @@ pub struct SubmittedWorkDoneClosure {
 unsafe impl Send for SubmittedWorkDoneClosure {}
 unsafe impl Sync for SubmittedWorkDoneClosure {}
 
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct WrappedSubmissionIndex {
+    pub queue_id: id::QueueId,
+    pub index: SubmissionIndex,
+}
+
 struct StagingData {
     buffer: A::Buffer,
 }
@@ -580,10 +587,10 @@ impl Global {
         &self,
         queue_id: id::QueueId,
         command_buffer_ids: &[id::CommandBufferId],
-    ) -> Result<(), QueueSubmitError> {
+    ) -> Result {
         profiling::scope!("submit", "Queue");
 
-        let callbacks = {
+        let (submit_index, callbacks) = {
            let hub = A::hub(self);
            let mut token = Token::root();
@@ -918,24 +925,28 @@ impl Global {
            // This will schedule destruction of all resources that are no longer needed
            // by the user but used in the command stream, among other things.
-           let (closures, _) = match device.maintain(hub, false, &mut token) {
+           let (closures, _) = match device.maintain(hub, false, None, &mut token) {
                Ok(closures) => closures,
                Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)),
                Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu),
+               Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(),
            };
 
            device.pending_writes.temp_resources = pending_write_resources;
            device.temp_suspected.clear();
            device.lock_life(&mut token).post_submit();
 
-           closures
+           (submit_index, closures)
        };
 
        // the closures should execute with nothing locked!
        unsafe {
            callbacks.fire();
        }
 
-       Ok(())
+       Ok(WrappedSubmissionIndex {
+           queue_id,
+           index: submit_index,
+       })
    }
 
    pub fn queue_get_timestamp_period(
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 82a3243986..0737053514 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -304,6 +304,7 @@ pub trait Device: Send + Sync {
     unsafe fn create_fence(&self) -> Result;
     unsafe fn destroy_fence(&self, fence: A::Fence);
     unsafe fn get_fence_value(&self, fence: &A::Fence) -> Result;
+    /// Calling wait with a lower value than the current fence value will immediately return.
     unsafe fn wait(
         &self,
         fence: &A::Fence,
diff --git a/wgpu/examples/capture/main.rs b/wgpu/examples/capture/main.rs
index 2f4b2a56a5..52aa8fad2c 100644
--- a/wgpu/examples/capture/main.rs
+++ b/wgpu/examples/capture/main.rs
@@ -5,7 +5,7 @@ use std::env;
 use std::fs::File;
 use std::io::Write;
 use std::mem::size_of;
-use wgpu::{Buffer, Device};
+use wgpu::{Buffer, Device, SubmissionIndex};
 
 async fn run(png_output_path: &str) {
     let args: Vec<_> = env::args().collect();
@@ -20,14 +20,22 @@ async fn run(png_output_path: &str) {
             return;
         }
     };
-    let (device, buffer, buffer_dimensions) = create_red_image_with_dimensions(width, height).await;
-    create_png(png_output_path, device, buffer, &buffer_dimensions).await;
+    let (device, buffer, buffer_dimensions, submission_index) =
+        create_red_image_with_dimensions(width, height).await;
+    create_png(
+        png_output_path,
+        device,
+        buffer,
+        &buffer_dimensions,
+        submission_index,
+    )
+    .await;
 }
 
 async fn create_red_image_with_dimensions(
     width: usize,
     height: usize,
-) -> (Device, Buffer, BufferDimensions) {
+) -> (Device, Buffer, BufferDimensions, SubmissionIndex) {
     let adapter = wgpu::Instance::new(
         wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all),
     )
@@ -114,8 +122,8 @@ async fn create_red_image_with_dimensions(
         encoder.finish()
     };
 
-    queue.submit(Some(command_buffer));
-    (device, output_buffer, buffer_dimensions)
+    let index = queue.submit(Some(command_buffer));
+    (device, output_buffer, buffer_dimensions, index)
 }
 
 async fn create_png(
@@ -123,6 +131,7 @@
     device: Device,
     output_buffer: Buffer,
     buffer_dimensions: &BufferDimensions,
+    submission_index: SubmissionIndex,
 ) {
     // Note that we're not calling `.await` here.
     let buffer_slice = output_buffer.slice(..);
@@ -131,7 +140,9 @@
     // Poll the device in a blocking manner so that our future resolves.
     // In an actual application, `device.poll(...)` should
     // be called in an event loop or on another thread.
-    device.poll(wgpu::Maintain::Wait);
+    //
+    // We pass our submission index so we don't need to wait for any other possible submissions.
+    device.poll(wgpu::Maintain::Wait(Some(submission_index)));
     // If a file system is available, write the buffer as a PNG
     let has_file_system_available = cfg!(not(target_arch = "wasm32"));
     if !has_file_system_available {
diff --git a/wgpu/examples/framework.rs b/wgpu/examples/framework.rs
index 0ed135f1cd..ddab7ce68f 100644
--- a/wgpu/examples/framework.rs
+++ b/wgpu/examples/framework.rs
@@ -518,7 +518,7 @@ pub fn test(mut params: FrameworkRefTest) {
     let dst_buffer_slice = dst_buffer.slice(..);
     let _ = dst_buffer_slice.map_async(wgpu::MapMode::Read);
-    ctx.device.poll(wgpu::Maintain::Wait);
+    ctx.device.poll(wgpu::Maintain::Wait(None));
     let bytes = dst_buffer_slice.get_mapped_range().to_vec();
 
     test_common::image::compare_image_output(
diff --git a/wgpu/examples/hello-compute/main.rs b/wgpu/examples/hello-compute/main.rs
index bbff9130f4..c5d58921ab 100644
--- a/wgpu/examples/hello-compute/main.rs
+++ b/wgpu/examples/hello-compute/main.rs
@@ -153,7 +153,7 @@ async fn execute_gpu_inner(
     // Poll the device in a blocking manner so that our future resolves.
     // In an actual application, `device.poll(...)` should
     // be called in an event loop or on another thread.
-    device.poll(wgpu::Maintain::Wait);
+    device.poll(wgpu::Maintain::Wait(None));
 
     // Awaits until `buffer_future` can be read from
     if let Ok(()) = buffer_future.await {
diff --git a/wgpu/examples/mipmap/main.rs b/wgpu/examples/mipmap/main.rs
index 9603895d57..3570bdaf46 100644
--- a/wgpu/examples/mipmap/main.rs
+++ b/wgpu/examples/mipmap/main.rs
@@ -386,7 +386,7 @@ impl framework::Example for Example {
                 .slice(..)
                 .map_async(wgpu::MapMode::Read);
             // Wait for device to be done rendering mipmaps
-            device.poll(wgpu::Maintain::Wait);
+            device.poll(wgpu::Maintain::Wait(None));
             // This is guaranteed to be ready.
             let timestamp_view = query_sets
                 .data_buffer
diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs
index fdd599e57b..72dd696759 100644
--- a/wgpu/src/backend/direct.rs
+++ b/wgpu/src/backend/direct.rs
@@ -800,6 +800,7 @@ impl crate::Context for Context {
     type SurfaceId = Surface;
     type SurfaceOutputDetail = SurfaceOutputDetail;
 
+    type SubmissionIndex = wgc::device::queue::WrappedSubmissionIndex;
     type RequestAdapterFuture = Ready>;
 
     #[allow(clippy::type_complexity)]
@@ -1571,7 +1572,7 @@
         #[cfg(any(not(target_arch = "wasm32"), feature = "emscripten"))]
         {
-            match wgc::gfx_select!(device.id => global.device_poll(device.id, true)) {
+            match wgc::gfx_select!(device.id => global.device_poll(device.id, true, None)) {
                 Ok(_) => (),
                 Err(err) => self.handle_error_fatal(err, "Device::drop"),
             }
@@ -1582,12 +1583,14 @@
     fn device_poll(&self, device: &Self::DeviceId, maintain: crate::Maintain) -> bool {
         let global = &self.0;
+        let (wait, index) = match maintain {
+            crate::Maintain::Poll => (false, None),
+            crate::Maintain::Wait(index) => (true, index.map(|i| i.0)),
+        };
         match wgc::gfx_select!(device.id => global.device_poll(
             device.id,
-            match maintain {
-                crate::Maintain::Poll => false,
-                crate::Maintain::Wait => true,
-            }
+            wait,
+            index
         )) {
             Ok(queue_empty) => queue_empty,
             Err(err) => self.handle_error_fatal(err, "Device::poll"),
@@ -2190,12 +2193,12 @@
         &self,
         queue: &Self::QueueId,
         command_buffers: I,
-    ) {
+    ) -> Self::SubmissionIndex {
         let temp_command_buffers = command_buffers.collect::>();
         let global = &self.0;
 
         match wgc::gfx_select!(*queue => global.queue_submit(*queue, &temp_command_buffers)) {
-            Ok(()) => (),
+            Ok(index) => index,
             Err(err) => self.handle_error_fatal(err, "Queue::submit"),
         }
     }
diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs
index 18b84cb25c..48738f4f18 100644
--- a/wgpu/src/backend/web.rs
+++ b/wgpu/src/backend/web.rs
@@ -995,6 +995,7 @@ impl crate::Context for Context {
     type SurfaceId = Sendable;
     type SurfaceOutputDetail = SurfaceOutputDetail;
 
+    type SubmissionIndex = ();
     type RequestAdapterFuture = MakeSendFuture<
         wasm_bindgen_futures::JsFuture,
@@ -2200,10 +2201,12 @@ impl crate::Context for Context {
         &self,
         queue: &Self::QueueId,
         command_buffers: I,
-    ) {
+    ) -> Self::SubmissionIndex {
         let temp_command_buffers = command_buffers.map(|i| i.0).collect::();
 
         queue.0.submit(&temp_command_buffers);
+
+        // SubmissionIndex is (), so just let this function end
     }
 
     fn queue_get_timestamp_period(&self, _queue: &Self::QueueId) -> f32 {
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index 15408d8a7d..899cc1a925 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -187,6 +187,7 @@ trait Context: Debug + Send + Sized + Sync {
     type SurfaceId: Debug + Send + Sync + 'static;
     type SurfaceOutputDetail: Send;
 
+    type SubmissionIndex: Debug + Copy + Clone + Send + 'static;
     type RequestAdapterFuture: Future> + Send;
     type RequestDeviceFuture: Future> + Send;
@@ -490,7 +491,7 @@ trait Context: Debug + Send + Sized + Sync {
         &self,
         queue: &Self::QueueId,
         command_buffers: I,
-    );
+    ) -> Self::SubmissionIndex;
     fn queue_get_timestamp_period(&self, queue: &Self::QueueId) -> f32;
     fn queue_on_submitted_work_done(
         &self,
@@ -550,13 +551,24 @@
 pub struct Device {
     id: ::DeviceId,
 }
 
-/// Passed to [`Device::poll`] to control if it should block or not. This has no effect on
-/// the web.
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+/// Identifier for a particular call to [`Queue::submit`]. Can be used
+/// as part of an argument to [`Device::poll`] to block for a particular
+/// submission to finish.
+#[derive(Debug, Copy, Clone)]
+pub struct SubmissionIndex(::SubmissionIndex);
+
+/// Passed to [`Device::poll`] to control how and if it should block.
+#[derive(Clone)]
 pub enum Maintain {
-    /// Block
-    Wait,
-    /// Don't block
+    /// Block until the callbacks for the given submission will resolve on their own.
+    ///
+    /// If the submission index is None it will wait for the most recent submission.
+    ///
+    /// On native this will block the thread until the submission is finished.
+    ///
+    /// On web this is a no-op but all the callbacks will automatically fire.
+    Wait(Option),
+    /// Check the device for a single time without blocking.
     Poll,
 }
@@ -3366,14 +3378,19 @@ impl Queue {
     }
 
     /// Submits a series of finished command buffers for execution.
-    pub fn submit>(&self, command_buffers: I) {
-        Context::queue_submit(
+    pub fn submit>(
+        &self,
+        command_buffers: I,
+    ) -> SubmissionIndex {
+        let raw = Context::queue_submit(
             &*self.context,
             &self.id,
             command_buffers
                 .into_iter()
                 .map(|mut comb| comb.id.take().unwrap()),
         );
+
+        SubmissionIndex(raw)
     }
 
     /// Gets the amount of nanoseconds each tick of a timestamp query represents.
diff --git a/wgpu/tests/vertex_indices/mod.rs b/wgpu/tests/vertex_indices/mod.rs
index fa85ae62d9..1bc362f7db 100644
--- a/wgpu/tests/vertex_indices/mod.rs
+++ b/wgpu/tests/vertex_indices/mod.rs
@@ -124,7 +124,7 @@ fn pulling_common(
     ctx.queue.submit(Some(encoder.finish()));
     let slice = buffer.slice(..);
     let _ = slice.map_async(wgpu::MapMode::Read);
-    ctx.device.poll(wgpu::Maintain::Wait);
+    ctx.device.poll(wgpu::Maintain::Wait(None));
 
     let data: Vec = bytemuck::cast_slice(&*slice.get_mapped_range()).to_vec();
     assert_eq!(data, expected);
diff --git a/wgpu/tests/zero_init_texture_after_discard.rs b/wgpu/tests/zero_init_texture_after_discard.rs
index ec77e909f0..488778aff1 100644
--- a/wgpu/tests/zero_init_texture_after_discard.rs
+++ b/wgpu/tests/zero_init_texture_after_discard.rs
@@ -283,7 +283,7 @@ fn assert_buffer_is_zero(readback_buffer: &wgpu::Buffer, device: &wgpu::Device) {
     let buffer_slice = readback_buffer.slice(..);
     let _ = buffer_slice.map_async(wgpu::MapMode::Read);
-    device.poll(wgpu::Maintain::Wait);
+    device.poll(wgpu::Maintain::Wait(None));
 
     let buffer_view = buffer_slice.get_mapped_range();
     assert!(
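
Taken together, the changes above mean `Queue::submit` now returns a `SubmissionIndex` that can be fed back into `Device::poll` through `Maintain::Wait`, so a caller blocks only on its own submission instead of the most recent one. A minimal usage sketch (not part of the patch; the helper name and the command-buffer argument are illustrative only):

```rust
// Usage sketch only; `submit_and_block` is a hypothetical helper, not part of the patch.
fn submit_and_block(device: &wgpu::Device, queue: &wgpu::Queue, buf: wgpu::CommandBuffer) {
    // `Queue::submit` now returns an index identifying this particular submission.
    let index: wgpu::SubmissionIndex = queue.submit(Some(buf));

    // Block only until that submission has finished; `Maintain::Wait(None)`
    // keeps the old behaviour of waiting for the most recent submission.
    device.poll(wgpu::Maintain::Wait(Some(index)));
}
```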