diff --git a/deno_webgpu/src/buffer.rs b/deno_webgpu/src/buffer.rs index 592ba7ace5..97934c0159 100644 --- a/deno_webgpu/src/buffer.rs +++ b/deno_webgpu/src/buffer.rs @@ -119,7 +119,7 @@ pub async fn op_webgpu_buffer_get_map_async( { let state = state.borrow(); let instance = state.borrow::(); - gfx_select!(device => instance.device_poll(device, false)).unwrap(); + gfx_select!(device => instance.device_poll(device, wgpu_types::Maintain::Wait)).unwrap(); } tokio::time::sleep(Duration::from_millis(10)).await; } diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index 524c859430..d973ec48c0 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -95,7 +95,7 @@ fn main() { } gfx_select!(device => global.device_stop_capture(device)); - gfx_select!(device => global.device_poll(device, true)).unwrap(); + gfx_select!(device => global.device_poll(device, wgt::Maintain::Wait)).unwrap(); } #[cfg(feature = "winit")] { @@ -181,7 +181,7 @@ fn main() { }, Event::LoopDestroyed => { log::info!("Closing"); - gfx_select!(device => global.device_poll(device, true)).unwrap(); + gfx_select!(device => global.device_poll(device, wgt::Maintain::Wait)).unwrap(); } _ => {} } diff --git a/player/tests/test.rs b/player/tests/test.rs index 8f75902d9a..0ced0fad8c 100644 --- a/player/tests/test.rs +++ b/player/tests/test.rs @@ -121,7 +121,7 @@ impl Test<'_> { } println!("\t\t\tWaiting..."); - wgc::gfx_select!(device => global.device_poll(device, true)).unwrap(); + wgc::gfx_select!(device => global.device_poll(device, wgt::Maintain::Wait)).unwrap(); for expect in self.expectations { println!("\t\t\tChecking {}", expect.name); diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 5062f9189f..3f8cd6e7cf 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -224,6 +224,8 @@ struct ActiveSubmission { pub enum WaitIdleError { #[error(transparent)] Device(#[from] DeviceError), + #[error("Tried to wait using a submission index from the wrong device. Submission index is from device {0:?}. Called poll on device {1:?}.")] + WrongSubmissionIndex(id::QueueId, id::DeviceId), #[error("GPU got stuck :(")] StuckGpu, } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 0832bedd69..8a9eb0fbbe 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -428,6 +428,9 @@ impl Device { /// Check this device for completed commands. /// + /// The `maintain` argument tells how the maintence function should behave, either + /// blocking or just polling the current state of the gpu. + /// /// Return a pair `(closures, queue_empty)`, where: /// /// - `closures` is a list of actions to take: mapping buffers, notifying the user @@ -439,7 +442,7 @@ impl Device { fn maintain<'this, 'token: 'this, G: GlobalIdentityHandlerFactory>( &'this self, hub: &Hub, - force_wait: bool, + maintain: wgt::Maintain, token: &mut Token<'token, Self>, ) -> Result<(UserClosures, bool), WaitIdleError> { profiling::scope!("maintain", "Device"); @@ -463,14 +466,21 @@ impl Device { ); life_tracker.triage_mapped(hub, token); - let last_done_index = if force_wait { - let current_index = self.active_submission_index; + let last_done_index = if maintain.is_wait() { + let index_to_wait_for = match maintain { + wgt::Maintain::WaitForSubmissionIndex(submission_index) => { + // We don't need to check to see if the queue id matches + // as we already checked this from inside the poll call. + submission_index.index + } + _ => self.active_submission_index, + }; unsafe { self.raw - .wait(&self.fence, current_index, CLEANUP_WAIT_MS) + .wait(&self.fence, index_to_wait_for, CLEANUP_WAIT_MS) .map_err(DeviceError::from)? }; - current_index + index_to_wait_for } else { unsafe { self.raw @@ -4957,16 +4967,25 @@ impl Global { pub fn device_poll( &self, device_id: id::DeviceId, - force_wait: bool, + maintain: wgt::Maintain, ) -> Result { let (closures, queue_empty) = { + if let wgt::Maintain::WaitForSubmissionIndex(submission_index) = maintain { + if submission_index.queue_id != device_id { + return Err(WaitIdleError::WrongSubmissionIndex( + submission_index.queue_id, + device_id, + )); + } + } + let hub = A::hub(self); let mut token = Token::root(); let (device_guard, mut token) = hub.devices.read(&mut token); device_guard .get(device_id) .map_err(|_| DeviceError::Invalid)? - .maintain(hub, force_wait, &mut token)? + .maintain(hub, maintain, &mut token)? }; closures.fire(); @@ -4994,7 +5013,12 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); for (id, device) in device_guard.iter(A::VARIANT) { - let (cbs, queue_empty) = device.maintain(hub, force_wait, &mut token)?; + let maintain = if force_wait { + wgt::Maintain::Wait + } else { + wgt::Maintain::Poll + }; + let (cbs, queue_empty) = device.maintain(hub, maintain, &mut token)?; all_queue_empty = all_queue_empty && queue_empty; // If the device's own `RefCount` clone is the only one left, and diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 83de498ef6..6e0f589cae 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -12,7 +12,7 @@ use crate::{ id, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, resource::{BufferAccessError, BufferMapState, TextureInner}, - track, FastHashSet, + track, FastHashSet, SubmissionIndex, }; use hal::{CommandEncoder as _, Device as _, Queue as _}; @@ -79,6 +79,13 @@ impl SubmittedWorkDoneClosure { } } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct WrappedSubmissionIndex { + pub queue_id: id::QueueId, + pub index: SubmissionIndex, +} + struct StagingData { buffer: A::Buffer, } @@ -620,10 +627,10 @@ impl Global { &self, queue_id: id::QueueId, command_buffer_ids: &[id::CommandBufferId], - ) -> Result<(), QueueSubmitError> { + ) -> Result { profiling::scope!("submit", "Queue"); - let callbacks = { + let (submit_index, callbacks) = { let hub = A::hub(self); let mut token = Token::root(); @@ -958,23 +965,27 @@ impl Global { // This will schedule destruction of all resources that are no longer needed // by the user but used in the command stream, among other things. - let (closures, _) = match device.maintain(hub, false, &mut token) { + let (closures, _) = match device.maintain(hub, wgt::Maintain::Wait, &mut token) { Ok(closures) => closures, Err(WaitIdleError::Device(err)) => return Err(QueueSubmitError::Queue(err)), Err(WaitIdleError::StuckGpu) => return Err(QueueSubmitError::StuckGpu), + Err(WaitIdleError::WrongSubmissionIndex(..)) => unreachable!(), }; device.pending_writes.temp_resources = pending_write_resources; device.temp_suspected.clear(); device.lock_life(&mut token).post_submit(); - closures + (submit_index, closures) }; // the closures should execute with nothing locked! callbacks.fire(); - Ok(()) + Ok(WrappedSubmissionIndex { + queue_id, + index: submit_index, + }) } pub fn queue_get_timestamp_period( diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 1ce7c657ba..6f1c2dde1e 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -304,6 +304,7 @@ pub trait Device: Send + Sync { unsafe fn create_fence(&self) -> Result; unsafe fn destroy_fence(&self, fence: A::Fence); unsafe fn get_fence_value(&self, fence: &A::Fence) -> Result; + /// Calling wait with a lower value than the current fence value will immediately return. unsafe fn wait( &self, fence: &A::Fence, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 5e3a010b5a..d9144b8a8f 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -2287,6 +2287,43 @@ impl Default for ColorWrites { } } +/// Passed to `Device::poll` to control how and if it should block. +#[derive(Clone)] +pub enum Maintain { + /// On native backends, block until the given submission has + /// completed execution, and any callbacks have been invoked. + /// + /// On the web, this has no effect. Callbacks are invoked from the + /// window event loop. + WaitForSubmissionIndex(T), + /// Same as WaitForSubmissionIndex but waits for the most recent submission. + Wait, + /// Check the device for a single time without blocking. + Poll, +} + +impl Maintain { + /// This maintain represents a wait of some kind. + pub fn is_wait(&self) -> bool { + match *self { + Self::WaitForSubmissionIndex(..) | Self::Wait => true, + Self::Poll => false, + } + } + + /// Map on the wait index type. + pub fn map_index(self, func: F) -> Maintain + where + F: FnOnce(T) -> U, + { + match self { + Self::WaitForSubmissionIndex(i) => Maintain::WaitForSubmissionIndex(func(i)), + Self::Wait => Maintain::Wait, + Self::Poll => Maintain::Poll, + } + } +} + /// State of the stencil operation (fixed-pipeline stage). /// /// For use in [`DepthStencilState`]. diff --git a/wgpu/examples/capture/main.rs b/wgpu/examples/capture/main.rs index 44d53e8e34..6e85a9a16a 100644 --- a/wgpu/examples/capture/main.rs +++ b/wgpu/examples/capture/main.rs @@ -5,7 +5,7 @@ use std::env; use std::fs::File; use std::io::Write; use std::mem::size_of; -use wgpu::{Buffer, Device}; +use wgpu::{Buffer, Device, SubmissionIndex}; async fn run(png_output_path: &str) { let args: Vec<_> = env::args().collect(); @@ -20,14 +20,22 @@ async fn run(png_output_path: &str) { return; } }; - let (device, buffer, buffer_dimensions) = create_red_image_with_dimensions(width, height).await; - create_png(png_output_path, device, buffer, &buffer_dimensions).await; + let (device, buffer, buffer_dimensions, submission_index) = + create_red_image_with_dimensions(width, height).await; + create_png( + png_output_path, + device, + buffer, + &buffer_dimensions, + submission_index, + ) + .await; } async fn create_red_image_with_dimensions( width: usize, height: usize, -) -> (Device, Buffer, BufferDimensions) { +) -> (Device, Buffer, BufferDimensions, SubmissionIndex) { let adapter = wgpu::Instance::new( wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all), ) @@ -114,8 +122,8 @@ async fn create_red_image_with_dimensions( encoder.finish() }; - queue.submit(Some(command_buffer)); - (device, output_buffer, buffer_dimensions) + let index = queue.submit(Some(command_buffer)); + (device, output_buffer, buffer_dimensions, index) } async fn create_png( @@ -123,6 +131,7 @@ async fn create_png( device: Device, output_buffer: Buffer, buffer_dimensions: &BufferDimensions, + submission_index: SubmissionIndex, ) { // Note that we're not calling `.await` here. let buffer_slice = output_buffer.slice(..); @@ -133,7 +142,9 @@ async fn create_png( // Poll the device in a blocking manner so that our future resolves. // In an actual application, `device.poll(...)` should // be called in an event loop or on another thread. - device.poll(wgpu::Maintain::Wait); + // + // We pass our submission index so we don't need to wait for any other possible submissions. + device.poll(wgpu::Maintain::WaitForSubmissionIndex(submission_index)); // If a file system is available, write the buffer as a PNG let has_file_system_available = cfg!(not(target_arch = "wasm32")); if !has_file_system_available { diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index e9aac2a289..58acf88c3b 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -800,6 +800,7 @@ impl crate::Context for Context { type SurfaceId = Surface; type SurfaceOutputDetail = SurfaceOutputDetail; + type SubmissionIndex = wgc::device::queue::WrappedSubmissionIndex; type RequestAdapterFuture = Ready>; #[allow(clippy::type_complexity)] @@ -1569,7 +1570,8 @@ impl crate::Context for Context { #[cfg(any(not(target_arch = "wasm32"), feature = "emscripten"))] { - match wgc::gfx_select!(device.id => global.device_poll(device.id, true)) { + match wgc::gfx_select!(device.id => global.device_poll(device.id, wgt::Maintain::Wait)) + { Ok(_) => (), Err(err) => self.handle_error_fatal(err, "Device::drop"), } @@ -1580,12 +1582,10 @@ impl crate::Context for Context { fn device_poll(&self, device: &Self::DeviceId, maintain: crate::Maintain) -> bool { let global = &self.0; + let maintain_inner = maintain.map_index(|i| i.0); match wgc::gfx_select!(device.id => global.device_poll( device.id, - match maintain { - crate::Maintain::Poll => false, - crate::Maintain::Wait => true, - } + maintain_inner )) { Ok(queue_empty) => queue_empty, Err(err) => self.handle_error_fatal(err, "Device::poll"), @@ -2179,12 +2179,12 @@ impl crate::Context for Context { &self, queue: &Self::QueueId, command_buffers: I, - ) { + ) -> Self::SubmissionIndex { let temp_command_buffers = command_buffers.collect::>(); let global = &self.0; match wgc::gfx_select!(*queue => global.queue_submit(*queue, &temp_command_buffers)) { - Ok(()) => (), + Ok(index) => index, Err(err) => self.handle_error_fatal(err, "Queue::submit"), } } diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index 823b76d061..af496942c8 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -995,6 +995,7 @@ impl crate::Context for Context { type SurfaceId = Sendable; type SurfaceOutputDetail = SurfaceOutputDetail; + type SubmissionIndex = (); type RequestAdapterFuture = MakeSendFuture< wasm_bindgen_futures::JsFuture, @@ -2213,10 +2214,12 @@ impl crate::Context for Context { &self, queue: &Self::QueueId, command_buffers: I, - ) { + ) -> Self::SubmissionIndex { let temp_command_buffers = command_buffers.map(|i| i.0).collect::(); queue.0.submit(&temp_command_buffers); + + // SubmissionIndex is (), so just let this function end } fn queue_get_timestamp_period(&self, _queue: &Self::QueueId) -> f32 { diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index bed6d8743b..38ae0fab0f 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -187,6 +187,7 @@ trait Context: Debug + Send + Sized + Sync { type SurfaceId: Debug + Send + Sync + 'static; type SurfaceOutputDetail: Send; + type SubmissionIndex: Debug + Copy + Clone + Send + 'static; type RequestAdapterFuture: Future> + Send; type RequestDeviceFuture: Future> @@ -492,7 +493,7 @@ trait Context: Debug + Send + Sized + Sync { &self, queue: &Self::QueueId, command_buffers: I, - ); + ) -> Self::SubmissionIndex; fn queue_get_timestamp_period(&self, queue: &Self::QueueId) -> f32; fn queue_on_submitted_work_done( &self, @@ -557,15 +558,11 @@ pub struct Device { id: ::DeviceId, } -/// Passed to [`Device::poll`] to control if it should block or not. This has no effect on -/// the web. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum Maintain { - /// Block - Wait, - /// Don't block - Poll, -} +/// Identifier for a particular call to [`Queue::submit`]. Can be used +/// as part of an argument to [`Device::poll`] to block for a particular +/// submission to finish. +#[derive(Debug, Copy, Clone)] +pub struct SubmissionIndex(::SubmissionIndex); /// The main purpose of this struct is to resolve mapped ranges (convert sizes /// to end points), and to ensure that the sub-ranges don't intersect. @@ -1258,6 +1255,9 @@ pub type TextureDescriptor<'a> = wgt::TextureDescriptor>; /// Corresponds to [WebGPU `GPUQuerySetDescriptor`]( /// https://gpuweb.github.io/gpuweb/#dictdef-gpuquerysetdescriptor). pub type QuerySetDescriptor<'a> = wgt::QuerySetDescriptor>; +pub use wgt::Maintain as MaintainBase; +/// Passed to [`Device::poll`] to control how and if it should block. +pub type Maintain = wgt::Maintain; /// Describes a [`TextureView`]. /// @@ -3374,14 +3374,19 @@ impl Queue { } /// Submits a series of finished command buffers for execution. - pub fn submit>(&self, command_buffers: I) { - Context::queue_submit( + pub fn submit>( + &self, + command_buffers: I, + ) -> SubmissionIndex { + let raw = Context::queue_submit( &*self.context, &self.id, command_buffers .into_iter() .map(|mut comb| comb.id.take().unwrap()), ); + + SubmissionIndex(raw) } /// Gets the amount of nanoseconds each tick of a timestamp query represents. diff --git a/wgpu/tests/common/mod.rs b/wgpu/tests/common/mod.rs index a98de71ac6..b0df54d2f1 100644 --- a/wgpu/tests/common/mod.rs +++ b/wgpu/tests/common/mod.rs @@ -110,7 +110,7 @@ impl TestParameters { self } - /// Mark the test as always failing, equivilant to specific_failure(None, None, None) + /// Mark the test as always failing, equivalent to specific_failure(None, None, None) pub fn failure(mut self) -> Self { self.failures.push(FailureCase { backends: None, @@ -121,7 +121,18 @@ impl TestParameters { self } - /// Mark the test as always failing on a specific backend, equivilant to specific_failure(backend, None, None) + /// Mark the test as always failing and needing to be skipped, equivalent to specific_failure(None, None, None) + pub fn skip(mut self) -> Self { + self.failures.push(FailureCase { + backends: None, + vendor: None, + adapter: None, + skip: true, + }); + self + } + + /// Mark the test as always failing on a specific backend, equivalent to specific_failure(backend, None, None) pub fn backend_failure(mut self, backends: wgpu::Backends) -> Self { self.failures.push(FailureCase { backends: Some(backends), diff --git a/wgpu/tests/poll.rs b/wgpu/tests/poll.rs new file mode 100644 index 0000000000..6113436d0b --- /dev/null +++ b/wgpu/tests/poll.rs @@ -0,0 +1,108 @@ +use std::num::NonZeroU64; + +use wgpu::{ + BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, + BindingResource, BindingType, BufferBindingType, BufferDescriptor, BufferUsages, CommandBuffer, + CommandEncoderDescriptor, ComputePassDescriptor, Maintain, ShaderStages, +}; + +use crate::common::{initialize_test, TestParameters, TestingContext}; + +fn generate_dummy_work(ctx: &TestingContext) -> CommandBuffer { + let buffer = ctx.device.create_buffer(&BufferDescriptor { + label: None, + size: 16, + usage: BufferUsages::UNIFORM, + mapped_at_creation: false, + }); + + let bind_group_layout = ctx + .device + .create_bind_group_layout(&BindGroupLayoutDescriptor { + label: None, + entries: &[BindGroupLayoutEntry { + binding: 0, + visibility: ShaderStages::COMPUTE, + ty: BindingType::Buffer { + ty: BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: Some(NonZeroU64::new(16).unwrap()), + }, + count: None, + }], + }); + + let bind_group = ctx.device.create_bind_group(&BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &[BindGroupEntry { + binding: 0, + resource: BindingResource::Buffer(buffer.as_entire_buffer_binding()), + }], + }); + + let mut cmd_buf = ctx + .device + .create_command_encoder(&CommandEncoderDescriptor::default()); + + let mut cpass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::default()); + cpass.set_bind_group(0, &bind_group, &[]); + drop(cpass); + + cmd_buf.finish() +} + +#[test] +fn wait() { + initialize_test(TestParameters::default().skip(), |ctx| { + let cmd_buf = generate_dummy_work(&ctx); + + ctx.queue.submit(Some(cmd_buf)); + ctx.device.poll(Maintain::Wait); + }) +} + +#[test] +fn double_wait() { + initialize_test(TestParameters::default().skip(), |ctx| { + let cmd_buf = generate_dummy_work(&ctx); + + ctx.queue.submit(Some(cmd_buf)); + ctx.device.poll(Maintain::Wait); + ctx.device.poll(Maintain::Wait); + }) +} + +#[test] +fn wait_on_submission() { + initialize_test(TestParameters::default().skip(), |ctx| { + let cmd_buf = generate_dummy_work(&ctx); + + let index = ctx.queue.submit(Some(cmd_buf)); + ctx.device.poll(Maintain::WaitForSubmissionIndex(index)); + }) +} + +#[test] +fn double_wait_on_submission() { + initialize_test(TestParameters::default().skip(), |ctx| { + let cmd_buf = generate_dummy_work(&ctx); + + let index = ctx.queue.submit(Some(cmd_buf)); + ctx.device.poll(Maintain::WaitForSubmissionIndex(index)); + ctx.device.poll(Maintain::WaitForSubmissionIndex(index)); + }) +} + +#[test] +fn wait_out_of_order() { + initialize_test(TestParameters::default().skip(), |ctx| { + let cmd_buf1 = generate_dummy_work(&ctx); + let cmd_buf2 = generate_dummy_work(&ctx); + + let index1 = ctx.queue.submit(Some(cmd_buf1)); + let index2 = ctx.queue.submit(Some(cmd_buf2)); + ctx.device.poll(Maintain::WaitForSubmissionIndex(index2)); + ctx.device.poll(Maintain::WaitForSubmissionIndex(index1)); + }) +} diff --git a/wgpu/tests/root.rs b/wgpu/tests/root.rs index 95bf28bf34..49b7a33872 100644 --- a/wgpu/tests/root.rs +++ b/wgpu/tests/root.rs @@ -5,5 +5,6 @@ mod clear_texture; mod device; mod example_wgsl; mod instance; +mod poll; mod vertex_indices; mod zero_init_texture_after_discard;