From fff56b6f990427eb4fa85420b72a066827038294 Mon Sep 17 00:00:00 2001 From: Brad Werth Date: Mon, 11 Sep 2023 15:20:31 -0700 Subject: [PATCH] Implement Device validity, and make it invalid when Device is lost. This tracks Device validity explicity, but it could be changed to instead change Device Storage to Error when the Device is invalid. This also could go further with standardizing the error reporting for an invalid Device. --- wgpu-core/src/command/clear.rs | 3 ++ wgpu-core/src/command/compute.rs | 6 +++ wgpu-core/src/command/render.rs | 6 +++ wgpu-core/src/command/transfer.rs | 16 +++++- wgpu-core/src/device/global.rs | 84 +++++++++++++++++++++++++++++-- wgpu-core/src/device/mod.rs | 8 ++- wgpu-core/src/device/resource.rs | 7 +++ wgpu-core/src/present.rs | 9 ++++ 8 files changed, 130 insertions(+), 9 deletions(-) diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index ceceb2ba58..abb9c1cbaf 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -219,6 +219,9 @@ impl Global { } let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(ClearError::InvalidDevice(cmd_buf.device_id.value.0)); + } clear_texture( &*texture_guard, diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index c9b20c5a0e..a803929935 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -16,6 +16,7 @@ use crate::{ hal_api::HalApi, hub::Token, id, + id::DeviceId, identity::GlobalIdentityHandlerFactory, init_tracker::MemoryInitKind, pipeline, @@ -193,6 +194,8 @@ pub enum ComputePassErrorInner { Encoder(#[from] CommandEncoderError), #[error("Bind group {0:?} is invalid")] InvalidBindGroup(id::BindGroupId), + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")] BindGroupIndexOutOfRange { index: u32, max: u32 }, #[error("Compute pipeline {0:?} is invalid")] @@ -390,6 +393,9 @@ impl Global { let raw = cmd_buf.encoder.open(); let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(ComputePassErrorInner::InvalidDevice(cmd_buf.device_id.value.0)).map_pass_err(init_scope); + } #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index f9202ee3f5..4f0232d1ec 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -18,6 +18,7 @@ use crate::{ hal_api::HalApi, hub::Token, id, + id::DeviceId, identity::GlobalIdentityHandlerFactory, init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::{self, PipelineFlags}, @@ -523,6 +524,8 @@ pub enum RenderPassErrorInner { ColorAttachment(#[from] ColorAttachmentError), #[error(transparent)] Encoder(#[from] CommandEncoderError), + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), #[error("Attachment texture view {0:?} is invalid")] InvalidAttachment(id::TextureViewId), #[error("The format of the depth-stencil attachment ({0:?}) is not a depth-stencil format")] @@ -1346,6 +1349,9 @@ impl Global { } let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(RenderPassErrorInner::InvalidDevice(cmd_buf.device_id.value.0)).map_pass_err(init_scope); + } cmd_buf.encoder.open_pass(base.label); let (bundle_guard, mut token) = hub.render_bundles.read(&mut token); diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 58f88a70e1..d3d062591b 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -8,7 +8,7 @@ use crate::{ global::Global, hal_api::HalApi, hub::Token, - id::{BufferId, CommandEncoderId, TextureId, Valid}, + id::{BufferId, CommandEncoderId, DeviceId, TextureId, Valid}, identity::GlobalIdentityHandlerFactory, init_tracker::{ has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange, @@ -40,6 +40,8 @@ pub enum CopySide { #[derive(Clone, Debug, Error)] #[non_exhaustive] pub enum TransferError { + #[error("Device {0:?} is invalid")] + InvalidDevice(DeviceId), #[error("Buffer {0:?} is invalid or destroyed")] InvalidBuffer(BufferId), #[error("Texture {0:?} is invalid or destroyed")] @@ -569,6 +571,9 @@ impl Global { let (buffer_guard, _) = hub.buffers.read(&mut token); let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device_id.value.0).into()); + } #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { @@ -714,6 +719,9 @@ impl Global { let (texture_guard, _) = hub.textures.read(&mut token); let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device_id.value.0).into()); + } #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { @@ -858,6 +866,9 @@ impl Global { let (texture_guard, _) = hub.textures.read(&mut token); let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device_id.value.0).into()); + } #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { @@ -1020,6 +1031,9 @@ impl Global { let (texture_guard, _) = hub.textures.read(&mut token); let device = &device_guard[cmd_buf.device_id.value]; + if !device.is_valid() { + return Err(TransferError::InvalidDevice(cmd_buf.device_id.value.0).into()); + } #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 8fe5a6fcc9..5edd8dd04c 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -148,6 +148,9 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } if desc.usage.is_empty() { // Per spec, `usage` must not be zero. @@ -586,6 +589,9 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -641,6 +647,9 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } // NB: Any change done through the raw texture handle will not be // recorded in the replay @@ -717,6 +726,9 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } // NB: Any change done through the raw buffer handle will not be // recorded in the replay @@ -978,6 +990,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -1056,6 +1072,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -1170,6 +1190,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -1249,6 +1273,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -1351,6 +1379,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { let mut trace = trace.lock(); @@ -1416,6 +1448,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { let mut trace = trace.lock(); @@ -1486,6 +1522,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid, }; + if !device.valid { + break DeviceError::Invalid; + } + let dev_stored = Stored { value: id::Valid(device_id), ref_count: device.life_guard.add_ref(), @@ -1579,6 +1619,10 @@ impl Global { Ok(device) => device, Err(_) => break command::RenderBundleError::INVALID_DEVICE, }; + if !device.valid { + break command::RenderBundleError::INVALID_DEVICE; + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace.lock().add(trace::Action::CreateRenderBundle { @@ -1661,6 +1705,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace.lock().add(trace::Action::CreateQuerySet { @@ -1750,6 +1798,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + let adapter = &adapter_guard[device.adapter_id.value]; #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { @@ -1893,6 +1945,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace.lock().add(trace::Action::CreateComputePipeline { @@ -2141,6 +2197,10 @@ impl Global { Ok(device) => device, Err(_) => break DeviceError::Invalid.into(), }; + if !device.valid { + break DeviceError::Invalid.into(); + } + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -2423,6 +2483,22 @@ impl Global { } } + pub fn device_lose(&self, device_id: DeviceId) { + let hub = A::hub(self); + let mut token = Token::root(); + + let (mut device_guard, _) = hub.devices.write(&mut token); + if let Ok(device) = device_guard.get_mut(device_id) { + // Mark the device explicitly as invalid. This is checked in various + // places to prevent new work from being submitted, allowing a later + // call to `poll_devices` to drop this device when the work queues + // are cleared. + device.valid = false; + } + + // TODO(BJW): inform the GPUDevice that it is lost, and provide a reason. + } + /// Exit the unreferenced, inactive device `device_id`. fn exit_device(&self, device_id: DeviceId) { let hub = A::hub(self); @@ -2507,6 +2583,11 @@ impl Global { } }; + let device = &device_guard[buffer.device_id.value]; + if !device.valid { + return Err((op, BufferAccessError::Invalid)); + } + if let Err(e) = check_buffer_usage(buffer.usage, pub_usage) { return Err((op, e.into())); } @@ -2547,8 +2628,6 @@ impl Global { }; log::debug!("Buffer {:?} map state -> Waiting", buffer_id); - let device = &device_guard[buffer.device_id.value]; - let ret = (buffer.device_id.value, buffer.life_guard.add_ref()); let mut trackers = device.trackers.lock(); @@ -2561,7 +2640,6 @@ impl Global { }; let device = &device_guard[device_id]; - device .lock_life(&mut token) .map(id::Valid(buffer_id), ref_count); diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 0ae6d7a2dd..6e6c340085 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -293,11 +293,9 @@ pub struct InvalidDevice; #[derive(Clone, Debug, Error)] pub enum DeviceError { - #[error("Parent device is invalid")] + #[error("Parent device is invalid or lost.")] Invalid, - #[error("Parent device is lost")] - Lost, - #[error("Not enough memory left")] + #[error("Not enough memory left.")] OutOfMemory, #[error("Creation of a resource failed for a reason other than running out of memory.")] ResourceCreationFailed, @@ -306,7 +304,7 @@ pub enum DeviceError { impl From for DeviceError { fn from(error: hal::DeviceError) -> Self { match error { - hal::DeviceError::Lost => DeviceError::Lost, + hal::DeviceError::Lost => DeviceError::Invalid, hal::DeviceError::OutOfMemory => DeviceError::OutOfMemory, hal::DeviceError::ResourceCreationFailed => DeviceError::ResourceCreationFailed, } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 73f1887e10..06cf5a6b07 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -72,6 +72,8 @@ pub struct Device { pub(crate) active_submission_index: SubmissionIndex, pub(super) fence: A::Fence, + pub(super) valid: bool, + /// All live resources allocated with this [`Device`]. /// /// Has to be locked temporarily only (locked last) @@ -189,6 +191,7 @@ impl Device { command_allocator: Mutex::new(com_alloc), active_submission_index: 0, fence, + valid: true, trackers: Mutex::new(Tracker::new()), life_tracker: Mutex::new(life::LifetimeTracker::new()), temp_suspected: life::SuspectedResources::default(), @@ -214,6 +217,10 @@ impl Device { }) } + pub fn is_valid(&self) -> bool { + self.valid + } + pub(super) fn lock_life<'this, 'token: 'this>( &'this self, //TODO: fix this - the token has to be borrowed for the lock diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index c9df46ad93..f6ac0356ec 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -126,6 +126,9 @@ impl Global { let (device, config) = match surface.presentation { Some(ref present) => { let device = &device_guard[present.device_id.value]; + if !device.is_valid() { + return Err(DeviceError::Invalid.into()); + } (device, present.config.clone()) } None => return Err(SurfaceError::NotConfigured), @@ -278,6 +281,9 @@ impl Global { }; let device = &mut device_guard[present.device_id.value]; + if !device.is_valid() { + return Err(DeviceError::Invalid.into()); + } #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { @@ -372,6 +378,9 @@ impl Global { }; let device = &mut device_guard[present.device_id.value]; + if !device.is_valid() { + return Err(DeviceError::Invalid.into()); + } #[cfg(feature = "trace")] if let Some(ref trace) = device.trace {