From 215884184bcd7592b6ee50de0ad60fb2d68e29dc Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Thu, 13 Oct 2022 11:34:44 -0700 Subject: [PATCH] Reformat comments in wgpu-core. (#3102) --- wgpu-core/src/binding_model.rs | 22 +++- wgpu-core/src/command/bind.rs | 5 +- wgpu-core/src/command/bundle.rs | 26 ++-- wgpu-core/src/command/clear.rs | 32 +++-- wgpu-core/src/command/compute.rs | 13 +- wgpu-core/src/command/memory_init.rs | 76 +++++++---- wgpu-core/src/command/mod.rs | 3 +- wgpu-core/src/command/query.rs | 3 +- wgpu-core/src/command/render.rs | 79 +++++++---- wgpu-core/src/command/transfer.rs | 46 +++++-- wgpu-core/src/device/mod.rs | 80 +++++++---- wgpu-core/src/device/queue.rs | 69 ++++++---- wgpu-core/src/hub.rs | 5 +- wgpu-core/src/init_tracker/buffer.rs | 6 +- wgpu-core/src/init_tracker/mod.rs | 73 +++++++---- wgpu-core/src/init_tracker/texture.rs | 8 +- wgpu-core/src/instance.rs | 25 ++-- wgpu-core/src/lib.rs | 5 +- wgpu-core/src/resource.rs | 29 ++-- wgpu-core/src/track/mod.rs | 182 +++++++++++++------------- wgpu-core/src/track/texture.rs | 24 ++-- wgpu-core/src/validation.rs | 3 +- 22 files changed, 525 insertions(+), 289 deletions(-) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 71f95a723d..81d0c5ccff 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -403,7 +403,9 @@ pub struct BindGroupEntry<'a> { #[cfg_attr(feature = "trace", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct BindGroupDescriptor<'a> { - /// Debug label of the bind group. This will show up in graphics debuggers for easy identification. + /// Debug label of the bind group. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, /// The [`BindGroupLayout`] that corresponds to this bind group. pub layout: BindGroupLayoutId, @@ -416,7 +418,9 @@ pub struct BindGroupDescriptor<'a> { #[cfg_attr(feature = "trace", derive(serde::Serialize))] #[cfg_attr(feature = "replay", derive(serde::Deserialize))] pub struct BindGroupLayoutDescriptor<'a> { - /// Debug label of the bind group layout. This will show up in graphics debuggers for easy identification. + /// Debug label of the bind group layout. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, /// Array of entries in this BindGroupLayout pub entries: Cow<'a, [wgt::BindGroupLayoutEntry]>, @@ -537,16 +541,20 @@ pub enum PushConstantUploadError { #[cfg_attr(feature = "trace", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] pub struct PipelineLayoutDescriptor<'a> { - /// Debug label of the pipeine layout. This will show up in graphics debuggers for easy identification. + /// Debug label of the pipeine layout. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, /// Bind groups that this pipeline uses. The first entry will provide all the bindings for /// "set = 0", second entry will provide all the bindings for "set = 1" etc. pub bind_group_layouts: Cow<'a, [BindGroupLayoutId]>, - /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants - /// must define the range in push constant memory that corresponds to its single `layout(push_constant)` - /// uniform block. + /// Set of push constant ranges this pipeline uses. Each shader stage that + /// uses push constants must define the range in push constant memory that + /// corresponds to its single `layout(push_constant)` uniform block. /// - /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) must be enabled. + /// If this array is non-empty, the + /// [`Features::PUSH_CONSTANTS`](wgt::Features::PUSH_CONSTANTS) feature must + /// be enabled. pub push_constant_ranges: Cow<'a, [wgt::PushConstantRange]>, } diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs index cc928ef178..fdcb60c52d 100644 --- a/wgpu-core/src/command/bind.rs +++ b/wgpu-core/src/command/bind.rs @@ -309,8 +309,9 @@ struct PushConstantChange { enable: bool, } -/// Break up possibly overlapping push constant ranges into a set of non-overlapping ranges -/// which contain all the stage flags of the original ranges. This allows us to zero out (or write any value) +/// Break up possibly overlapping push constant ranges into a set of +/// non-overlapping ranges which contain all the stage flags of the +/// original ranges. This allows us to zero out (or write any value) /// to every possible value. pub fn compute_nonoverlapping_ranges( ranges: &[wgt::PushConstantRange], diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index c756137f60..8731edc0ca 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -110,18 +110,28 @@ use hal::CommandEncoder as _; #[cfg_attr(feature = "trace", derive(serde::Serialize))] #[cfg_attr(feature = "replay", derive(serde::Deserialize))] pub struct RenderBundleEncoderDescriptor<'a> { - /// Debug label of the render bundle encoder. This will show up in graphics debuggers for easy identification. + /// Debug label of the render bundle encoder. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, - /// The formats of the color attachments that this render bundle is capable to rendering to. This - /// must match the formats of the color attachments in the renderpass this render bundle is executed in. + /// The formats of the color attachments that this render bundle is capable + /// to rendering to. + /// + /// This must match the formats of the color attachments in the + /// renderpass this render bundle is executed in. pub color_formats: Cow<'a, [Option]>, - /// Information about the depth attachment that this render bundle is capable to rendering to. The format - /// must match the format of the depth attachments in the renderpass this render bundle is executed in. + /// Information about the depth attachment that this render bundle is + /// capable to rendering to. + /// + /// The format must match the format of the depth attachments in the + /// renderpass this render bundle is executed in. pub depth_stencil: Option, - /// Sample count this render bundle is capable of rendering to. This must match the pipelines and - /// the renderpasses it is used in. + /// Sample count this render bundle is capable of rendering to. + /// + /// This must match the pipelines and the renderpasses it is used in. pub sample_count: u32, - /// If this render bundle will rendering to multiple array layers in the attachments at the same time. + /// If this render bundle will rendering to multiple array layers in the + /// attachments at the same time. pub multiview: Option, } diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index bf0c90ff49..52d02a889e 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -266,12 +266,19 @@ pub(crate) fn clear_texture( layers: range.layer_range.clone(), }; - // If we're in a texture-init usecase, we know that the texture is already tracked since whatever caused the init requirement, - // will have caused the usage tracker to be aware of the texture. Meaning, that it is safe to call call change_replace_tracked if the life_guard is already gone - // (i.e. the user no longer holds on to this texture). - // On the other hand, when coming via command_encoder_clear_texture, the life_guard is still there since in order to call it a texture object is needed. + // If we're in a texture-init usecase, we know that the texture is already + // tracked since whatever caused the init requirement, will have caused the + // usage tracker to be aware of the texture. Meaning, that it is safe to + // call call change_replace_tracked if the life_guard is already gone (i.e. + // the user no longer holds on to this texture). // - // We could in theory distinguish these two scenarios in the internal clear_texture api in order to remove this check and call the cheaper change_replace_tracked whenever possible. + // On the other hand, when coming via command_encoder_clear_texture, the + // life_guard is still there since in order to call it a texture object is + // needed. + // + // We could in theory distinguish these two scenarios in the internal + // clear_texture api in order to remove this check and call the cheaper + // change_replace_tracked whenever possible. let dst_barrier = texture_tracker .set_single(storage, dst_texture_id.0, selector, clear_usage) .unwrap() @@ -332,8 +339,13 @@ fn clear_texture_via_buffer_copies( // round down to a multiple of rows needed by the texture format let max_rows_per_copy = max_rows_per_copy / format_desc.block_dimensions.1 as u32 * format_desc.block_dimensions.1 as u32; - assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture with format {:?} and desc {:?}", - texture_desc.format, texture_desc.size); + assert!( + max_rows_per_copy > 0, + "Zero buffer size is too small to fill a single row \ + of a texture with format {:?} and desc {:?}", + texture_desc.format, + texture_desc.size + ); let z_range = 0..(if texture_desc.dimension == wgt::TextureDimension::D3 { mip_size.depth_or_array_layers @@ -344,7 +356,8 @@ fn clear_texture_via_buffer_copies( for array_layer in range.layer_range.clone() { // TODO: Only doing one layer at a time for volume textures right now. for z in z_range.clone() { - // May need multiple copies for each subresource! However, we assume that we never need to split a row. + // May need multiple copies for each subresource! However, we + // assume that we never need to split a row. let mut num_rows_left = mip_size.height; while num_rows_left > 0 { let num_rows = num_rows_left.min(max_rows_per_copy); @@ -400,7 +413,8 @@ fn clear_texture_via_render_passes( for mip_level in range.mip_range { let extent = extent_base.mip_level_size(mip_level, is_3d_texture); let layer_or_depth_range = if dst_texture.desc.dimension == wgt::TextureDimension::D3 { - // TODO: We assume that we're allowed to do clear operations on volume texture slices, this is not properly specified. + // TODO: We assume that we're allowed to do clear operations on + // volume texture slices, this is not properly specified. 0..extent.depth_or_array_layers } else { range.layer_range.clone() diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index cbbb2e054e..a4f3365627 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -268,7 +268,8 @@ impl State { Ok(()) } - // `extra_buffer` is there to represent the indirect buffer that is also part of the usage scope. + // `extra_buffer` is there to represent the indirect buffer that is also + // part of the usage scope. fn flush_states( &mut self, raw_encoder: &mut A::CommandEncoder, @@ -391,7 +392,8 @@ impl Global { raw.begin_compute_pass(&hal_desc); } - // Immediate texture inits required because of prior discards. Need to be inserted before texture reads. + // Immediate texture inits required because of prior discards. Need to + // be inserted before texture reads. let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); for command in base.commands { @@ -763,8 +765,11 @@ impl Global { } cmd_buf.status = CommandEncoderStatus::Recording; - // There can be entries left in pending_discard_init_fixups if a bind group was set, but not used (i.e. no Dispatch occurred) - // However, we already altered the discard/init_action state on this cmd_buf, so we need to apply the promised changes. + // There can be entries left in pending_discard_init_fixups if a bind + // group was set, but not used (i.e. no Dispatch occurred) + // + // However, we already altered the discard/init_action state on this + // cmd_buf, so we need to apply the promised changes. fixup_discarded_surfaces( pending_discard_init_fixups.into_iter(), raw, diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index d974575f91..52735fec51 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -27,10 +27,12 @@ pub(crate) type SurfacesInDiscardState = Vec; #[derive(Default)] pub(crate) struct CommandBufferTextureMemoryActions { - // init actions describe the tracker actions that we need to be executed before the command buffer is executed + /// The tracker actions that we need to be executed before the command + /// buffer is executed. init_actions: Vec, - // discards describe all the discards that haven't been followed by init again within the command buffer - // i.e. everything in this list resets the texture init state *after* the command buffer execution + /// All the discards that haven't been followed by init again within the + /// command buffer i.e. everything in this list resets the texture init + /// state *after* the command buffer execution discards: Vec, } @@ -54,19 +56,22 @@ impl CommandBufferTextureMemoryActions { ) -> SurfacesInDiscardState { let mut immediately_necessary_clears = SurfacesInDiscardState::new(); - // Note that within a command buffer we may stack arbitrary memory init actions on the same texture - // Since we react to them in sequence, they are going to be dropped again at queue submit + // Note that within a command buffer we may stack arbitrary memory init + // actions on the same texture Since we react to them in sequence, they + // are going to be dropped again at queue submit // - // We don't need to add MemoryInitKind::NeedsInitializedMemory to init_actions if a surface is part of the discard list. - // But that would mean splitting up the action which is more than we'd win here. + // We don't need to add MemoryInitKind::NeedsInitializedMemory to + // init_actions if a surface is part of the discard list. But that would + // mean splitting up the action which is more than we'd win here. self.init_actions .extend(match texture_guard.get(action.id) { Ok(texture) => texture.initialization_status.check_action(action), Err(_) => return immediately_necessary_clears, // texture no longer exists }); - // We expect very few discarded surfaces at any point in time which is why a simple linear search is likely best. - // (i.e. most of the time self.discards is empty!) + // We expect very few discarded surfaces at any point in time which is + // why a simple linear search is likely best. (i.e. most of the time + // self.discards is empty!) let init_actions = &mut self.init_actions; self.discards.retain(|discarded_surface| { if discarded_surface.texture == action.id @@ -79,7 +84,9 @@ impl CommandBufferTextureMemoryActions { if let MemoryInitKind::NeedsInitializedMemory = action.kind { immediately_necessary_clears.push(discarded_surface.clone()); - // Mark surface as implicitly initialized (this is relevant because it might have been uninitialized prior to discarding + // Mark surface as implicitly initialized (this is relevant + // because it might have been uninitialized prior to + // discarding init_actions.push(TextureInitTrackerAction { id: discarded_surface.texture, range: TextureInitRange { @@ -99,7 +106,8 @@ impl CommandBufferTextureMemoryActions { immediately_necessary_clears } - // Shortcut for register_init_action when it is known that the action is an implicit init, not requiring any immediate resource init. + // Shortcut for register_init_action when it is known that the action is an + // implicit init, not requiring any immediate resource init. pub(crate) fn register_implicit_init( &mut self, id: id::Valid, @@ -118,7 +126,9 @@ impl CommandBufferTextureMemoryActions { } } -// Utility function that takes discarded surfaces from (several calls to) register_init_action and initializes them on the spot. +// Utility function that takes discarded surfaces from (several calls to) +// register_init_action and initializes them on the spot. +// // Takes care of barriers as well! pub(crate) fn fixup_discarded_surfaces< A: HalApi, @@ -148,14 +158,16 @@ pub(crate) fn fixup_discarded_surfaces< } impl BakedCommands { - // inserts all buffer initializations that are going to be needed for executing the commands and updates resource init states accordingly + // inserts all buffer initializations that are going to be needed for + // executing the commands and updates resource init states accordingly pub(crate) fn initialize_buffer_memory( &mut self, device_tracker: &mut Tracker, buffer_guard: &mut Storage, id::BufferId>, ) -> Result<(), DestroyedBufferError> { // Gather init ranges for each buffer so we can collapse them. - // It is not possible to do this at an earlier point since previously executed command buffer change the resource init state. + // It is not possible to do this at an earlier point since previously + // executed command buffer change the resource init state. let mut uninitialized_ranges_per_buffer = FastHashMap::default(); for buffer_use in self.buffer_memory_init_actions.drain(..) { let buffer = buffer_guard @@ -194,15 +206,19 @@ impl BakedCommands { // Collapse touching ranges. ranges.sort_by_key(|r| r.start); for i in (1..ranges.len()).rev() { - assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this! + // The memory init tracker made sure of this! + assert!(ranges[i - 1].end <= ranges[i].start); if ranges[i].start == ranges[i - 1].end { ranges[i - 1].end = ranges[i].end; ranges.swap_remove(i); // Ordering not important at this point } } - // Don't do use_replace since the buffer may already no longer have a ref_count. - // However, we *know* that it is currently in use, so the tracker must already know about it. + // Don't do use_replace since the buffer may already no longer have + // a ref_count. + // + // However, we *know* that it is currently in use, so the tracker + // must already know about it. let transition = device_tracker .buffers .set_single(buffer_guard, buffer_id, hal::BufferUses::COPY_DST) @@ -223,8 +239,20 @@ impl BakedCommands { } for range in ranges.iter() { - assert!(range.start % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})", raw_buf, range.start); - assert!(range.end % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with an end not aligned to 4 (end was {})", raw_buf, range.end); + assert!( + range.start % wgt::COPY_BUFFER_ALIGNMENT == 0, + "Buffer {:?} has an uninitialized range with a start \ + not aligned to 4 (start was {})", + raw_buf, + range.start + ); + assert!( + range.end % wgt::COPY_BUFFER_ALIGNMENT == 0, + "Buffer {:?} has an uninitialized range with an end \ + not aligned to 4 (end was {})", + raw_buf, + range.end + ); unsafe { self.encoder.clear_buffer(raw_buf, range.clone()); @@ -234,8 +262,10 @@ impl BakedCommands { Ok(()) } - // inserts all texture initializations that are going to be needed for executing the commands and updates resource init states accordingly - // any textures that are left discarded by this command buffer will be marked as uninitialized + // inserts all texture initializations that are going to be needed for + // executing the commands and updates resource init states accordingly any + // textures that are left discarded by this command buffer will be marked as + // uninitialized pub(crate) fn initialize_texture_memory( &mut self, device_tracker: &mut Tracker, @@ -290,7 +320,9 @@ impl BakedCommands { } } - // Now that all buffers/textures have the proper init state for before cmdbuf start, we discard init states for textures it left discarded after its execution. + // Now that all buffers/textures have the proper init state for before + // cmdbuf start, we discard init states for textures it left discarded + // after its execution. for surface_discard in self.texture_memory_actions.discards.iter() { let texture = texture_guard .get_mut(surface_discard.texture) diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 257bb2edaf..06eee0bd51 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -511,7 +511,8 @@ impl BindGroupStateChange { ) -> bool { // For now never deduplicate bind groups with dynamic offsets. if offset_length == 0 { - // If this get returns None, that means we're well over the limit, so let the call through to get a proper error + // If this get returns None, that means we're well over the limit, + // so let the call through to get a proper error if let Some(current_bind_group) = self.last_states.get_mut(index as usize) { // Bail out if we're binding the same bind group. if current_bind_group.set_and_check_redundant(bind_group_id) { diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs index 0d6fc8a558..d4176195a8 100644 --- a/wgpu-core/src/command/query.rs +++ b/wgpu-core/src/command/query.rs @@ -180,7 +180,8 @@ impl QuerySet { query_index: u32, reset_state: Option<&mut QueryResetMap>, ) -> Result<&A::QuerySet, QueryUseError> { - // We need to defer our resets because we are in a renderpass, add the usage to the reset map. + // We need to defer our resets because we are in a renderpass, + // add the usage to the reset map. if let Some(reset) = reset_state { let used = reset.use_query_set(query_set_id, self, query_index); if used { diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index a68f3e030d..1b434956b5 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -63,7 +63,9 @@ pub enum LoadOp { #[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] #[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))] pub enum StoreOp { - /// Discards the content of the render target. If you don't care about the contents of the target, this can be faster. + /// Discards the content of the render target. + /// + /// If you don't care about the contents of the target, this can be faster. Discard = 0, /// Store the result of the renderpass. Store = 1, @@ -75,15 +77,20 @@ pub enum StoreOp { #[cfg_attr(any(feature = "serial-pass", feature = "trace"), derive(Serialize))] #[cfg_attr(any(feature = "serial-pass", feature = "replay"), derive(Deserialize))] pub struct PassChannel { - /// Operation to perform to the output attachment at the start of a renderpass. This must be clear if it - /// is the first renderpass rendering to a swap chain image. + /// Operation to perform to the output attachment at the start of a + /// renderpass. + /// + /// This must be clear if it is the first renderpass rendering to a swap + /// chain image. pub load_op: LoadOp, /// Operation to perform to the output attachment at the end of a renderpass. pub store_op: StoreOp, - /// If load_op is [`LoadOp::Clear`], the attachment will be cleared to this color. + /// If load_op is [`LoadOp::Clear`], the attachment will be cleared to this + /// color. pub clear_value: V, - /// If true, the relevant channel is not changed by a renderpass, and the corresponding attachment - /// can be used inside the pass by other read-only usages. + /// If true, the relevant channel is not changed by a renderpass, and the + /// corresponding attachment can be used inside the pass by other read-only + /// usages. pub read_only: bool, } @@ -596,7 +603,8 @@ type AttachmentDataVec = ArrayVec; struct RenderPassInfo<'a, A: HalApi> { context: RenderPassContext, usage_scope: UsageScope, - render_attachments: AttachmentDataVec>, // All render attachments, including depth/stencil + /// All render attachments, including depth/stencil + render_attachments: AttachmentDataVec>, is_depth_read_only: bool, is_stencil_read_only: bool, extent: wgt::Extent3d, @@ -634,8 +642,9 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { ); } if channel.store_op == StoreOp::Discard { - // the discard happens at the *end* of a pass - // but recording the discard right away be alright since the texture can't be used during the pass anyways + // the discard happens at the *end* of a pass, but recording the + // discard right away be alright since the texture can't be used + // during the pass anyways texture_memory_actions.discard(TextureSurfaceDiscard { texture: view.parent_id.value.0, mip_level: view.selector.mips.start, @@ -770,15 +779,27 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { &mut pending_discard_init_fixups, ); } else { - // This is the only place (anywhere in wgpu) where Stencil & Depth init state can diverge. - // To safe us the overhead of tracking init state of texture aspects everywhere, - // we're going to cheat a little bit in order to keep the init state of both Stencil and Depth aspects in sync. - // The expectation is that we hit this path extremely rarely! - + // This is the only place (anywhere in wgpu) where Stencil & + // Depth init state can diverge. + // + // To safe us the overhead of tracking init state of texture + // aspects everywhere, we're going to cheat a little bit in + // order to keep the init state of both Stencil and Depth + // aspects in sync. The expectation is that we hit this path + // extremely rarely! + // // Diverging LoadOp, i.e. Load + Clear: - // Record MemoryInitKind::NeedsInitializedMemory for the entire surface, a bit wasteful on unit but no negative effect! - // Rationale: If the loaded channel is uninitialized it needs clearing, the cleared channel doesn't care. (If everything is already initialized nothing special happens) - // (possible minor optimization: Clear caused by NeedsInitializedMemory should know that it doesn't need to clear the aspect that was set to C) + // + // Record MemoryInitKind::NeedsInitializedMemory for the entire + // surface, a bit wasteful on unit but no negative effect! + // + // Rationale: If the loaded channel is uninitialized it needs + // clearing, the cleared channel doesn't care. (If everything is + // already initialized nothing special happens) + // + // (possible minor optimization: Clear caused by + // NeedsInitializedMemory should know that it doesn't need to + // clear the aspect that was set to C) let need_init_beforehand = at.depth.load_op == LoadOp::Load || at.stencil.load_op == LoadOp::Load; if need_init_beforehand { @@ -795,8 +816,12 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { } // Diverging Store, i.e. Discard + Store: - // Immediately zero out channel that is set to discard after we're done with the render pass. - // This allows us to set the entire surface to MemoryInitKind::ImplicitlyInitialized (if it isn't already set to NeedsInitializedMemory). + // + // Immediately zero out channel that is set to discard after + // we're done with the render pass. This allows us to set the + // entire surface to MemoryInitKind::ImplicitlyInitialized (if + // it isn't already set to NeedsInitializedMemory). + // // (possible optimization: Delay and potentially drop this zeroing) if at.depth.store_op != at.stencil.store_op { if !need_init_beforehand { @@ -1026,10 +1051,15 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { }; } - // If either only stencil or depth was discarded, we put in a special clear pass to keep the init status of the aspects in sync. - // We do this so we don't need to track init state for depth/stencil aspects individually. - // Note that we don't go the usual route of "brute force" initializing the texture when need arises here, - // since this path is actually something a user may genuinely want (where as the other cases are more seen along the lines as gracefully handling a user error). + // If either only stencil or depth was discarded, we put in a special + // clear pass to keep the init status of the aspects in sync. We do this + // so we don't need to track init state for depth/stencil aspects + // individually. + // + // Note that we don't go the usual route of "brute force" initializing + // the texture when need arises here, since this path is actually + // something a user may genuinely want (where as the other cases are + // more seen along the lines as gracefully handling a user error). if let Some((aspect, view)) = self.divergent_discarded_depth_stencil_aspect { let (depth_ops, stencil_ops) = if aspect == wgt::TextureAspect::DepthOnly { ( @@ -1631,7 +1661,8 @@ impl Global { }; state.is_ready(indexed).map_pass_err(scope)?; - //TODO: validate that base_vertex + max_index() is within the provided range + //TODO: validate that base_vertex + max_index() is + // within the provided range let last_index = first_index + index_count; let index_limit = state.index.limit; if last_index > index_limit { diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index b1d0278ad0..dd46e30a26 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -201,8 +201,15 @@ pub(crate) fn extract_texture_selector( Ok((selector, base, format)) } -/// Function copied with some modifications from webgpu standard -/// If successful, returns (number of buffer bytes required for this copy, number of bytes between array layers). +/// WebGPU's [validating linear texture data][vltd] algorithm. +/// +/// Copied with some modifications from WebGPU standard. +/// +/// If successful, returns a pair `(bytes, stride)`, where: +/// - `bytes` is the number of buffer bytes required for this copy, and +/// - `stride` number of bytes between array layers. +/// +/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data pub(crate) fn validate_linear_texture_data( layout: &wgt::ImageDataLayout, format: wgt::TextureFormat, @@ -292,8 +299,13 @@ pub(crate) fn validate_linear_texture_data( Ok((required_bytes_in_copy, bytes_per_image)) } -/// Function copied with minor modifications from webgpu standard +/// WebGPU's [validating texture copy range][vtcr] algorithm. +/// +/// Copied with minor modifications from WebGPU standard. +/// /// Returns the HAL copy extent and the layer count. +/// +/// [vtcr]: https://gpuweb.github.io/gpuweb/#valid-texture-copy-range pub(crate) fn validate_texture_copy_range( texture_copy_view: &ImageCopyTexture, desc: &wgt::TextureDescriptor<()>, @@ -445,7 +457,10 @@ fn handle_texture_init( } } -// Ensures the source texture of a transfer is in the right initialization state and records the state for after the transfer operation. +/// Prepare a transfer's source texture. +/// +/// Ensure the source texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. fn handle_src_texture_init( cmd_buf: &mut CommandBuffer, device: &Device, @@ -468,7 +483,10 @@ fn handle_src_texture_init( Ok(()) } -// Ensures the destination texture of a transfer is in the right initialization state and records the state for after the transfer operation. +/// Prepare a transfer's destination texture. +/// +/// Ensure the destination texture of a transfer is in the right initialization +/// state, and record the state for after the transfer operation. fn handle_dst_texture_init( cmd_buf: &mut CommandBuffer, device: &Device, @@ -480,8 +498,10 @@ fn handle_dst_texture_init( .get(destination.texture) .map_err(|_| TransferError::InvalidTexture(destination.texture))?; - // Attention: If we don't write full texture subresources, we need to a full clear first since we don't track subrects. - // This means that in rare cases even a *destination* texture of a transfer may need an immediate texture init. + // Attention: If we don't write full texture subresources, we need to a full + // clear first since we don't track subrects. This means that in rare cases + // even a *destination* texture of a transfer may need an immediate texture + // init. let dst_init_kind = if has_copy_partial_init_tracker_coverage( copy_size, destination.mip_level, @@ -667,7 +687,9 @@ impl Global { let (dst_range, dst_base, _) = extract_texture_selector(destination, copy_size, &*texture_guard)?; - // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; let (src_buffer, src_pending) = cmd_buf @@ -794,7 +816,9 @@ impl Global { let (src_range, src_base, _) = extract_texture_selector(source, copy_size, &*texture_guard)?; - // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; let (src_texture, src_pending) = cmd_buf @@ -956,7 +980,9 @@ impl Global { return Err(TransferError::MismatchedAspects.into()); } - // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. + // Handle texture init *before* dealing with barrier transitions so we + // have an easier time inserting "immediate-inits" that may be required + // by prior discards in rare cases. handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 8f3e5ce4d3..ab209699f6 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -32,7 +32,8 @@ pub mod queue; pub mod trace; pub const SHADER_STAGE_COUNT: usize = 3; -// Should be large enough for the largest possible texture row. This value is enough for a 16k texture with float4 format. +// Should be large enough for the largest possible texture row. This +// value is enough for a 16k texture with float4 format. pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10; const CLEANUP_WAIT_MS: u32 = 5000; @@ -181,19 +182,27 @@ fn map_buffer( assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0); assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0); - // Zero out uninitialized parts of the mapping. (Spec dictates all resources behave as if they were initialized with zero) + // Zero out uninitialized parts of the mapping. (Spec dictates all resources + // behave as if they were initialized with zero) // - // If this is a read mapping, ideally we would use a `clear_buffer` command before reading the data from GPU (i.e. `invalidate_range`). - // However, this would require us to kick off and wait for a command buffer or piggy back on an existing one (the later is likely the only worthwhile option). - // As reading uninitialized memory isn't a particular important path to support, - // we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region. + // If this is a read mapping, ideally we would use a `clear_buffer` command + // before reading the data from GPU (i.e. `invalidate_range`). However, this + // would require us to kick off and wait for a command buffer or piggy back + // on an existing one (the later is likely the only worthwhile option). As + // reading uninitialized memory isn't a particular important path to + // support, we instead just initialize the memory here and make sure it is + // GPU visible, so this happens at max only once for every buffer region. // - // If this is a write mapping zeroing out the memory here is the only reasonable way as all data is pushed to GPU anyways. - let zero_init_needs_flush_now = mapping.is_coherent && buffer.sync_mapped_writes.is_none(); // No need to flush if it is flushed later anyways. + // If this is a write mapping zeroing out the memory here is the only + // reasonable way as all data is pushed to GPU anyways. + + // No need to flush if it is flushed later anyways. + let zero_init_needs_flush_now = mapping.is_coherent && buffer.sync_mapped_writes.is_none(); let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) }; for uninitialized in buffer.initialization_status.drain(offset..(size + offset)) { - // The mapping's pointer is already offset, however we track the uninitialized range relative to the buffer's start. + // The mapping's pointer is already offset, however we track the + // uninitialized range relative to the buffer's start. let fill_range = (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize; mapped[fill_range].fill(0); @@ -243,6 +252,7 @@ impl CommandAllocator { /// Structure describing a logical device. Some members are internally mutable, /// stored behind mutexes. +/// /// TODO: establish clear order of locking for these: /// `mem_allocator`, `desc_allocator`, `life_tracker`, `trackers`, /// `render_passes`, `pending_writes`, `trace`. @@ -286,8 +296,9 @@ pub struct Device { pub(crate) limits: wgt::Limits, pub(crate) features: wgt::Features, pub(crate) downlevel: wgt::DownlevelCapabilities, - //TODO: move this behind another mutex. This would allow several methods to switch - // to borrow Device immutably, such as `write_buffer`, `write_texture`, and `buffer_unmap`. + // TODO: move this behind another mutex. This would allow several methods to + // switch to borrow Device immutably, such as `write_buffer`, `write_texture`, + // and `buffer_unmap`. pending_writes: queue::PendingWrites, #[cfg(feature = "trace")] pub(crate) trace: Option>, @@ -608,15 +619,16 @@ impl Device { usage |= hal::BufferUses::COPY_DST; } } else { - // We are required to zero out (initialize) all memory. - // This is done on demand using clear_buffer which requires write transfer usage! + // We are required to zero out (initialize) all memory. This is done + // on demand using clear_buffer which requires write transfer usage! usage |= hal::BufferUses::COPY_DST; } let actual_size = if desc.size == 0 { wgt::COPY_BUFFER_ALIGNMENT } else if desc.usage.contains(wgt::BufferUsages::VERTEX) { - // Bumping the size by 1 so that we can bind an empty range at the end of the buffer. + // Bumping the size by 1 so that we can bind an empty range at the + // end of the buffer. desc.size + 1 } else { desc.size @@ -823,7 +835,8 @@ impl Device { // TODO: validate missing TextureDescriptor::view_formats. - // Enforce having COPY_DST/DEPTH_STENCIL_WRIT/COLOR_TARGET otherwise we wouldn't be able to initialize the texture. + // Enforce having COPY_DST/DEPTH_STENCIL_WRIT/COLOR_TARGET otherwise we + // wouldn't be able to initialize the texture. let hal_usage = conv::map_texture_usage(desc.usage, desc.format.into()) | if format_desc.sample_type == wgt::TextureSampleType::Depth { hal::TextureUses::DEPTH_STENCIL_WRITE @@ -1579,8 +1592,8 @@ impl Device { for entry in entry_map.values() { count_validator.add_binding(entry); } - // If a single bind group layout violates limits, the pipeline layout is definitely - // going to violate limits too, lets catch it now. + // If a single bind group layout violates limits, the pipeline layout is + // definitely going to violate limits too, lets catch it now. count_validator .validate(&self.limits) .map_err(binding_model::CreateBindGroupLayoutError::TooManyBindings)?; @@ -2093,8 +2106,10 @@ impl Device { (Tst::Float { filterable: true }, Tst::Float { filterable: true }) | // if we expect float, also accept depth (Tst::Float { .. }, Tst::Depth, ..) => {} - // if we expect filterable, also accept Float that is defined as unfilterable if filterable feature is explicitly enabled - // (only hit if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is enabled) + // if we expect filterable, also accept Float that is defined as + // unfilterable if filterable feature is explicitly enabled (only hit + // if wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES is + // enabled) (Tst::Float { filterable: true }, Tst::Float { .. }) if view.format_features.flags.contains(wgt::TextureFormatFeatureFlags::FILTERABLE) => {} _ => { return Err(Error::InvalidTextureSampleType { @@ -2628,8 +2643,10 @@ impl Device { let adapter_specific = self .features .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); - // according to WebGPU specifications the texture needs to be [`TextureFormatFeatureFlags::FILTERABLE`] - // if blending is set - use [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] to elude this limitation + // according to WebGPU specifications the texture needs to be + // [`TextureFormatFeatureFlags::FILTERABLE`] if blending is set - use + // [`Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES`] to elude + // this limitation if cs.blend.is_some() && (!blendable || (!filterable && !adapter_specific)) { break Some(pipeline::ColorStateError::FormatNotBlendable(cs.format)); } @@ -2959,7 +2976,8 @@ impl Device { let using_device_features = self .features .contains(wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES); - // If we're running downlevel, we need to manually ask the backend what we can use as we can't trust WebGPU. + // If we're running downlevel, we need to manually ask the backend what + // we can use as we can't trust WebGPU. let downlevel = !self.downlevel.is_webgpu_compliant(); if using_device_features || downlevel { @@ -3767,7 +3785,8 @@ impl Global { Err(_) => break DeviceError::Invalid.into(), }; - // NB: Any change done through the raw texture handle will not be recorded in the replay + // NB: Any change done through the raw texture handle will not be + // recorded in the replay #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { trace @@ -4432,7 +4451,8 @@ impl Global { (id, Some(error)) } - #[allow(unused_unsafe)] // Unsafe-ness of internal calls has little to do with unsafe-ness of this. + // Unsafe-ness of internal calls has little to do with unsafe-ness of this. + #[allow(unused_unsafe)] /// # Safety /// /// This function passes SPIR-V binary to the backend as-is and can potentially result in a @@ -5148,7 +5168,11 @@ impl Global { } } - unreachable!("Fallback system failed to choose alpha mode. This is a bug. AlphaMode: {:?}, Options: {:?}", config.composite_alpha_mode, &caps.composite_alpha_modes); + unreachable!( + "Fallback system failed to choose alpha mode. This is a bug. \ + AlphaMode: {:?}, Options: {:?}", + config.composite_alpha_mode, &caps.composite_alpha_modes + ); }; log::info!( @@ -5318,7 +5342,8 @@ impl Global { /// /// If `force_wait` is true, block until all buffer mappings are done. /// - /// Return `all_queue_empty` indicating whether there are more queue submissions still in flight. + /// Return `all_queue_empty` indicating whether there are more queue + /// submissions still in flight. fn poll_devices( &self, force_wait: bool, @@ -5362,7 +5387,8 @@ impl Global { /// /// This is the implementation of `wgpu::Instance::poll_all`. /// - /// Return `all_queue_empty` indicating whether there are more queue submissions still in flight. + /// Return `all_queue_empty` indicating whether there are more queue + /// submissions still in flight. pub fn poll_all_devices(&self, force_wait: bool) -> Result { let mut closures = UserClosures::default(); let mut all_queue_empty = true; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 748f1057da..aea5598731 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -60,8 +60,11 @@ impl SubmittedWorkDoneClosure { /// # Safety /// - /// - The callback pointer must be valid to call with the provided user_data pointer. - /// - Both pointers must point to 'static data as the callback may happen at an unspecified time. + /// - The callback pointer must be valid to call with the provided `user_data` + /// pointer. + /// + /// - Both pointers must point to `'static` data, as the callback may happen at + /// an unspecified time. pub unsafe fn from_c(inner: SubmittedWorkDoneClosureC) -> Self { Self { inner: SubmittedWorkDoneClosureInner::C { inner }, @@ -118,17 +121,20 @@ impl EncoderInFlight { } } -/// Writes made directly on the device or queue, not as part of a wgpu command buffer. +/// A private command encoder for writes made directly on the device +/// or queue. /// /// Operations like `buffer_unmap`, `queue_write_buffer`, and -/// `queue_write_texture` need to copy data to the GPU. This must be -/// done by encoding and submitting commands at the hal level, but these -/// operations are not associated with any specific wgpu command buffer. +/// `queue_write_texture` need to copy data to the GPU. At the hal +/// level, this must be done by encoding and submitting commands, but +/// these operations are not associated with any specific wgpu command +/// buffer. /// -/// Instead, `Device::pending_writes` owns one of these values, which has its -/// own hal command encoder and resource lists. The commands accumulated here -/// are automatically submitted to the queue the next time the user submits a -/// wgpu command buffer, ahead of the user's commands. +/// Instead, `Device::pending_writes` owns one of these values, which +/// has its own hal command encoder and resource lists. The commands +/// accumulated here are automatically submitted to the queue the next +/// time the user submits a wgpu command buffer, ahead of the user's +/// commands. /// /// All uses of [`StagingBuffer`]s end up here. #[derive(Debug)] @@ -540,7 +546,8 @@ impl Global { device.pending_writes.dst_buffers.insert(buffer_id); - // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. + // Ensure the overwritten bytes are marked as initialized so + // they don't need to be nulled prior to mapping or binding. { drop(buffer_guard); let mut buffer_guard = hub.buffers.write(device_token).0; @@ -587,12 +594,14 @@ impl Global { return Ok(()); } - let (mut texture_guard, _) = hub.textures.write(&mut token); // For clear we need write access to the texture. TODO: Can we acquire write lock later? + // For clear we need write access to the texture. + // TODO: Can we acquire write lock later? + let (mut texture_guard, _) = hub.textures.write(&mut token); let (selector, dst_base, texture_format) = extract_texture_selector(destination, size, &*texture_guard)?; let format_desc = texture_format.describe(); - //Note: `_source_bytes_per_array_layer` is ignored since we have a staging copy, - // and it can have a different value. + // Note: `_source_bytes_per_array_layer` is ignored since we + // have a staging copy, and it can have a different value. let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( data_layout, texture_format, @@ -613,7 +622,9 @@ impl Global { let block_rows_per_image = match data_layout.rows_per_image { Some(rows_per_image) => rows_per_image.get(), None => { - // doesn't really matter because we need this only if we copy more than one layer, and then we validate for this being not None + // doesn't really matter because we need this only if we copy + // more than one layer, and then we validate for this being not + // None size.height } }; @@ -641,11 +652,14 @@ impl Global { let mut trackers = device.trackers.lock(); let encoder = device.pending_writes.activate(); - // If the copy does not fully cover the layers, we need to initialize to zero *first* as we don't keep track of partial texture layer inits. - // Strictly speaking we only need to clear the areas of a layer untouched, but this would get increasingly messy. - + // If the copy does not fully cover the layers, we need to initialize to + // zero *first* as we don't keep track of partial texture layer inits. + // + // Strictly speaking we only need to clear the areas of a layer + // untouched, but this would get increasingly messy. let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { - 0..1 // volume textures don't have a layer range as array volumes aren't supported + // volume textures don't have a layer range as array volumes aren't supported + 0..1 } else { destination.origin.z..destination.origin.z + size.depth_or_array_layers }; @@ -830,7 +844,8 @@ impl Global { // finish all the command buffers first for &cmb_id in command_buffer_ids { - // we reset the used surface textures every time we use it, so make sure to set_size on it. + // we reset the used surface textures every time we use + // it, so make sure to set_size on it. used_surface_textures.set_size(texture_guard.len()); #[allow(unused_mut)] @@ -1034,12 +1049,14 @@ impl Global { } = *device; { - //TODO: these blocks have a few organizational issues and should be refactored - // (1) it's similar to the code we have per-command-buffer (at the begin and end) - // Maybe we an merge some? - // (2) it's doing the extra locking unconditionally - // Maybe we can only do so if any surfaces are being written to? - + // TODO: These blocks have a few organizational issues, and + // should be refactored. + // + // 1) It's similar to the code we have per-command-buffer + // (at the begin and end) Maybe we can merge some? + // + // 2) It's doing the extra locking unconditionally. Maybe we + // can only do so if any surfaces are being written to? let (_, mut token) = hub.buffers.read(&mut token); // skip token let (mut texture_guard, _) = hub.textures.write(&mut token); diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 2c586c930e..38918b8cbe 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -316,8 +316,9 @@ impl Storage { /// /// Returns [`None`] if there is an epoch mismatch, or the entry is empty. /// - /// This function is primarily intended for the `as_hal` family of functions where you may need to - /// fallibly get a object backed by an id that could be in a different hub. + /// This function is primarily intended for the `as_hal` family of functions + /// where you may need to fallibly get a object backed by an id that could + /// be in a different hub. pub(crate) fn try_get(&self, id: I) -> Result, InvalidId> { let (index, epoch, _) = id.unzip(); let (result, storage_epoch) = match self.map.get(index as usize) { diff --git a/wgpu-core/src/init_tracker/buffer.rs b/wgpu-core/src/init_tracker/buffer.rs index 541d0aefd0..ea9b9f6a8d 100644 --- a/wgpu-core/src/init_tracker/buffer.rs +++ b/wgpu-core/src/init_tracker/buffer.rs @@ -12,7 +12,8 @@ pub(crate) struct BufferInitTrackerAction { pub(crate) type BufferInitTracker = InitTracker; impl BufferInitTracker { - /// Checks if an action has/requires any effect on the initialization status and shrinks its range if possible. + /// Checks if an action has/requires any effect on the initialization status + /// and shrinks its range if possible. pub(crate) fn check_action( &self, action: &BufferInitTrackerAction, @@ -20,7 +21,8 @@ impl BufferInitTracker { self.create_action(action.id, action.range.clone(), action.kind) } - /// Creates an action if it would have any effect on the initialization status and shrinks the range if possible. + /// Creates an action if it would have any effect on the initialization + /// status and shrinks the range if possible. pub(crate) fn create_action( &self, id: BufferId, diff --git a/wgpu-core/src/init_tracker/mod.rs b/wgpu-core/src/init_tracker/mod.rs index 1111735f6b..c7e78bc993 100644 --- a/wgpu-core/src/init_tracker/mod.rs +++ b/wgpu-core/src/init_tracker/mod.rs @@ -1,17 +1,35 @@ -// WebGPU specification requires all texture & buffer memory to be zero initialized on first read. -// To avoid unnecessary inits, we track the initialization status of every resource and perform inits lazily. -// -// The granularity is different for buffers and textures: -// * Buffer: Byte granularity to support usecases with large, partially bound buffers well. -// * Texture: Mip-level per layer. I.e. a 2D surface is either completely initialized or not, subrects are not tracked. -// -// Every use of a buffer/texture generates a InitTrackerAction which are recorded and later resolved at queue submit by merging them with the current state and each other in execution order. -// It is important to note that from the point of view of the memory init system there are two kind of writes: -// * Full writes: -// Any kind of memcpy operation. These cause a `MemoryInitKind.ImplicitlyInitialized` action. -// * (Potentially) partial writes: -// E.g. write use in a Shader. The system is not able to determine if a resource is fully initialized afterwards but is no longer allowed to perform any clears, -// therefore this leads to a `MemoryInitKind.ImplicitlyInitialized` action, exactly like a read would. +/*! Lazy initialization of texture and buffer memory. + +The WebGPU specification requires all texture & buffer memory to be +zero initialized on first read. To avoid unnecessary inits, we track +the initialization status of every resource and perform inits lazily. + +The granularity is different for buffers and textures: + +- Buffer: Byte granularity to support usecases with large, partially + bound buffers well. + +- Texture: Mip-level per layer. That is, a 2D surface is either + completely initialized or not, subrects are not tracked. + +Every use of a buffer/texture generates a InitTrackerAction which are +recorded and later resolved at queue submit by merging them with the +current state and each other in execution order. + +It is important to note that from the point of view of the memory init +system there are two kind of writes: + +- **Full writes**: Any kind of memcpy operation. These cause a + `MemoryInitKind.ImplicitlyInitialized` action. + +- **(Potentially) partial writes**: For example, write use in a + Shader. The system is not able to determine if a resource is fully + initialized afterwards but is no longer allowed to perform any + clears, therefore this leads to a + `MemoryInitKind.ImplicitlyInitialized` action, exactly like a read + would. + + */ use smallvec::SmallVec; use std::{fmt, iter, ops::Range}; @@ -27,13 +45,16 @@ pub(crate) use texture::{ #[derive(Debug, Clone, Copy)] pub(crate) enum MemoryInitKind { - // The memory range is going to be written by an already initialized source, thus doesn't need extra attention other than marking as initialized. + // The memory range is going to be written by an already initialized source, + // thus doesn't need extra attention other than marking as initialized. ImplicitlyInitialized, - // The memory range is going to be read, therefore needs to ensure prior initialization. + // The memory range is going to be read, therefore needs to ensure prior + // initialization. NeedsInitializedMemory, } -// Most of the time a resource is either fully uninitialized (one element) or initialized (zero elements). +// Most of the time a resource is either fully uninitialized (one element) or +// initialized (zero elements). type UninitializedRangeVec = SmallVec<[Range; 1]>; /// Tracks initialization status of a linear range from 0..size @@ -134,8 +155,10 @@ where } // Checks if there's any uninitialized ranges within a query. - // If there are any, the range returned a the subrange of the query_range that contains all these uninitialized regions. - // Returned range may be larger than necessary (tradeoff for making this function O(log n)) + // + // If there are any, the range returned a the subrange of the query_range + // that contains all these uninitialized regions. Returned range may be + // larger than necessary (tradeoff for making this function O(log n)) pub(crate) fn check(&self, query_range: Range) -> Option> { let index = self .uninitialized_ranges @@ -148,7 +171,8 @@ where match self.uninitialized_ranges.get(index + 1) { Some(next_range) => { if next_range.start < query_range.end { - // Would need to keep iterating for more accurate upper bound. Don't do that here. + // Would need to keep iterating for more + // accurate upper bound. Don't do that here. Some(start..query_range.end) } else { Some(start..start_range.end.min(query_range.end)) @@ -248,9 +272,12 @@ mod test { assert_eq!(tracker.check(3..8), Some(5..8)); // left overlapping region assert_eq!(tracker.check(3..17), Some(5..17)); // left overlapping region + contained region - assert_eq!(tracker.check(8..22), Some(8..22)); // right overlapping region + contained region (yes, doesn't fix range end!) - assert_eq!(tracker.check(17..22), Some(17..20)); // right overlapping region - assert_eq!(tracker.check(20..25), None); // right non-overlapping + // right overlapping region + contained region (yes, doesn't fix range end!) + assert_eq!(tracker.check(8..22), Some(8..22)); + // right overlapping region + assert_eq!(tracker.check(17..22), Some(17..20)); + // right non-overlapping + assert_eq!(tracker.check(20..25), None); } #[test] diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs index 87bfcdc887..90167e6fa6 100644 --- a/wgpu-core/src/init_tracker/texture.rs +++ b/wgpu-core/src/init_tracker/texture.rs @@ -6,11 +6,13 @@ use std::ops::Range; #[derive(Debug, Clone)] pub(crate) struct TextureInitRange { pub(crate) mip_range: Range, - pub(crate) layer_range: Range, // Strictly array layers. We do *not* track volume slices separately. + // Strictly array layers. We do *not* track volume slices separately. + pub(crate) layer_range: Range, } -// Returns true if a copy operation doesn't fully cover the texture init tracking granularity. -// I.e. if this function returns true for a pending copy operation, the target texture needs to be ensured to be initialized first! +// Returns true if a copy operation doesn't fully cover the texture init +// tracking granularity. I.e. if this function returns true for a pending copy +// operation, the target texture needs to be ensured to be initialized first! pub(crate) fn has_copy_partial_init_tracker_coverage( copy_size: &wgt::Extent3d, mip_level: u32, diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index e1e9831dc3..f6d19e5b89 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -336,7 +336,10 @@ impl Adapter { .contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS) && self.raw.info.device_type == wgt::DeviceType::DiscreteGpu { - log::warn!("Feature MAPPABLE_PRIMARY_BUFFERS enabled on a discrete gpu. This is a massive performance footgun and likely not what you wanted"); + log::warn!( + "Feature MAPPABLE_PRIMARY_BUFFERS enabled on a discrete gpu. \ + This is a massive performance footgun and likely not what you wanted" + ); } if let Some(_) = desc.label { @@ -445,8 +448,10 @@ impl Global { ) -> SurfaceId { profiling::scope!("Instance::create_surface"); - //Note: using a dummy argument to work around the following error: - //> cannot provide explicit generic arguments when `impl Trait` is used in argument position + // Note: using a dummy argument to work around the following + // error: + + // > cannot provide explicit generic arguments when `impl Trait` is used in argument position fn init( _: A, inst: &Option, @@ -731,7 +736,8 @@ impl Global { if let Some(surface) = compatible_surface { let surface = &A::get_surface(surface); adapters.retain(|exposed| unsafe { - // If the surface does not exist for this backend, then the surface is not supported. + // If the surface does not exist for this backend, + // then the surface is not supported. surface.is_some() && exposed .adapter @@ -835,10 +841,13 @@ impl Global { } let preferred_gpu = match desc.power_preference { - // Since devices of type "Other" might really be "Unknown" and come from APIs like OpenGL that don't specify device type, - // Prefer more Specific types over Other. - // This means that backends which do provide accurate device types will be preferred - // if their device type indicates an actual hardware GPU (integrated or discrete). + // Since devices of type "Other" might really be "Unknown" and come + // from APIs like OpenGL that don't specify device type, Prefer more + // Specific types over Other. + // + // This means that backends which do provide accurate device types + // will be preferred if their device type indicates an actual + // hardware GPU (integrated or discrete). PowerPreference::LowPower => integrated.or(discrete).or(other).or(virt).or(cpu), PowerPreference::HighPerformance => discrete.or(integrated).or(other).or(virt).or(cpu), }; diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 245d16d18c..b3ed54204e 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -289,8 +289,9 @@ platform supports."; #[macro_export] macro_rules! gfx_select { ($id:expr => $global:ident.$method:ident( $($param:expr),* )) => { - // Note: For some reason the cfg aliases defined in build.rs don't succesfully apply in this - // macro so we must specify their equivalents manually + // Note: For some reason the cfg aliases defined in build.rs + // don't succesfully apply in this macro so we must specify + // their equivalents manually. match $id.backend() { #[cfg(any( all(not(target_arch = "wasm32"), not(target_os = "ios"), not(target_os = "macos")), diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index dd66b48d14..e4d7d90828 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -100,8 +100,11 @@ impl BufferMapCallback { /// # Safety /// - /// - The callback pointer must be valid to call with the provided user_data pointer. - /// - Both pointers must point to valid memory until the callback is invoked, which may happen at an unspecified time. + /// - The callback pointer must be valid to call with the provided user_data + /// pointer. + /// + /// - Both pointers must point to valid memory until the callback is + /// invoked, which may happen at an unspecified time. pub unsafe fn from_c(inner: BufferMapCallbackC) -> Self { Self { inner: Some(BufferMapCallbackInner::C { inner }), @@ -483,13 +486,20 @@ impl Borrow for Texture { #[cfg_attr(feature = "trace", derive(serde::Serialize))] #[cfg_attr(feature = "replay", derive(serde::Deserialize), serde(default))] pub struct TextureViewDescriptor<'a> { - /// Debug label of the texture view. This will show up in graphics debuggers for easy identification. + /// Debug label of the texture view. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, - /// Format of the texture view, or `None` for the same format as the texture itself. + /// Format of the texture view, or `None` for the same format as the texture + /// itself. + /// /// At this time, it must be the same the underlying format of the texture. pub format: Option, - /// The dimension of the texture view. For 1D textures, this must be `1D`. For 2D textures it must be one of - /// `D2`, `D2Array`, `Cube`, and `CubeArray`. For 3D textures it must be `3D` + /// The dimension of the texture view. + /// + /// - For 1D textures, this must be `1D`. + /// - For 2D textures it must be one of `D2`, `D2Array`, `Cube`, or `CubeArray`. + /// - For 3D textures it must be `3D`. pub dimension: Option, /// Range within the texture that is accessible via this view. pub range: wgt::ImageSubresourceRange, @@ -580,7 +590,9 @@ impl Resource for TextureView { #[cfg_attr(feature = "trace", derive(serde::Serialize))] #[cfg_attr(feature = "replay", derive(serde::Deserialize))] pub struct SamplerDescriptor<'a> { - /// Debug label of the sampler. This will show up in graphics debuggers for easy identification. + /// Debug label of the sampler. + /// + /// This will show up in graphics debuggers for easy identification. pub label: Label<'a>, /// How to deal with out of bounds accesses in the u (i.e. x) direction pub address_modes: [wgt::AddressMode; 3], @@ -598,7 +610,8 @@ pub struct SamplerDescriptor<'a> { pub compare: Option, /// Valid values: 1, 2, 4, 8, and 16. pub anisotropy_clamp: Option, - /// Border color to use when address_mode is [`AddressMode::ClampToBorder`](wgt::AddressMode::ClampToBorder) + /// Border color to use when address_mode is + /// [`AddressMode::ClampToBorder`](wgt::AddressMode::ClampToBorder) pub border_color: Option, } diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 830244bee4..50ef96c874 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -1,93 +1,97 @@ /*! Resource State and Lifetime Trackers - * - * These structures are responsible for keeping track of resource state, - * generating barriers where needed, and making sure resources are kept - * alive until the trackers die. - * - * ## General Architecture - * - * Tracking is some of the hottest code in the entire codebase, so the trackers - * are designed to be as cache efficient as possible. They store resource state - * in flat vectors, storing metadata SOA style, one vector per type of metadata. - * - * A lot of the tracker code is deeply unsafe, using unchecked accesses all over - * to make performance as good as possible. However, for all unsafe accesses, there - * is a corresponding debug assert the checks if that access is valid. This helps - * get bugs caught fast, while still letting users not need to pay for the bounds - * checks. - * - * In wgpu, resource IDs are allocated and re-used, so will always be as low - * as reasonably possible. This allows us to use the ID as an index into an array. - * - * ## Statefulness - * - * There are two main types of trackers, stateful and stateless. - * - * Stateful trackers are for buffers and textures. They both have - * resource state attached to them which needs to be used to generate - * automatic synchronization. Because of the different requirements of - * buffers and textures, they have two separate tracking structures. - * - * Stateless trackers only store metadata and own the given resource. - * - * ## Use Case - * - * Within each type of tracker, the trackers are further split into 3 different - * use cases, Bind Group, Usage Scope, and a full Tracker. - * - * Bind Group trackers are just a list of different resources, their refcount, - * and how they are used. Textures are used via a selector and a usage type. - * Buffers by just a usage type. Stateless resources don't have a usage type. - * - * Usage Scope trackers are only for stateful resources. These trackers represent - * a single [`UsageScope`] in the spec. When a use is added to a usage scope, - * it is merged with all other uses of that resource in that scope. If there - * is a usage conflict, merging will fail and an error will be reported. - * - * Full trackers represent a before and after state of a resource. These - * are used for tracking on the device and on command buffers. The before - * state represents the state the resource is first used as in the command buffer, - * the after state is the state the command buffer leaves the resource in. - * These double ended buffers can then be used to generate the needed transitions - * between command buffers. - * - * ## Dense Datastructure with Sparse Data - * - * This tracking system is based on having completely dense data, but trackers do - * not always contain every resource. Some resources (or even most resources) go - * unused in any given command buffer. So to help speed up the process of iterating - * through possibly thousands of resources, we use a bit vector to represent if - * a resource is in the buffer or not. This allows us extremely efficient memory - * utilization, as well as being able to bail out of whole blocks of 32-64 resources - * with a single usize comparison with zero. In practice this means that merging - * partially resident buffers is extremely quick. - * - * The main advantage of this dense datastructure is that we can do merging - * of trackers in an extremely efficient fashion that results in us doing linear - * scans down a couple of buffers. CPUs and their caches absolutely eat this up. - * - * ## Stateful Resource Operations - * - * All operations on stateful trackers boil down to one of four operations: - * - `insert(tracker, new_state)` adds a resource with a given state to the tracker - * for the first time. - * - `merge(tracker, new_state)` merges this new state with the previous state, checking - * for usage conflicts. - * - `barrier(tracker, new_state)` compares the given state to the existing state and - * generates the needed barriers. - * - `update(tracker, new_state)` takes the given new state and overrides the old state. - * - * This allows us to compose the operations to form the various kinds of tracker merges - * that need to happen in the codebase. For each resource in the given merger, the following - * operation applies: - * - * UsageScope <- Resource = insert(scope, usage) OR merge(scope, usage) - * UsageScope <- UsageScope = insert(scope, scope) OR merge(scope, scope) - * CommandBuffer <- UsageScope = insert(buffer.start, buffer.end, scope) OR barrier(buffer.end, scope) + update(buffer.end, scope) - * Deivce <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.end) OR barrier(device.end, buffer.start) + update(device.end, buffer.end) - * - * [`UsageScope`]: https://gpuweb.github.io/gpuweb/#programming-model-synchronization -!*/ + +These structures are responsible for keeping track of resource state, +generating barriers where needed, and making sure resources are kept +alive until the trackers die. + +## General Architecture + +Tracking is some of the hottest code in the entire codebase, so the trackers +are designed to be as cache efficient as possible. They store resource state +in flat vectors, storing metadata SOA style, one vector per type of metadata. + +A lot of the tracker code is deeply unsafe, using unchecked accesses all over +to make performance as good as possible. However, for all unsafe accesses, there +is a corresponding debug assert the checks if that access is valid. This helps +get bugs caught fast, while still letting users not need to pay for the bounds +checks. + +In wgpu, resource IDs are allocated and re-used, so will always be as low +as reasonably possible. This allows us to use the ID as an index into an array. + +## Statefulness + +There are two main types of trackers, stateful and stateless. + +Stateful trackers are for buffers and textures. They both have +resource state attached to them which needs to be used to generate +automatic synchronization. Because of the different requirements of +buffers and textures, they have two separate tracking structures. + +Stateless trackers only store metadata and own the given resource. + +## Use Case + +Within each type of tracker, the trackers are further split into 3 different +use cases, Bind Group, Usage Scope, and a full Tracker. + +Bind Group trackers are just a list of different resources, their refcount, +and how they are used. Textures are used via a selector and a usage type. +Buffers by just a usage type. Stateless resources don't have a usage type. + +Usage Scope trackers are only for stateful resources. These trackers represent +a single [`UsageScope`] in the spec. When a use is added to a usage scope, +it is merged with all other uses of that resource in that scope. If there +is a usage conflict, merging will fail and an error will be reported. + +Full trackers represent a before and after state of a resource. These +are used for tracking on the device and on command buffers. The before +state represents the state the resource is first used as in the command buffer, +the after state is the state the command buffer leaves the resource in. +These double ended buffers can then be used to generate the needed transitions +between command buffers. + +## Dense Datastructure with Sparse Data + +This tracking system is based on having completely dense data, but trackers do +not always contain every resource. Some resources (or even most resources) go +unused in any given command buffer. So to help speed up the process of iterating +through possibly thousands of resources, we use a bit vector to represent if +a resource is in the buffer or not. This allows us extremely efficient memory +utilization, as well as being able to bail out of whole blocks of 32-64 resources +with a single usize comparison with zero. In practice this means that merging +partially resident buffers is extremely quick. + +The main advantage of this dense datastructure is that we can do merging +of trackers in an extremely efficient fashion that results in us doing linear +scans down a couple of buffers. CPUs and their caches absolutely eat this up. + +## Stateful Resource Operations + +All operations on stateful trackers boil down to one of four operations: +- `insert(tracker, new_state)` adds a resource with a given state to the tracker + for the first time. +- `merge(tracker, new_state)` merges this new state with the previous state, checking + for usage conflicts. +- `barrier(tracker, new_state)` compares the given state to the existing state and + generates the needed barriers. +- `update(tracker, new_state)` takes the given new state and overrides the old state. + +This allows us to compose the operations to form the various kinds of tracker merges +that need to happen in the codebase. For each resource in the given merger, the following +operation applies: + +``` +UsageScope <- Resource = insert(scope, usage) OR merge(scope, usage) +UsageScope <- UsageScope = insert(scope, scope) OR merge(scope, scope) +CommandBuffer <- UsageScope = insert(buffer.start, buffer.end, scope) + OR barrier(buffer.end, scope) + update(buffer.end, scope) +Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.end) + OR barrier(device.end, buffer.start) + update(device.end, buffer.end) +``` + +[`UsageScope`]: https://gpuweb.github.io/gpuweb/#programming-model-synchronization +*/ mod buffer; mod range; diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index 4ad833dc05..ea25011bc4 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -1124,8 +1124,9 @@ unsafe fn merge( *current_simple = merged_state; } (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { - // Because we are now demoting this simple state to a complex state, we actually need to make a whole - // new complex state for us to use as there wasn't one before. + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. let mut new_complex = ComplexTextureState::from_selector_state_iter( texture_data.1.clone(), iter::once((texture_data.1.clone(), *current_simple)), @@ -1170,7 +1171,8 @@ unsafe fn merge( for &mut (ref layers, ref mut current_layer_state) in mip.iter_mut() { let merged_state = *current_layer_state | new_simple; - // Once we remove unknown, this will never be empty, as simple states are never unknown. + // Once we remove unknown, this will never be empty, as + // simple states are never unknown. let merged_state = merged_state - TextureUses::UNKNOWN; log::trace!( @@ -1404,8 +1406,9 @@ unsafe fn update( *current_simple = new_simple; } (SingleOrManyStates::Single(current_simple), SingleOrManyStates::Many(new_many)) => { - // Because we are now demoting this simple state to a complex state, we actually need to make a whole - // new complex state for us to use as there wasn't one before. + // Because we are now demoting this simple state to a complex state, + // we actually need to make a whole new complex state for us to use + // as there wasn't one before. let mut new_complex = ComplexTextureState::from_selector_state_iter( texture_data.1.clone(), iter::once((texture_data.1.clone(), *current_simple)), @@ -1479,11 +1482,12 @@ unsafe fn update( if *current_layer_state == TextureUses::UNKNOWN && new_state != TextureUses::UNKNOWN { - // We now know something about this subresource that we didn't before - // so we should go back and update the start state. - - // We know we must have starter state be complex, otherwise we would know - // about this state. + // We now know something about this subresource that + // we didn't before so we should go back and update + // the start state. + // + // We know we must have starter state be complex, + // otherwise we would know about this state. strict_assert!(start_complex.is_some()); let start_complex = start_complex.as_deref_mut().unwrap_unchecked(); diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index a6dee7d974..17d322db26 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -1166,7 +1166,8 @@ impl Interface { // Check all vertex outputs and make sure the fragment shader consumes them. if shader_stage == naga::ShaderStage::Fragment { for &index in inputs.keys() { - // This is a linear scan, but the count should be low enough that this should be fine. + // This is a linear scan, but the count should be low enough + // that this should be fine. let found = entry_point.inputs.iter().any(|v| match *v { Varying::Local { location, .. } => location == index, Varying::BuiltIn(_) => false,