From 21f56dc8d261c146ec7027d6ce12a82161017a21 Mon Sep 17 00:00:00 2001 From: Wumpf Date: Tue, 28 Dec 2021 23:05:53 +0100 Subject: [PATCH] Do texture init via clear passes when possible (#2307) * CLEAR_COMMANDS extension is now more of a window into wgpu zero-init this has mostly implications on the constraints, but also allows a more leaky documentation which makes sense for this non-standard function as there is no other place to look it up * clear_texture via renderpasses wip * 3D depth textures are no longer allowed, volumes are always cleared via COPY_DST * cleanup texture's clear_views * rename CLEAR_COMMANDS to CLEAR_TEXTURE * separate clear_texture into reusable & more descriptive parts * texture clear views are now created ahead of time * discarded surface fixup goes through new clear_texture method now * on queue texture initialization now goes through clear_texture pending inits need to store Stored textures now though, causing more ref count bumping * texture init on queue_write_texture now also goes through new clear_texture * transfer functions on commandbuffer use now new texture init route * merge collect_zero_buffer_copies_for_clear_texture into clear_texture_via_buffer_copies * clear functions now take TextureInitRange * Fix clippy lints * command_encoder_clear_texture no longer takes write lock on texture * TextureClearMode encodes now is_color * code cleanup, mostly about `use` * Handle volume textures in clear_texture_via_render_passes properly * texture clear no longer requires id::Stored * init tracking fixes for volumes and init on partial subresource writes * texture creation enforces COPY_DST only if absolutely necessary * unrolled functional chain, reduce unsafe scope size * fix clippy lints * clear_texture test no longer creates 1D textures see #2323 * 3D textures are no longer cleared as render target since this isn't supported on Metal * fix deno building issue, fix formatting * TextureInner::Surface can now be zero initialized --- 
deno_webgpu/src/lib.rs | 6 +- wgpu-core/src/command/clear.rs | 262 ++++++++++++++++++-------- wgpu-core/src/command/memory_init.rs | 126 ++++--------- wgpu-core/src/command/mod.rs | 17 +- wgpu-core/src/command/render.rs | 6 +- wgpu-core/src/command/transfer.rs | 208 ++++++++++---------- wgpu-core/src/conv.rs | 4 + wgpu-core/src/device/life.rs | 26 ++- wgpu-core/src/device/mod.rs | 150 ++++++++++++--- wgpu-core/src/device/queue.rs | 141 +++++++------- wgpu-core/src/hub.rs | 9 +- wgpu-core/src/init_tracker/mod.rs | 5 +- wgpu-core/src/init_tracker/texture.rs | 16 +- wgpu-core/src/present.rs | 41 ++++ wgpu-core/src/resource.rs | 42 +++++ wgpu-hal/src/dx12/adapter.rs | 2 +- wgpu-hal/src/gles/adapter.rs | 2 +- wgpu-hal/src/metal/adapter.rs | 2 +- wgpu-hal/src/vulkan/adapter.rs | 2 +- wgpu-types/src/lib.rs | 4 +- wgpu/examples/boids/main.rs | 4 +- wgpu/src/lib.rs | 10 +- wgpu/tests/clear_texture.rs | 53 +++--- 23 files changed, 702 insertions(+), 436 deletions(-) diff --git a/deno_webgpu/src/lib.rs b/deno_webgpu/src/lib.rs index db5ac1a8cf..fa193e5704 100644 --- a/deno_webgpu/src/lib.rs +++ b/deno_webgpu/src/lib.rs @@ -202,8 +202,8 @@ fn deserialize_features(features: &wgpu_types::Features) -> Vec<&'static str> { if features.contains(wgpu_types::Features::VERTEX_WRITABLE_STORAGE) { return_features.push("vertex-writable-storage"); } - if features.contains(wgpu_types::Features::CLEAR_COMMANDS) { - return_features.push("clear-commands"); + if features.contains(wgpu_types::Features::CLEAR_TEXTURE) { + return_features.push("clear-texture"); } if features.contains(wgpu_types::Features::SPIRV_SHADER_PASSTHROUGH) { return_features.push("spirv-shader-passthrough"); @@ -417,7 +417,7 @@ impl From for wgpu_types::Features { required_features.0.contains("vertex-writable-storage"), ); features.set( - wgpu_types::Features::CLEAR_COMMANDS, + wgpu_types::Features::CLEAR_TEXTURE, required_features.0.contains("clear-commands"), ); features.set( diff --git a/wgpu-core/src/command/clear.rs 
b/wgpu-core/src/command/clear.rs index d36569edfe..42d742ab5c 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -5,24 +5,24 @@ use crate::device::trace::Command as TraceCommand; use crate::{ align_to, command::CommandBuffer, + device::Device, get_lowest_common_denom, - hub::{Global, GlobalIdentityHandlerFactory, HalApi, Token}, - id::{BufferId, CommandEncoderId, DeviceId, TextureId}, - init_tracker::MemoryInitKind, - track::TextureSelector, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Resource, Token}, + id::{BufferId, CommandEncoderId, DeviceId, TextureId, Valid}, + init_tracker::{MemoryInitKind, TextureInitRange}, + resource::{Texture, TextureClearMode}, + track::{ResourceTracker, TextureSelector, TextureState}, }; use hal::CommandEncoder as _; use thiserror::Error; -use wgt::{ - BufferAddress, BufferSize, BufferUsages, ImageSubresourceRange, TextureAspect, TextureUsages, -}; +use wgt::{BufferAddress, BufferSize, BufferUsages, ImageSubresourceRange, TextureAspect}; /// Error encountered while attempting a clear. 
#[derive(Clone, Debug, Error)] pub enum ClearError { - #[error("to use clear_buffer/texture the CLEAR_COMMANDS feature needs to be enabled")] - MissingClearCommandsFeature, + #[error("to use clear_texture the CLEAR_TEXTURE feature needs to be enabled")] + MissingClearTextureFeature, #[error("command encoder {0:?} is invalid")] InvalidCommandEncoder(CommandEncoderId), #[error("device {0:?} is invalid")] @@ -31,6 +31,8 @@ pub enum ClearError { InvalidBuffer(BufferId), #[error("texture {0:?} is invalid or destroyed")] InvalidTexture(TextureId), + #[error("texture {0:?} can not be cleared")] + NoValidTextureClearMode(TextureId), #[error("buffer clear size {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] UnalignedFillSize(BufferSize), #[error("buffer offset {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")] @@ -41,17 +43,13 @@ pub enum ClearError { end_offset: BufferAddress, buffer_size: BufferAddress, }, - #[error("destination buffer/texture is missing the `COPY_DST` usage flag")] + #[error("destination buffer is missing the `COPY_DST` usage flag")] MissingCopyDstUsageFlag(Option, Option), #[error("texture lacks the aspects that were specified in the image subresource range. Texture with format {texture_format:?}, specified was {subresource_range_aspects:?}")] MissingTextureAspect { texture_format: wgt::TextureFormat, subresource_range_aspects: TextureAspect, }, - #[error("Depth/Stencil formats are not supported for clearing")] - DepthStencilFormatNotSupported, - #[error("Multisampled textures are not supported for clearing")] - MultisampledTextureUnsupported, #[error("image subresource level range is outside of the texture's level range. 
texture range is {texture_level_range:?}, \ whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")] InvalidTextureLevelRange { @@ -173,8 +171,8 @@ impl Global { }); } - if !cmd_buf.support_clear_buffer_texture { - return Err(ClearError::MissingClearCommandsFeature); + if !cmd_buf.support_clear_texture { + return Err(ClearError::MissingClearTextureFeature); } let dst_texture = texture_guard @@ -191,14 +189,6 @@ impl Global { }); }; - // Check if texture is supported for clearing - if dst_texture.desc.format.describe().sample_type == wgt::TextureSampleType::Depth { - return Err(ClearError::DepthStencilFormatNotSupported); - } - if dst_texture.desc.sample_count > 1 { - return Err(ClearError::MultisampledTextureUnsupported); - } - // Check if subresource level range is valid let subresource_level_end = match subresource_range.mip_level_count { Some(count) => subresource_range.base_mip_level + count.get(), @@ -228,68 +218,123 @@ impl Global { }); } - // query from tracker with usage (and check usage) - let (dst_texture, dst_pending) = cmd_buf - .trackers - .textures - .use_replace( - &*texture_guard, - dst, - TextureSelector { - levels: subresource_range.base_mip_level..subresource_level_end, - layers: subresource_range.base_array_layer..subresource_layer_end, - }, - hal::TextureUses::COPY_DST, - ) - .map_err(ClearError::InvalidTexture)?; - let dst_raw = dst_texture - .inner - .as_raw() - .ok_or(ClearError::InvalidTexture(dst))?; - if !dst_texture.desc.usage.contains(TextureUsages::COPY_DST) { - return Err(ClearError::MissingCopyDstUsageFlag(None, Some(dst))); + clear_texture( + Valid(dst), + dst_texture, + TextureInitRange { + mip_range: subresource_range.base_mip_level..subresource_level_end, + layer_range: subresource_range.base_array_layer..subresource_layer_end, + }, + cmd_buf.encoder.open(), + &mut cmd_buf.trackers.textures, + &device_guard[cmd_buf.device_id.value], + ) + } +} + +pub(crate) fn 
clear_texture( + dst_texture_id: Valid, + dst_texture: &Texture, + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + texture_tracker: &mut ResourceTracker, + device: &Device, +) -> Result<(), ClearError> { + clear_texture_no_device( + dst_texture_id, + dst_texture, + range, + encoder, + texture_tracker, + &device.alignments, + &device.zero_buffer, + ) +} + +pub(crate) fn clear_texture_no_device( + dst_texture_id: Valid, + dst_texture: &Texture, + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + texture_tracker: &mut ResourceTracker, + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, +) -> Result<(), ClearError> { + let dst_raw = dst_texture + .inner + .as_raw() + .ok_or(ClearError::InvalidTexture(dst_texture_id.0))?; + + // Issue the right barrier. + let clear_usage = match dst_texture.clear_mode { + TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST, + TextureClearMode::RenderPass { + is_color: false, .. + } => hal::TextureUses::DEPTH_STENCIL_WRITE, + TextureClearMode::RenderPass { is_color: true, .. } => hal::TextureUses::COLOR_TARGET, + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode(dst_texture_id.0)); } + }; - // actual hal barrier & operation - let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_texture)); - let encoder = cmd_buf.encoder.open(); - let device = &device_guard[cmd_buf.device_id.value]; + let selector = TextureSelector { + levels: range.mip_range.clone(), + layers: range.layer_range.clone(), + }; - let mut zero_buffer_copy_regions = Vec::new(); - collect_zero_buffer_copies_for_clear_texture( + // If we're in a texture-init usecase, we know that the texture is already tracked since whatever caused the init requirement, + // will have caused the usage tracker to be aware of the texture. Meaning, that it is safe to call call change_replace_tracked if the life_guard is already gone + // (i.e. the user no longer holds on to this texture). 
+ // On the other hand, when coming via command_encoder_clear_texture, the life_guard is still there since in order to call it a texture object is needed. + // + // We could in theory distinguish these two scenarios in the internal clear_texture api in order to remove this check and call the cheaper change_replace_tracked whenever possible. + let dst_barrier = if let Some(ref_count) = dst_texture.life_guard().ref_count.as_ref() { + texture_tracker.change_replace(dst_texture_id, ref_count, selector, clear_usage) + } else { + texture_tracker.change_replace_tracked(dst_texture_id, selector, clear_usage) + } + .map(|pending| pending.into_hal(dst_texture)); + unsafe { + encoder.transition_textures(dst_barrier); + } + + // Record actual clearing + match dst_texture.clear_mode { + TextureClearMode::BufferCopy => clear_texture_via_buffer_copies::( &dst_texture.desc, - device.alignments.buffer_copy_pitch.get() as u32, - subresource_range.base_mip_level..subresource_level_end, - subresource_range.base_array_layer..subresource_layer_end, - &mut zero_buffer_copy_regions, - ); - unsafe { - encoder.transition_textures(dst_barrier); - if !zero_buffer_copy_regions.is_empty() { - encoder.copy_buffer_to_texture( - &device.zero_buffer, - dst_raw, - zero_buffer_copy_regions.into_iter(), - ); - } + alignments, + zero_buffer, + range, + encoder, + dst_raw, + ), + TextureClearMode::RenderPass { is_color, .. } => { + clear_texture_via_render_passes(dst_texture, range, is_color, encoder)? 
+ } + TextureClearMode::None => { + return Err(ClearError::NoValidTextureClearMode(dst_texture_id.0)); } - Ok(()) } + Ok(()) } -pub(crate) fn collect_zero_buffer_copies_for_clear_texture( +fn clear_texture_via_buffer_copies( texture_desc: &wgt::TextureDescriptor<()>, - buffer_copy_pitch: u32, - mip_range: Range, - layer_range: Range, - out_copy_regions: &mut Vec, // TODO: Something better than Vec + alignments: &hal::Alignments, + zero_buffer: &A::Buffer, // Buffer of size device::ZERO_BUFFER_SIZE + range: TextureInitRange, + encoder: &mut A::CommandEncoder, + dst_raw: &A::Texture, ) { + // Gather list of zero_buffer copies and issue a single command then to perform them + let mut zero_buffer_copy_regions = Vec::new(); + let buffer_copy_pitch = alignments.buffer_copy_pitch.get() as u32; let format_desc = texture_desc.format.describe(); let bytes_per_row_alignment = get_lowest_common_denom(buffer_copy_pitch, format_desc.block_size as u32); - for mip_level in mip_range { + for mip_level in range.mip_range { let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap(); // Round to multiple of block size mip_size.width = align_to(mip_size.width, format_desc.block_dimensions.0 as u32); @@ -305,7 +350,7 @@ pub(crate) fn collect_zero_buffer_copies_for_clear_texture( let max_rows_per_copy = max_rows_per_copy / format_desc.block_dimensions.1 as u32 * format_desc.block_dimensions.1 as u32; assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture with format {:?} and desc {:?}", - texture_desc.format, texture_desc.size); + texture_desc.format, texture_desc.size); let z_range = 0..(if texture_desc.dimension == wgt::TextureDimension::D3 { mip_size.depth_or_array_layers @@ -313,7 +358,7 @@ pub(crate) fn collect_zero_buffer_copies_for_clear_texture( 1 }); - for array_layer in layer_range.clone() { + for array_layer in range.layer_range.clone() { // TODO: Only doing one layer at a time for volume textures right now. 
for z in z_range.clone() { // May need multiple copies for each subresource! However, we assume that we never need to split a row. @@ -321,7 +366,7 @@ pub(crate) fn collect_zero_buffer_copies_for_clear_texture( while num_rows_left > 0 { let num_rows = num_rows_left.min(max_rows_per_copy); - out_copy_regions.push(hal::BufferTextureCopy { + zero_buffer_copy_regions.push(hal::BufferTextureCopy { buffer_layout: wgt::ImageDataLayout { offset: 0, bytes_per_row: NonZeroU32::new(bytes_per_row), @@ -349,4 +394,73 @@ pub(crate) fn collect_zero_buffer_copies_for_clear_texture( } } } + + unsafe { + encoder.copy_buffer_to_texture(zero_buffer, dst_raw, zero_buffer_copy_regions.into_iter()); + } +} + +fn clear_texture_via_render_passes( + dst_texture: &Texture, + range: TextureInitRange, + is_color: bool, + encoder: &mut A::CommandEncoder, +) -> Result<(), ClearError> { + let extent_base = wgt::Extent3d { + width: dst_texture.desc.size.width, + height: dst_texture.desc.size.height, + depth_or_array_layers: 1, // Only one layer or slice is cleared at a time. + }; + + let sample_count = dst_texture.desc.sample_count; + let is_3d_texture = dst_texture.desc.dimension == wgt::TextureDimension::D3; + for mip_level in range.mip_range { + let extent = extent_base.mip_level_size(mip_level, is_3d_texture); + let layer_or_depth_range = if dst_texture.desc.dimension == wgt::TextureDimension::D3 { + // TODO: We assume that we're allowed to do clear operations on volume texture slices, this is not properly specified. 
+ 0..extent.depth_or_array_layers + } else { + range.layer_range.clone() + }; + for depth_or_layer in layer_or_depth_range { + let color_attachments_tmp; + let (color_attachments, depth_stencil_attachment) = if is_color { + color_attachments_tmp = [hal::ColorAttachment { + target: hal::Attachment { + view: dst_texture.get_clear_view(mip_level, depth_or_layer), + usage: hal::TextureUses::COLOR_TARGET, + }, + resolve_target: None, + ops: hal::AttachmentOps::STORE, + clear_value: wgt::Color::TRANSPARENT, + }]; + (&color_attachments_tmp[..], None) + } else { + ( + &[][..], + Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: dst_texture.get_clear_view(mip_level, depth_or_layer), + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops: hal::AttachmentOps::STORE, + stencil_ops: hal::AttachmentOps::STORE, + clear_value: (0.0, 0), + }), + ) + }; + unsafe { + encoder.begin_render_pass(&hal::RenderPassDescriptor { + label: Some("clear_texture clear pass"), + extent, + sample_count, + color_attachments, + depth_stencil_attachment, + multiview: None, + }); + encoder.end_render_pass(); + } + } + } + Ok(()) } diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index 8167d7cdd9..ea0b303609 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -3,17 +3,16 @@ use std::{collections::hash_map::Entry, ops::Range, vec::Drain}; use hal::CommandEncoder; use crate::{ - command::collect_zero_buffer_copies_for_clear_texture, device::Device, hub::Storage, id::{self, TextureId}, init_tracker::*, resource::{Buffer, Texture}, - track::{ResourceTracker, TextureSelector, TextureState, TrackerSet}, + track::{ResourceTracker, TextureState, TrackerSet}, FastHashMap, }; -use super::{BakedCommands, DestroyedBufferError, DestroyedTextureError}; +use super::{clear::clear_texture, BakedCommands, DestroyedBufferError, DestroyedTextureError}; /// Surface that was discarded by `StoreOp::Discard` of a 
preceding renderpass. /// Any read access to this surface needs to be preceded by a texture initialization. @@ -103,13 +102,13 @@ impl CommandBufferTextureMemoryActions { // Shortcut for register_init_action when it is known that the action is an implicit init, not requiring any immediate resource init. pub(crate) fn register_implicit_init( &mut self, - id: TextureId, + id: id::Valid, range: TextureInitRange, texture_guard: &Storage, TextureId>, ) { let must_be_empty = self.register_init_action( &TextureInitTrackerAction { - id, + id: id.0, range, kind: MemoryInitKind::ImplicitlyInitialized, }, @@ -119,7 +118,7 @@ impl CommandBufferTextureMemoryActions { } } -// Utility function that takes discarded surfaces from register_init_action and initializes them on the spot. +// Utility function that takes discarded surfaces from (several calls to) register_init_action and initializes them on the spot. // Takes care of barriers as well! pub(crate) fn fixup_discarded_surfaces< A: hal::Api, @@ -131,43 +130,19 @@ pub(crate) fn fixup_discarded_surfaces< texture_tracker: &mut ResourceTracker, device: &Device, ) { - let mut zero_buffer_copy_regions = Vec::new(); for init in inits { - let mip_range = init.mip_level..(init.mip_level + 1); - let layer_range = init.layer..(init.layer + 1); - - let (texture, pending) = texture_tracker - .use_replace( - &*texture_guard, - init.texture, - TextureSelector { - levels: mip_range.clone(), - layers: layer_range.clone(), - }, - hal::TextureUses::COPY_DST, - ) - .unwrap(); - - collect_zero_buffer_copies_for_clear_texture( - &texture.desc, - device.alignments.buffer_copy_pitch.get() as u32, - mip_range, - layer_range, - &mut zero_buffer_copy_regions, - ); - - let barriers = pending.map(|pending| pending.into_hal(texture)); - let raw_texture = texture.inner.as_raw().unwrap(); - - unsafe { - // TODO: Should first gather all barriers, do a single transition_textures call, and then send off copy_buffer_to_texture commands. 
- encoder.transition_textures(barriers); - encoder.copy_buffer_to_texture( - &device.zero_buffer, - raw_texture, - zero_buffer_copy_regions.drain(..), - ); - } + clear_texture( + id::Valid(init.texture), + texture_guard.get(init.texture).unwrap(), + TextureInitRange { + mip_range: init.mip_level..(init.mip_level + 1), + layer_range: init.layer..(init.layer + 1), + }, + encoder, + texture_tracker, + device, + ) + .unwrap(); } } @@ -285,66 +260,29 @@ impl BakedCommands { } } MemoryInitKind::NeedsInitializedMemory => { - ranges.clear(); for (mip_level, mip_tracker) in affected_mip_trackers { for layer_range in mip_tracker.drain(use_range.layer_range.clone()) { ranges.push(TextureInitRange { - mip_range: mip_level as u32..(mip_level as u32 + 1), + mip_range: (mip_level as u32)..(mip_level as u32 + 1), layer_range, - }) - } - } - - let raw_texture = texture - .inner - .as_raw() - .ok_or(DestroyedTextureError(texture_use.id))?; - - let mut texture_barriers = Vec::new(); - let mut zero_buffer_copy_regions = Vec::new(); - for range in &ranges { - // Don't do use_replace since the texture may already no longer have a ref_count. - // However, we *know* that it is currently in use, so the tracker must already know about it. - texture_barriers.extend( - device_tracker - .textures - .change_replace_tracked( - id::Valid(texture_use.id), - TextureSelector { - levels: range.mip_range.clone(), - layers: range.layer_range.clone(), - }, - hal::TextureUses::COPY_DST, - ) - .map(|pending| pending.into_hal(texture)), - ); - - collect_zero_buffer_copies_for_clear_texture( - &texture.desc, - device.alignments.buffer_copy_pitch.get() as u32, - range.mip_range.clone(), - range.layer_range.clone(), - &mut zero_buffer_copy_regions, - ); - } - - if !zero_buffer_copy_regions.is_empty() { - debug_assert!(texture.hal_usage.contains(hal::TextureUses::COPY_DST), - "Texture needs to have the COPY_DST flag. 
Otherwise we can't ensure initialized memory!"); - unsafe { - // TODO: Could safe on transition_textures calls by bundling barriers from *all* textures. - // (a bbit more tricky because a naive approach would have to borrow same texture several times then) - self.encoder - .transition_textures(texture_barriers.into_iter()); - self.encoder.copy_buffer_to_texture( - &device.zero_buffer, - raw_texture, - zero_buffer_copy_regions.into_iter(), - ); + }); } } } } + + // TODO: Could we attempt some range collapsing here? + for range in ranges.drain(..) { + clear_texture( + id::Valid(texture_use.id), + &*texture, + range, + &mut self.encoder, + &mut device_tracker.textures, + device, + ) + .unwrap(); + } } // Now that all buffers/textures have the proper init state for before cmdbuf start, we discard init states for textures it left discarded after its execution. diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index aa45ec9ab7..e288acfd29 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -8,15 +8,12 @@ mod query; mod render; mod transfer; -pub use self::bundle::*; -pub(crate) use self::clear::collect_zero_buffer_copies_for_clear_texture; -pub use self::clear::ClearError; -pub use self::compute::*; -pub use self::draw::*; +pub(crate) use self::clear::clear_texture_no_device; +pub use self::{ + bundle::*, clear::ClearError, compute::*, draw::*, query::*, render::*, transfer::*, +}; + use self::memory_init::CommandBufferTextureMemoryActions; -pub use self::query::*; -pub use self::render::*; -pub use self::transfer::*; use crate::error::{ErrorFormatter, PrettyError}; use crate::init_tracker::BufferInitTrackerAction; @@ -101,7 +98,7 @@ pub struct CommandBuffer { buffer_memory_init_actions: Vec, texture_memory_actions: CommandBufferTextureMemoryActions, limits: wgt::Limits, - support_clear_buffer_texture: bool, + support_clear_texture: bool, #[cfg(feature = "trace")] pub(crate) commands: Option>, } @@ -129,7 +126,7 @@ impl 
CommandBuffer { buffer_memory_init_actions: Default::default(), texture_memory_actions: Default::default(), limits, - support_clear_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS), + support_clear_texture: features.contains(wgt::Features::CLEAR_TEXTURE), #[cfg(feature = "trace")] commands: if enable_tracing { Some(Vec::new()) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 10ec405ee2..5940461f87 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -578,7 +578,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { } else if channel.store_op == StoreOp::Store { // Clear + Store texture_memory_actions.register_implicit_init( - view.parent_id.value.0, + view.parent_id.value, TextureInitRange::from(view.selector.clone()), texture_guard, ); @@ -745,7 +745,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { if at.depth.store_op != at.stencil.store_op { if !need_init_beforehand { cmd_buf.texture_memory_actions.register_implicit_init( - view.parent_id.value.0, + view.parent_id.value, TextureInitRange::from(view.selector.clone()), texture_guard, ); @@ -838,7 +838,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { } cmd_buf.texture_memory_actions.register_implicit_init( - resolve_view.parent_id.value.0, + resolve_view.parent_id.value, TextureInitRange::from(resolve_view.selector.clone()), texture_guard, ); diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 7a533656cb..9e19828375 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -1,16 +1,16 @@ #[cfg(feature = "trace")] use crate::device::trace::Command as TraceCommand; use crate::{ - command::{ - collect_zero_buffer_copies_for_clear_texture, memory_init::fixup_discarded_surfaces, - CommandBuffer, CommandEncoderError, - }, + command::{CommandBuffer, CommandEncoderError}, conv, device::Device, error::{ErrorFormatter, PrettyError}, hub::{Global, GlobalIdentityHandlerFactory, 
HalApi, Storage, Token}, - id::{BufferId, CommandEncoderId, TextureId}, - init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, + id::{BufferId, CommandEncoderId, Id, TextureId, Valid}, + init_tracker::{ + has_copy_partial_init_tracker_coverage, MemoryInitKind, TextureInitRange, + TextureInitTrackerAction, + }, resource::{Texture, TextureErrorDimension}, track::TextureSelector, }; @@ -21,6 +21,8 @@ use wgt::{BufferAddress, BufferUsages, Extent3d, TextureUsages}; use std::iter; +use super::clear::clear_texture; + pub type ImageCopyBuffer = wgt::ImageCopyBuffer; pub type ImageCopyTexture = wgt::ImageCopyTexture; @@ -104,6 +106,8 @@ pub enum TransferError { src_format: wgt::TextureFormat, dst_format: wgt::TextureFormat, }, + #[error(transparent)] + MemoryInitFailure(#[from] super::ClearError), } impl PrettyError for TransferError { @@ -374,60 +378,108 @@ pub(crate) fn validate_texture_copy_range( Ok((copy_extent, array_layer_count)) } -fn get_copy_dst_texture_init_requirement( - texture: &Texture, - copy_texture: &wgt::ImageCopyTexture, +fn handle_texture_init( + init_kind: MemoryInitKind, + cmd_buf: &mut CommandBuffer, + device: &Device, + copy_texture: &ImageCopyTexture, copy_size: &Extent3d, -) -> TextureInitTrackerAction { - // Attention: If we don't write full texture subresources, we need to a full clear first since we don't track subrects. 
- let dst_init_kind = if copy_size.width == texture.desc.size.width - && copy_size.height == texture.desc.size.height - { - MemoryInitKind::ImplicitlyInitialized - } else { - MemoryInitKind::NeedsInitializedMemory - }; - TextureInitTrackerAction { + texture_guard: &Storage, Id>>, + texture: &Texture, +) { + let init_action = TextureInitTrackerAction { id: copy_texture.texture, range: TextureInitRange { mip_range: copy_texture.mip_level..copy_texture.mip_level + 1, layer_range: copy_texture.origin.z ..(copy_texture.origin.z + copy_size.depth_or_array_layers), }, - kind: dst_init_kind, + kind: init_kind, + }; + + // Register the init action. + let immediate_inits = cmd_buf + .texture_memory_actions + .register_init_action(&{ init_action }, texture_guard); + + // In rare cases we may need to insert an init operation immediately onto the command buffer. + if !immediate_inits.is_empty() { + let cmd_buf_raw = cmd_buf.encoder.open(); + for init in immediate_inits { + clear_texture( + Valid(init.texture), + texture, + TextureInitRange { + mip_range: init.mip_level..(init.mip_level + 1), + layer_range: init.layer..(init.layer + 1), + }, + cmd_buf_raw, + &mut cmd_buf.trackers.textures, + device, + ) + .unwrap(); + } } } +// Ensures the source texture of a transfer is in the right initialization state and records the state for after the transfer operation. 
fn handle_src_texture_init( cmd_buf: &mut CommandBuffer, device: &Device, source: &ImageCopyTexture, - src_base: &hal::TextureCopyBase, copy_size: &Extent3d, texture_guard: &Storage, TextureId>, -) { - let immediate_src_init = cmd_buf.texture_memory_actions.register_init_action( - &TextureInitTrackerAction { - id: source.texture, - range: TextureInitRange { - mip_range: src_base.mip_level..src_base.mip_level + 1, - layer_range: src_base.origin.z - ..(src_base.origin.z + copy_size.depth_or_array_layers), - }, - kind: MemoryInitKind::NeedsInitializedMemory, - }, +) -> Result<(), TransferError> { + let texture = texture_guard + .get(source.texture) + .map_err(|_| TransferError::InvalidTexture(source.texture))?; + + handle_texture_init( + MemoryInitKind::NeedsInitializedMemory, + cmd_buf, + device, + source, + copy_size, texture_guard, + texture, ); - if !immediate_src_init.is_empty() { - let cmd_buf_raw = cmd_buf.encoder.open(); - fixup_discarded_surfaces( - immediate_src_init.into_iter(), - cmd_buf_raw, - texture_guard, - &mut cmd_buf.trackers.textures, - device, - ); - } + Ok(()) +} + +// Ensures the destination texture of a transfer is in the right initialization state and records the state for after the transfer operation. +fn handle_dst_texture_init( + cmd_buf: &mut CommandBuffer, + device: &Device, + destination: &ImageCopyTexture, + copy_size: &Extent3d, + texture_guard: &Storage, TextureId>, +) -> Result<(), TransferError> { + let texture = texture_guard + .get(destination.texture) + .map_err(|_| TransferError::InvalidTexture(destination.texture))?; + + // Attention: If we don't write full texture subresources, we need to do a full clear first since we don't track subrects. + // This means that in rare cases even a *destination* texture of a transfer may need an immediate texture init.
+ let dst_init_kind = if has_copy_partial_init_tracker_coverage( + copy_size, + destination.mip_level, + &texture.desc, + ) { + MemoryInitKind::NeedsInitializedMemory + } else { + MemoryInitKind::ImplicitlyInitialized + }; + + handle_texture_init( + dst_init_kind, + cmd_buf, + device, + destination, + copy_size, + texture_guard, + texture, + ); + Ok(()) } impl Global { @@ -598,6 +650,9 @@ impl Global { let (dst_range, dst_base, _) = extract_texture_selector(destination, copy_size, &*texture_guard)?; + // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. + handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; + let (src_buffer, src_pending) = cmd_buf .trackers .buffers @@ -663,19 +718,6 @@ impl Global { source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), MemoryInitKind::NeedsInitializedMemory, )); - let mut dst_zero_buffer_copy_regions = Vec::new(); - for immediate_init in cmd_buf.texture_memory_actions.register_init_action( - &get_copy_dst_texture_init_requirement(dst_texture, destination, copy_size), - &texture_guard, - ) { - collect_zero_buffer_copies_for_clear_texture( - &dst_texture.desc, - device.alignments.buffer_copy_pitch.get() as u32, - immediate_init.mip_level..(immediate_init.mip_level + 1), - immediate_init.layer..(immediate_init.layer + 1), - &mut dst_zero_buffer_copy_regions, - ); - } let regions = (0..array_layer_count).map(|rel_array_layer| { let mut texture_base = dst_base.clone(); @@ -688,17 +730,10 @@ impl Global { size: hal_copy_size, } }); + let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_textures(dst_barriers); - // potential dst buffer init (for previously discarded dst_texture + partial copy) - if !dst_zero_buffer_copy_regions.is_empty() { - cmd_buf_raw.copy_buffer_to_texture( - &device.zero_buffer, - dst_raw, - 
dst_zero_buffer_copy_regions.into_iter(), - ); - } cmd_buf_raw.transition_buffers(src_barriers); cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); } @@ -742,15 +777,8 @@ impl Global { let (src_range, src_base, _) = extract_texture_selector(source, copy_size, &*texture_guard)?; - // Handle src texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. - handle_src_texture_init( - cmd_buf, - device, - source, - &src_base, - copy_size, - &texture_guard, - ); + // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. + handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; let (src_texture, src_pending) = cmd_buf .trackers @@ -887,15 +915,9 @@ impl Global { return Err(TransferError::MismatchedAspects.into()); } - // Handle src texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. - handle_src_texture_init( - cmd_buf, - device, - source, - &src_tex_base, - copy_size, - &texture_guard, - ); + // Handle texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. 
+ handle_src_texture_init(cmd_buf, device, source, copy_size, &texture_guard)?; + handle_dst_texture_init(cmd_buf, device, destination, copy_size, &texture_guard)?; let (src_texture, src_pending) = cmd_buf .trackers @@ -964,20 +986,6 @@ impl Global { copy_size, )?; - let mut dst_zero_buffer_copy_regions = Vec::new(); - for immediate_init in cmd_buf.texture_memory_actions.register_init_action( - &get_copy_dst_texture_init_requirement(dst_texture, destination, copy_size), - &texture_guard, - ) { - collect_zero_buffer_copies_for_clear_texture( - &dst_texture.desc, - device.alignments.buffer_copy_pitch.get() as u32, - immediate_init.mip_level..(immediate_init.mip_level + 1), - immediate_init.layer..(immediate_init.layer + 1), - &mut dst_zero_buffer_copy_regions, - ); - } - let hal_copy_size = hal::CopyExtent { width: src_copy_size.width.min(dst_copy_size.width), height: src_copy_size.height.min(dst_copy_size.height), @@ -997,16 +1005,6 @@ impl Global { let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_textures(barriers.into_iter()); - - // potential dst buffer init (for previously discarded dst_texture + partial copy) - if !dst_zero_buffer_copy_regions.is_empty() { - cmd_buf_raw.copy_buffer_to_texture( - &device.zero_buffer, - dst_raw, - dst_zero_buffer_copy_regions.into_iter(), - ); - } - cmd_buf_raw.copy_texture_to_texture( src_raw, hal::TextureUses::COPY_SRC, diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index 930a532976..b6632b5903 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -70,6 +70,10 @@ pub fn map_texture_usage( hal::TextureUses::COPY_SRC, usage.contains(wgt::TextureUsages::COPY_SRC), ); + u.set( + hal::TextureUses::COPY_DST, + usage.contains(wgt::TextureUsages::COPY_DST), + ); u.set( hal::TextureUses::RESOURCE, usage.contains(wgt::TextureUsages::TEXTURE_BINDING), diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index a12e202c0e..91756ec776 100644 --- 
a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -256,7 +256,10 @@ impl LifetimeTracker { for res in temp_resources { match res { TempResource::Buffer(raw) => last_resources.buffers.push(raw), - TempResource::Texture(raw) => last_resources.textures.push(raw), + TempResource::Texture(raw, views) => { + last_resources.textures.push(raw); + last_resources.texture_views.extend(views); + } } } @@ -336,7 +339,10 @@ impl LifetimeTracker { .map_or(&mut self.free_resources, |a| &mut a.last_resources); match temp_resource { TempResource::Buffer(raw) => resources.buffers.push(raw), - TempResource::Texture(raw) => resources.textures.push(raw), + TempResource::Texture(raw, views) => { + resources.texture_views.extend(views); + resources.textures.push(raw); + } } } @@ -455,12 +461,20 @@ impl LifetimeTracker { resource::TextureInner::Native { raw: Some(raw) } => raw, _ => continue, }; - self.active + let non_referenced_resources = self + .active .iter_mut() .find(|a| a.index == submit_index) - .map_or(&mut self.free_resources, |a| &mut a.last_resources) - .textures - .push(raw); + .map_or(&mut self.free_resources, |a| &mut a.last_resources); + + non_referenced_resources.textures.push(raw); + if let resource::TextureClearMode::RenderPass { clear_views, .. 
} = + res.clear_mode + { + non_referenced_resources + .texture_views + .extend(clear_views.into_iter()); + } } } } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index ea74421f0f..f2f86fbcda 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -597,14 +597,14 @@ impl Device { fn create_texture_from_hal( &self, hal_texture: A::Texture, + hal_usage: hal::TextureUses, self_id: id::DeviceId, desc: &resource::TextureDescriptor, format_features: wgt::TextureFormatFeatures, + clear_mode: resource::TextureClearMode, ) -> resource::Texture { debug_assert_eq!(self_id.backend(), A::VARIANT); - let hal_usage = conv::map_texture_usage(desc.usage, desc.format.into()); - resource::Texture { inner: resource::TextureInner::Native { raw: Some(hal_texture), @@ -625,6 +625,7 @@ impl Device { layers: 0..desc.array_layer_count(), }, life_guard: LifeGuard::new(desc.label.borrow_or_default()), + clear_mode, } } @@ -634,20 +635,14 @@ impl Device { adapter: &crate::instance::Adapter, desc: &resource::TextureDescriptor, ) -> Result, resource::CreateTextureError> { - // Enforce COPY_DST, otherwise we wouldn't be able to initialize the texture. - let hal_usage = - conv::map_texture_usage(desc.usage, desc.format.into()) | hal::TextureUses::COPY_DST; + let format_desc = desc.format.describe(); - let hal_desc = hal::TextureDescriptor { - label: desc.label.borrow_option(), - size: desc.size, - mip_level_count: desc.mip_level_count, - sample_count: desc.sample_count, - dimension: desc.dimension, - format: desc.format, - usage: hal_usage, - memory_flags: hal::MemoryFlags::empty(), - }; + // Depth volume textures can't be written to - depth forbids COPY_DST and volume textures can't be rendered to - therefore they aren't allowed. 
+ if format_desc.sample_type == wgt::TextureSampleType::Depth + && desc.dimension == wgt::TextureDimension::D3 + { + return Err(resource::CreateTextureError::CannotCreateDepthVolumeTexture(desc.format)); + } let format_features = self .describe_format_features(adapter, desc.format) @@ -677,13 +672,96 @@ impl Device { return Err(resource::CreateTextureError::InvalidMipLevelCount(mips)); } - let raw = unsafe { + // Enforce having COPY_DST/DEPTH_STENCIL_WRITE/COLOR_TARGET otherwise we wouldn't be able to initialize the texture. + let hal_usage = conv::map_texture_usage(desc.usage, desc.format.into()) + | if format_desc.sample_type == wgt::TextureSampleType::Depth { + hal::TextureUses::DEPTH_STENCIL_WRITE + } else if desc.usage.contains(wgt::TextureUsages::COPY_DST) { + hal::TextureUses::COPY_DST // (set already) + } else { + // Use COPY_DST only if we can't use COLOR_TARGET + if format_features + .allowed_usages + .contains(wgt::TextureUsages::RENDER_ATTACHMENT) + && desc.dimension != wgt::TextureDimension::D3 + // Render targets into 3D textures are not supported on Metal + { + hal::TextureUses::COLOR_TARGET + } else { + hal::TextureUses::COPY_DST + } + }; + + let hal_desc = hal::TextureDescriptor { + label: desc.label.borrow_option(), + size: desc.size, + mip_level_count: desc.mip_level_count, + sample_count: desc.sample_count, + dimension: desc.dimension, + format: desc.format, + usage: hal_usage, + memory_flags: hal::MemoryFlags::empty(), + }; + + let raw_texture = unsafe { self.raw .create_texture(&hal_desc) .map_err(DeviceError::from)?
}; - let mut texture = self.create_texture_from_hal(raw, self_id, desc, format_features); + let clear_mode = if hal_usage + .intersects(hal::TextureUses::DEPTH_STENCIL_WRITE | hal::TextureUses::COLOR_TARGET) + { + let (is_color, usage) = + if desc.format.describe().sample_type == wgt::TextureSampleType::Depth { + (false, hal::TextureUses::DEPTH_STENCIL_WRITE) + } else { + (true, hal::TextureUses::COLOR_TARGET) + }; + let dimension = match desc.dimension { + wgt::TextureDimension::D1 => wgt::TextureViewDimension::D1, + wgt::TextureDimension::D2 => wgt::TextureViewDimension::D2, + wgt::TextureDimension::D3 => unreachable!(), + }; + + let mut clear_views = SmallVec::new(); + for mip_level in 0..desc.mip_level_count { + for array_layer in 0..desc.size.depth_or_array_layers { + let desc = hal::TextureViewDescriptor { + label: Some("clear texture view"), + format: desc.format, + dimension, + usage, + range: wgt::ImageSubresourceRange { + aspect: wgt::TextureAspect::All, + base_mip_level: mip_level, + mip_level_count: NonZeroU32::new(1), + base_array_layer: array_layer, + array_layer_count: NonZeroU32::new(1), + }, + }; + clear_views.push( + unsafe { self.raw.create_texture_view(&raw_texture, &desc) } + .map_err(DeviceError::from)?, + ); + } + } + resource::TextureClearMode::RenderPass { + clear_views, + is_color, + } + } else { + resource::TextureClearMode::BufferCopy + }; + + let mut texture = self.create_texture_from_hal( + raw_texture, + hal_usage, + self_id, + desc, + format_features, + clear_mode, + ); texture.hal_usage = hal_usage; Ok(texture) } @@ -1455,11 +1533,10 @@ impl Device { ) -> Result<(), binding_model::CreateBindGroupError> { // Careful here: the texture may no longer have its own ref count, // if it was deleted by the user. 
- let parent_id = view.parent_id.value; - let texture = &texture_guard[parent_id]; + let texture = &texture_guard[view.parent_id.value]; used.textures .change_extend( - parent_id, + view.parent_id.value, &view.parent_id.ref_count, view.selector.clone(), internal_use, @@ -1468,7 +1545,7 @@ impl Device { check_texture_usage(texture.desc.usage, pub_usage)?; used_texture_ranges.push(TextureInitTrackerAction { - id: parent_id.0, + id: view.parent_id.value.0, range: TextureInitRange { mip_range: view.desc.range.mip_range(&texture.desc), layer_range: view.desc.range.layer_range(&texture.desc), @@ -3323,8 +3400,14 @@ impl Global { Err(error) => break error, }; - let mut texture = - device.create_texture_from_hal(hal_texture, device_id, desc, format_features); + let mut texture = device.create_texture_from_hal( + hal_texture, + conv::map_texture_usage(desc.usage, desc.format.into()), + device_id, + desc, + format_features, + resource::TextureClearMode::None, + ); if desc.usage.contains(wgt::TextureUsages::COPY_DST) { texture.hal_usage |= hal::TextureUses::COPY_DST; } @@ -3380,22 +3463,37 @@ impl Global { trace.lock().add(trace::Action::FreeTexture(texture_id)); } + let last_submit_index = texture.life_guard.life_count(); + + let clear_views = + match std::mem::replace(&mut texture.clear_mode, resource::TextureClearMode::None) { + resource::TextureClearMode::BufferCopy => SmallVec::new(), + resource::TextureClearMode::RenderPass { clear_views, .. 
} => clear_views, + resource::TextureClearMode::None => SmallVec::new(), + }; + match texture.inner { resource::TextureInner::Native { ref mut raw } => { let raw = raw.take().ok_or(resource::DestroyError::AlreadyDestroyed)?; - let temp = queue::TempResource::Texture(raw); + let temp = queue::TempResource::Texture(raw, clear_views); if device.pending_writes.dst_textures.contains(&texture_id) { device.pending_writes.temp_resources.push(temp); } else { - let last_submit_index = texture.life_guard.life_count(); drop(texture_guard); device .lock_life(&mut token) .schedule_resource_destruction(temp, last_submit_index); } } - resource::TextureInner::Surface { .. } => {} //TODO + resource::TextureInner::Surface { .. } => { + for clear_view in clear_views { + unsafe { + device.raw.destroy_texture_view(clear_view); + } + } + // TODO? + } } Ok(()) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 2cfe984f30..ff3d640321 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -4,19 +4,21 @@ use crate::{ align_to, command::{ extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range, - CommandBuffer, CopySide, ImageCopyTexture, TransferError, + ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError, }, conv, device::{DeviceError, WaitIdleError}, get_lowest_common_denom, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Token}, id, + init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, resource::{BufferAccessError, BufferMapState, TextureInner}, track, FastHashSet, }; use hal::{CommandEncoder as _, Device as _, Queue as _}; use parking_lot::Mutex; +use smallvec::SmallVec; use std::{iter, mem, num::NonZeroU32, ptr}; use thiserror::Error; @@ -63,7 +65,7 @@ impl StagingData { #[derive(Debug)] pub enum TempResource { Buffer(A::Buffer), - Texture(A::Texture), + Texture(A::Texture, SmallVec<[A::TextureView; 1]>), } /// A queue execution for a particular command 
encoder. @@ -116,7 +118,10 @@ impl PendingWrites { TempResource::Buffer(buffer) => unsafe { device.destroy_buffer(buffer); }, - TempResource::Texture(texture) => unsafe { + TempResource::Texture(texture, views) => unsafe { + for view in views.into_iter() { + device.destroy_texture_view(view); + } device.destroy_texture(texture); }, } @@ -212,6 +217,8 @@ pub enum QueueWriteError { Queue(#[from] DeviceError), #[error(transparent)] Transfer(#[from] TransferError), + #[error(transparent)] + MemoryInitFailure(#[from] ClearError), } #[derive(Clone, Debug, Error)] @@ -373,7 +380,7 @@ impl Global { return Ok(()); } - let (texture_guard, _) = hub.textures.read(&mut token); + let (mut texture_guard, _) = hub.textures.write(&mut token); // For clear we need write access to the texture. TODO: Can we acquire write lock later? let (selector, dst_base, texture_format) = extract_texture_selector(destination, size, &*texture_guard)?; let format_desc = texture_format.describe(); @@ -418,7 +425,53 @@ impl Global { let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; let stage = device.prepare_stage(stage_size)?; + let dst = texture_guard.get_mut(destination.texture).unwrap(); + if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { + return Err( + TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), + ); + } + let mut trackers = device.trackers.lock(); + let encoder = device.pending_writes.activate(); + + // If the copy does not fully cover the layers, we need to initialize to zero *first* as we don't keep track of partial texture layer inits. + // Strictly speaking we only need to clear the areas of a layer untouched, but this would get increasingly messy. 
+ + let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 { + 0..1 // volume textures don't have a layer range as array volumes aren't supported + } else { + destination.origin.z..destination.origin.z + size.depth_or_array_layers + }; + if dst.initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + if has_copy_partial_init_tracker_coverage(size, destination.mip_level, &dst.desc) { + for layer_range in dst.initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range) + .collect::>>() + { + crate::command::clear_texture_no_device( + id::Valid(destination.texture), + &*dst, + TextureInitRange { + mip_range: destination.mip_level..(destination.mip_level + 1), + layer_range, + }, + encoder, + &mut trackers.textures, + &device.alignments, + &device.zero_buffer, + ) + .map_err(QueueWriteError::from)?; + } + } else { + dst.initialization_status.mips[destination.mip_level as usize] + .drain(init_layer_range); + } + } + let (dst, transition) = trackers .textures .use_replace( @@ -429,11 +482,6 @@ impl Global { ) .unwrap(); - if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { - return Err( - TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), - ); - } let (hal_copy_size, array_layer_count) = validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; dst.life_guard.use_at(device.active_submission_index + 1); @@ -508,78 +556,15 @@ impl Global { usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; - let encoder = device.pending_writes.activate(); + let dst_raw = dst + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + unsafe { encoder.transition_textures(transition.map(|pending| pending.into_hal(dst))); encoder.transition_buffers(iter::once(barrier)); - } - - // If the copy does not fully cover the layers, we need to initialize to zero *first* as we don't keep track of 
partial texture layer inits. - // Strictly speaking we only need to clear the areas of a layer untouched, but this would get increasingly messy. - - let init_layer_range = - destination.origin.z..destination.origin.z + size.depth_or_array_layers; - if dst.initialization_status.mips[destination.mip_level as usize] - .check(init_layer_range.clone()) - .is_some() - { - // For clear we need write access to the texture! - drop(texture_guard); - let (mut texture_guard, _) = hub.textures.write(&mut token); - let dst = texture_guard.get_mut(destination.texture).unwrap(); - let dst_raw = dst - .inner - .as_raw() - .ok_or(TransferError::InvalidTexture(destination.texture))?; - - let layers_to_initialize = dst.initialization_status.mips - [destination.mip_level as usize] - .drain(init_layer_range); - - let mut zero_buffer_copy_regions = Vec::new(); - if size.width != dst.desc.size.width || size.height != dst.desc.size.height { - for layer in layers_to_initialize { - crate::command::collect_zero_buffer_copies_for_clear_texture( - &dst.desc, - device.alignments.buffer_copy_pitch.get() as u32, - destination.mip_level..(destination.mip_level + 1), - layer, - &mut zero_buffer_copy_regions, - ); - } - } - unsafe { - if !zero_buffer_copy_regions.is_empty() { - encoder.copy_buffer_to_texture( - &device.zero_buffer, - dst_raw, - zero_buffer_copy_regions.iter().cloned(), - ); - encoder.transition_textures(zero_buffer_copy_regions.iter().map(|copy| { - hal::TextureBarrier { - texture: dst_raw, - range: wgt::ImageSubresourceRange { - aspect: wgt::TextureAspect::All, - base_mip_level: copy.texture_base.mip_level, - mip_level_count: NonZeroU32::new(1), - base_array_layer: copy.texture_base.array_layer, - array_layer_count: NonZeroU32::new(1), - }, - usage: hal::TextureUses::COPY_DST..hal::TextureUses::COPY_DST, - } - })); - } - encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); - } - } else { - let dst_raw = dst - .inner - .as_raw() - 
.ok_or(TransferError::InvalidTexture(destination.texture))?; - - unsafe { - encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); - } + encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); } device.pending_writes.consume(stage); diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 8ec2823aed..2f78ab5faf 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -5,7 +5,7 @@ use crate::{ id, instance::{Adapter, HalSurface, Instance, Surface}, pipeline::{ComputePipeline, RenderPipeline, ShaderModule}, - resource::{Buffer, QuerySet, Sampler, Texture, TextureView}, + resource::{Buffer, QuerySet, Sampler, Texture, TextureClearMode, TextureView}, Epoch, Index, }; @@ -649,6 +649,13 @@ impl Hub { device.raw.destroy_texture(raw); } } + if let TextureClearMode::RenderPass { clear_views, .. } = texture.clear_mode { + for view in clear_views { + unsafe { + device.raw.destroy_texture_view(view); + } + } + } } } for element in self.buffers.data.write().map.drain(..) 
{ diff --git a/wgpu-core/src/init_tracker/mod.rs b/wgpu-core/src/init_tracker/mod.rs index ac25ea7e24..b5c6160244 100644 --- a/wgpu-core/src/init_tracker/mod.rs +++ b/wgpu-core/src/init_tracker/mod.rs @@ -20,7 +20,10 @@ mod buffer; mod texture; pub(crate) use buffer::{BufferInitTracker, BufferInitTrackerAction}; -pub(crate) use texture::{TextureInitRange, TextureInitTracker, TextureInitTrackerAction}; +pub(crate) use texture::{ + has_copy_partial_init_tracker_coverage, TextureInitRange, TextureInitTracker, + TextureInitTrackerAction, +}; #[derive(Debug, Clone, Copy)] pub(crate) enum MemoryInitKind { diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs index e8603a9922..a2913ea3d4 100644 --- a/wgpu-core/src/init_tracker/texture.rs +++ b/wgpu-core/src/init_tracker/texture.rs @@ -6,7 +6,21 @@ use std::ops::Range; #[derive(Debug, Clone)] pub(crate) struct TextureInitRange { pub(crate) mip_range: Range, - pub(crate) layer_range: Range, + pub(crate) layer_range: Range, // Strictly array layers. We do *not* track volume slices separately. +} + +// Returns true if a copy operation doesn't fully cover the texture init tracking granularity. +// I.e. if this function returns true for a pending copy operation, the target texture needs to be ensured to be initialized first! 
+pub(crate) fn has_copy_partial_init_tracker_coverage( + copy_size: &wgt::Extent3d, + mip_level: u32, + desc: &wgt::TextureDescriptor<()>, +) -> bool { + let target_size = desc.mip_level_size(mip_level).unwrap(); + copy_size.width != target_size.width + || copy_size.height != target_size.height + || (desc.dimension == wgt::TextureDimension::D3 + && copy_size.depth_or_array_layers != target_size.depth_or_array_layers) } impl From for TextureInitRange { diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index dabd9bd642..7421377ed7 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -9,6 +9,8 @@ When this texture is presented, we remove it from the device tracker as well as extract it from the hub. !*/ +use std::{borrow::Borrow, num::NonZeroU32}; + #[cfg(feature = "trace")] use crate::device::trace::Action; use crate::{ @@ -125,6 +127,31 @@ impl Global { let suf = A::get_surface_mut(surface); let (texture_id, status) = match unsafe { suf.raw.acquire_texture(FRAME_TIMEOUT_MS) } { Ok(Some(ast)) => { + let clear_view_desc = hal::TextureViewDescriptor { + label: Some("clear texture view"), + format: config.format, + dimension: wgt::TextureViewDimension::D2, + usage: hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange { + aspect: wgt::TextureAspect::All, + base_mip_level: 0, + mip_level_count: NonZeroU32::new(1), + base_array_layer: 0, + array_layer_count: NonZeroU32::new(1), + }, + }; + let mut clear_views = smallvec::SmallVec::new(); + clear_views.push( + unsafe { + hal::Device::create_texture_view( + &device.raw, + &ast.texture.borrow(), + &clear_view_desc, + ) + } + .map_err(DeviceError::from)?, + ); + let present = surface.presentation.as_mut().unwrap(); let texture = resource::Texture { inner: resource::TextureInner::Surface { @@ -158,6 +185,10 @@ impl Global { levels: 0..1, }, life_guard: LifeGuard::new(""), + clear_mode: resource::TextureClearMode::RenderPass { + clear_views, + is_color: true, + }, }; let ref_count 
= texture.life_guard.add_ref(); @@ -239,6 +270,16 @@ impl Global { let (texture, _) = hub.textures.unregister(texture_id.value.0, &mut token); if let Some(texture) = texture { + if let resource::TextureClearMode::RenderPass { clear_views, .. } = + texture.clear_mode + { + for clear_view in clear_views { + unsafe { + hal::Device::destroy_texture_view(&device.raw, clear_view); + } + } + } + let suf = A::get_surface_mut(surface); match texture.inner { resource::TextureInner::Surface { diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 270202df53..6c3085f17d 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -8,6 +8,7 @@ use crate::{ Label, LifeGuard, RefCount, Stored, }; +use smallvec::SmallVec; use thiserror::Error; use std::{borrow::Borrow, num::NonZeroU8, ops::Range, ptr::NonNull}; @@ -178,6 +179,19 @@ impl TextureInner { } } +#[derive(Debug)] +pub enum TextureClearMode { + BufferCopy, + // View for clear via RenderPass for every subsurface (mip/layer/slice) + RenderPass { + clear_views: SmallVec<[A::TextureView; 1]>, + is_color: bool, + }, + // Texture can't be cleared, attempting to do so will cause panic. + // (either because it is impossible for the type of texture or it is being destroyed) + None, +} + #[derive(Debug)] pub struct Texture { pub(crate) inner: TextureInner, @@ -188,6 +202,32 @@ pub struct Texture { pub(crate) initialization_status: TextureInitTracker, pub(crate) full_range: TextureSelector, pub(crate) life_guard: LifeGuard, + pub(crate) clear_mode: TextureClearMode, +} + +impl Texture { + pub(crate) fn get_clear_view(&self, mip_level: u32, depth_or_layer: u32) -> &A::TextureView { + match self.clear_mode { + TextureClearMode::BufferCopy => { + panic!("Given texture is cleared with buffer copies, not render passes") + } + TextureClearMode::None => { + panic!("Given texture can't be cleared") + } + TextureClearMode::RenderPass { + ref clear_views, .. 
+ } => { + let index = if self.desc.dimension == wgt::TextureDimension::D3 { + (0..mip_level).fold(0, |acc, mip| { + acc + (self.desc.size.depth_or_array_layers >> mip).max(1) + }) + } else { + mip_level * self.desc.size.depth_or_array_layers + } + depth_or_layer; + &clear_views[index as usize] + } + } + } } impl Global { @@ -255,6 +295,8 @@ pub enum TextureDimensionError { pub enum CreateTextureError { #[error(transparent)] Device(#[from] DeviceError), + #[error("Depth Texture format {0:?} can't be used for volume textures")] + CannotCreateDepthVolumeTexture(wgt::TextureFormat), #[error("Textures cannot have empty usage flags")] EmptyUsage, #[error(transparent)] diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 37046dffcc..a3e6f0ea35 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -186,7 +186,7 @@ impl super::Adapter { | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES | wgt::Features::TIMESTAMP_QUERY | wgt::Features::TEXTURE_COMPRESSION_BC - | wgt::Features::CLEAR_COMMANDS + | wgt::Features::CLEAR_TEXTURE | wgt::Features::TEXTURE_FORMAT_16BIT_NORM; //TODO: in order to expose this, we need to run a compute shader // that extract the necessary statistics out of the D3D12 result. 
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 0f14c0c334..9c882c8c41 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -286,7 +286,7 @@ impl super::Adapter { let mut features = wgt::Features::empty() | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES - | wgt::Features::CLEAR_COMMANDS; + | wgt::Features::CLEAR_TEXTURE; features.set( wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER, extensions.contains("GL_EXT_texture_border_clamp"), diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index bb7b5d3832..80187a01f5 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -919,7 +919,7 @@ impl super::PrivateCapabilities { | F::VERTEX_WRITABLE_STORAGE | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES | F::POLYGON_MODE_LINE - | F::CLEAR_COMMANDS + | F::CLEAR_TEXTURE | F::TEXTURE_FORMAT_16BIT_NORM; features.set(F::DEPTH_CLIP_CONTROL, self.supports_depth_clip_control); diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index be4e94046a..6d37b6c87b 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -332,7 +332,7 @@ impl PhysicalDeviceFeatures { | F::TIMESTAMP_QUERY | F::PIPELINE_STATISTICS_QUERY | F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES - | F::CLEAR_COMMANDS; + | F::CLEAR_TEXTURE; let mut dl_flags = Df::all(); dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0); diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 36cdb069c7..aeed35619d 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -505,13 +505,13 @@ bitflags::bitflags! { /// /// This is a native-only feature. const VERTEX_WRITABLE_STORAGE = 1 << 36; - /// Enables clear to zero for buffers & textures. + /// Enables clear to zero for textures. /// /// Supported platforms: /// - All /// /// This is a native only feature. 
- const CLEAR_COMMANDS = 1 << 37; + const CLEAR_TEXTURE = 1 << 37; /// Enables creating shader modules from SPIR-V binary data (unsafe). /// /// SPIR-V data is not parsed or interpreted in any way; you can use diff --git a/wgpu/examples/boids/main.rs b/wgpu/examples/boids/main.rs index 919ba37074..043d4bc3f9 100644 --- a/wgpu/examples/boids/main.rs +++ b/wgpu/examples/boids/main.rs @@ -279,7 +279,9 @@ impl framework::Example for Example { view, resolve_target: None, ops: wgpu::Operations { - load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), + // Not clearing here in order to test wgpu's zero texture initialization on a surface texture. + // Users should avoid loading uninitialized memory since this can cause additional overhead. + load: wgpu::LoadOp::Load, store: true, }, }]; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index bffc6f5072..35af72a1da 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -2388,12 +2388,16 @@ impl CommandEncoder { /// Clears texture to zero. /// - /// Where possible it may be significantly more efficient to perform clears via render passes! + /// Note that unlike with clear_buffer, `COPY_DST` usage is not required. + /// + /// # Implementation notes + /// + /// - implemented either via buffer copies or render/depth target clears; the path depends on texture usages + /// - behaves like texture zero init, but is performed immediately (clearing is *not* delayed via marking it as uninitialized) /// /// # Panics /// - /// - `CLEAR_COMMANDS` extension not enabled - /// - Texture does not have `COPY_DST` usage. 
+ /// - `CLEAR_TEXTURE` extension not enabled /// - Range is out of bounds pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) { Context::command_encoder_clear_texture( diff --git a/wgpu/tests/clear_texture.rs b/wgpu/tests/clear_texture.rs index 8f56e3b00e..8794c2d85d 100644 --- a/wgpu/tests/clear_texture.rs +++ b/wgpu/tests/clear_texture.rs @@ -36,10 +36,13 @@ static TEXTURE_FORMATS_UNCOMPRESSED: &[wgpu::TextureFormat] = &[ wgpu::TextureFormat::Rgba32Uint, wgpu::TextureFormat::Rgba32Sint, wgpu::TextureFormat::Rgba32Float, + wgpu::TextureFormat::Rgb9e5Ufloat, +]; + +static TEXTURE_FORMATS_DEPTH: &[wgpu::TextureFormat] = &[ wgpu::TextureFormat::Depth32Float, wgpu::TextureFormat::Depth24Plus, wgpu::TextureFormat::Depth24PlusStencil8, - wgpu::TextureFormat::Rgb9e5Ufloat, ]; // needs TEXTURE_COMPRESSION_BC @@ -112,12 +115,10 @@ fn single_texture_clear_test( size: wgpu::Extent3d, dimension: wgpu::TextureDimension, ) { - // clear_texture not supported for depth textures. - if format.describe().sample_type == wgpu::TextureSampleType::Depth { - return; - } - - println!("clearing texture with {:?}", format); + println!( + "clearing texture with {:?}, dimension {:?}, size {:?}", + format, dimension, size + ); let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { label: Some(&format!("texture {:?}", format)), @@ -131,7 +132,9 @@ fn single_texture_clear_test( sample_count: 1, // multisampling is not supported for clear dimension, format, - usage: wgpu::TextureUsages::COPY_DST, + // Forces internally the required usages to be able to clear it. + // This is not visible on the API level. 
+ usage: wgpu::TextureUsages::TEXTURE_BINDING, }); let mut encoder = ctx .device @@ -189,25 +192,28 @@ fn clear_texture_tests(ctx: &TestingContext, formats: &[wgpu::TextureFormat], su wgpu::TextureDimension::D2, ); // volume texture - single_texture_clear_test( - ctx, - format, - wgpu::Extent3d { - width: 16, - height: 16, - depth_or_array_layers: 16, - }, - wgpu::TextureDimension::D3, - ); + if format.describe().sample_type != wgt::TextureSampleType::Depth { + single_texture_clear_test( + ctx, + format, + wgpu::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 16, + }, + wgpu::TextureDimension::D3, + ); + } } } #[test] fn clear_texture_2d_uncompressed() { initialize_test( - TestParameters::default().features(wgpu::Features::CLEAR_COMMANDS), + TestParameters::default().features(wgpu::Features::CLEAR_TEXTURE), |ctx| { clear_texture_tests(&ctx, TEXTURE_FORMATS_UNCOMPRESSED, true); + clear_texture_tests(&ctx, TEXTURE_FORMATS_DEPTH, false); }, ) } @@ -216,7 +222,7 @@ fn clear_texture_2d_uncompressed() { fn clear_texture_2d_bc() { initialize_test( TestParameters::default() - .features(wgpu::Features::CLEAR_COMMANDS | wgpu::Features::TEXTURE_COMPRESSION_BC), + .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_BC), |ctx| { clear_texture_tests(&ctx, TEXTURE_FORMATS_BC, false); }, @@ -226,9 +232,8 @@ fn clear_texture_2d_bc() { #[test] fn clear_texture_2d_astc() { initialize_test( - TestParameters::default().features( - wgpu::Features::CLEAR_COMMANDS | wgpu::Features::TEXTURE_COMPRESSION_ASTC_LDR, - ), + TestParameters::default() + .features(wgpu::Features::CLEAR_TEXTURE | wgpu::Features::TEXTURE_COMPRESSION_ASTC_LDR), |ctx| { clear_texture_tests(&ctx, TEXTURE_FORMATS_ASTC, false); }, @@ -239,7 +244,7 @@ fn clear_texture_2d_astc() { fn clear_texture_2d_etc2() { initialize_test( TestParameters::default() - .features(wgpu::Features::CLEAR_COMMANDS | wgpu::Features::TEXTURE_COMPRESSION_ETC2), + .features(wgpu::Features::CLEAR_TEXTURE | 
wgpu::Features::TEXTURE_COMPRESSION_ETC2), |ctx| { clear_texture_tests(&ctx, TEXTURE_FORMATS_ETC2, false); },