From 8b2e6fe3a22837403fdaf5ecc5bf37d5297340cd Mon Sep 17 00:00:00 2001 From: Jinlei Li Date: Thu, 19 May 2022 12:09:22 +0800 Subject: [PATCH 01/11] Roll naga to 571302e (50 revisions) (#2672) --- Cargo.lock | 6 +++--- wgpu-core/Cargo.toml | 2 +- wgpu-hal/Cargo.toml | 8 ++++---- wgpu/Cargo.toml | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41a974044d..495bdf8817 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1033,7 +1033,7 @@ dependencies = [ [[package]] name = "naga" version = "0.8.0" -source = "git+https://github.com/gfx-rs/naga?rev=1aa91549#1aa9154964238af8c692cf521ff90e1f2395e147" +source = "git+https://github.com/gfx-rs/naga?rev=571302e#571302e3ff09cb856f63a3683da308159872b7cc" dependencies = [ "bit-set", "bitflags", @@ -1741,9 +1741,9 @@ checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "unicode-xid" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" [[package]] name = "version_check" diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index e93995d2a4..58d9723166 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -41,7 +41,7 @@ thiserror = "1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" features = ["span", "validate", "wgsl-in"] diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 9cc851d609..caad696d40 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -51,7 +51,7 @@ foreign-types = { version = "0.3", optional = true } ash = { version = "0.37", optional = true } gpu-alloc = { version = "0.5", optional = true } gpu-descriptor = { version = "0.2", optional = true } -inplace_it = { version ="0.3.3", optional = true } +inplace_it = { version = "0.3.3", optional = true } # backend: Gles glow = { version = "0.11.1", optional = true } @@ -92,20 +92,20 @@ js-sys = { version = "0.3" } [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" # DEV dependencies [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" features = ["wgsl-in"] [dev-dependencies] env_logger = "0.9" -winit = "0.26" # for "halmark" example +winit = "0.26" # for "halmark" example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] glutin = "0.28" # for "gles" example diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index e4adc476f0..834eeefe87 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -139,20 +139,20 @@ env_logger = "0.9" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" optional = true # used to test all the example shaders [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" features = ["wgsl-in"] [target.'cfg(target_arch = "wasm32")'.dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "1aa91549" +rev = "571302e" #version = "0.8" features = ["wgsl-out"] From b53a8bcb1733374f00c738d7b2622da85dc3386d Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Thu, 19 May 2022 10:10:47 -0700 Subject: [PATCH 02/11] New function: `Global::create_buffer_error`. (#2673) --- wgpu-core/src/device/mod.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index c6d1583ce1..890e5a95bc 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -3193,6 +3193,43 @@ impl Global { (id, Some(error)) } + /// Assign `id_in` an error with the given `label`. + /// + /// Ensure that future attempts to use `id_in` as a buffer ID will propagate + /// the error, following the WebGPU ["contagious invalidity"] style. + /// + /// Firefox uses this function to comply strictly with the WebGPU spec, + /// which requires [`GPUBufferDescriptor`] validation to be generated on the + /// Device timeline and leave the newly created [`GPUBuffer`] invalid. + /// + /// Ideally, we would simply let [`device_create_buffer`] take care of all + /// of this, but some errors must be detected before we can even construct a + /// [`wgpu_types::BufferDescriptor`] to give it. For example, the WebGPU API + /// allows a `GPUBufferDescriptor`'s [`usage`] property to be any WebIDL + /// `unsigned long` value, but we can't construct a + /// [`wgpu_types::BufferUsages`] value from values with unassigned bits + /// set. This means we must validate `usage` before we can call + /// `device_create_buffer`. + /// + /// When that validation fails, we must arrange for the buffer id to be + /// considered invalid. This method provides the means to do so. + /// + /// ["contagious invalidity"]: https://www.w3.org/TR/webgpu/#invalidity + /// [`GPUBufferDescriptor`]: https://www.w3.org/TR/webgpu/#dictdef-gpubufferdescriptor + /// [`GPUBuffer`]: https://www.w3.org/TR/webgpu/#gpubuffer + /// [`wgpu_types::BufferDescriptor`]: wgt::BufferDescriptor + /// [`device_create_buffer`]: Global::device_create_buffer + /// [`usage`]: https://www.w3.org/TR/webgpu/#dom-gputexturedescriptor-usage + /// [`wgpu_types::BufferUsages`]: wgt::BufferUsages + pub fn create_buffer_error(&self, id_in: Input, label: Label) { + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.buffers.prepare(id_in); + + let (_, mut token) = hub.devices.read(&mut token); + fid.assign_error(label.borrow_or_default(), &mut token); + } + #[cfg(feature = "replay")] pub fn device_wait_for_buffer( &self, From 1ec26784c45e8453dea8e1d5331483abbf8e4022 Mon Sep 17 00:00:00 2001 From: Ashley Date: Thu, 19 May 2022 23:10:27 +0200 Subject: [PATCH 03/11] [Gles] Fix clearing depth and stencil at the same time (#2675) --- wgpu-hal/src/gles/command.rs | 13 ++++++++++--- wgpu-hal/src/gles/mod.rs | 5 +++++ wgpu-hal/src/gles/queue.rs | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index e316844b66..f0f6028c62 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -520,12 +520,19 @@ impl crate::CommandEncoder for super::CommandEncoder { } } if let Some(ref dsat) = desc.depth_stencil_attachment { - if !dsat.depth_ops.contains(crate::AttachmentOps::LOAD) { + let clear_depth = !dsat.depth_ops.contains(crate::AttachmentOps::LOAD); + let clear_stencil = !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD); + + if clear_depth && clear_stencil { + self.cmd_buffer.commands.push(C::ClearDepthAndStencil( + dsat.clear_value.0, + dsat.clear_value.1, + )); + } else if clear_depth { self.cmd_buffer .commands .push(C::ClearDepth(dsat.clear_value.0)); - } - if !dsat.stencil_ops.contains(crate::AttachmentOps::LOAD) { + } else if clear_stencil { self.cmd_buffer .commands .push(C::ClearStencil(dsat.clear_value.1)); diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 321db19027..338cbca774 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -667,6 +667,11 @@ enum Command { ClearColorI(u32, [i32; 4]), ClearDepth(f32), ClearStencil(u32), + // Clearing both the depth and stencil buffer individually appears to + // result in the stencil buffer failing to clear, atleast in WebGL. + // It is also more efficient to emit a single command instead of two for + // this. + ClearDepthAndStencil(f32, u32), BufferBarrier(glow::Buffer, crate::BufferUses), TextureBarrier(crate::TextureUses), SetViewport { diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index c223c2a851..fdc9268676 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -758,6 +758,9 @@ impl super::Queue { C::ClearStencil(value) => { gl.clear_buffer_i32_slice(glow::STENCIL, 0, &[value as i32]); } + C::ClearDepthAndStencil(depth, stencil_value) => { + gl.clear_buffer_depth_stencil(glow::DEPTH_STENCIL, 0, depth, stencil_value as i32); + } C::BufferBarrier(raw, usage) => { let mut flags = 0; if usage.contains(crate::BufferUses::VERTEX) { From 84efe2b18b31de5cad1e554d1c8db5baf8beef59 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Fri, 20 May 2022 19:48:35 -0700 Subject: [PATCH 04/11] Actually use RenderBundleEncoder::set_bind_group in tests. --- wgpu/examples/water/main.rs | 44 ++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/wgpu/examples/water/main.rs b/wgpu/examples/water/main.rs index 7ff7c7e465..694c7f35e6 100644 --- a/wgpu/examples/water/main.rs +++ b/wgpu/examples/water/main.rs @@ -64,11 +64,6 @@ struct Example { terrain_vertex_buf: wgpu::Buffer, terrain_vertex_count: usize, terrain_normal_bind_group: wgpu::BindGroup, - /// - /// Binds to the uniform buffer where the - /// camera has been placed underwater. - /// - terrain_flipped_bind_group: wgpu::BindGroup, terrain_normal_uniform_buf: wgpu::Buffer, /// /// Contains uniform variables where the camera @@ -77,6 +72,13 @@ struct Example { terrain_flipped_uniform_buf: wgpu::Buffer, terrain_pipeline: wgpu::RenderPipeline, + /// A render bundle for drawing the terrain. + /// + /// This isn't really necessary, but it does make sure we have at + /// least one use of `RenderBundleEncoder::set_bind_group` among + /// the examples. + terrain_bundle: wgpu::RenderBundle, + reflect_view: wgpu::TextureView, depth_buffer: wgpu::TextureView, @@ -480,6 +482,9 @@ impl framework::Example for Example { }], label: Some("Terrain Normal Bind Group"), }); + + // Binds to the uniform buffer where the + // camera has been placed underwater. let terrain_flipped_bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { layout: &terrain_bind_group_layout, entries: &[wgpu::BindGroupEntry { @@ -605,6 +610,27 @@ impl framework::Example for Example { multiview: None, }); + // A render bundle to draw the terrain. + let terrain_bundle = { + let mut encoder = + device.create_render_bundle_encoder(&wgpu::RenderBundleEncoderDescriptor { + label: None, + color_formats: &[config.format], + depth_stencil: Some(wgpu::RenderBundleDepthStencil { + format: wgpu::TextureFormat::Depth32Float, + depth_read_only: false, + stencil_read_only: true, + }), + sample_count: 1, + multiview: None, + }); + encoder.set_pipeline(&terrain_pipeline); + encoder.set_bind_group(0, &terrain_flipped_bind_group, &[]); + encoder.set_vertex_buffer(0, terrain_vertex_buf.slice(..)); + encoder.draw(0..terrain_vertices.len() as u32, 0..1); + encoder.finish(&wgpu::RenderBundleDescriptor::default()) + }; + // Done Example { water_vertex_buf, @@ -617,10 +643,10 @@ impl framework::Example for Example { terrain_vertex_buf, terrain_vertex_count: terrain_vertices.len(), terrain_normal_bind_group, - terrain_flipped_bind_group, terrain_normal_uniform_buf, terrain_flipped_uniform_buf, terrain_pipeline, + terrain_bundle, reflect_view, @@ -729,10 +755,8 @@ impl framework::Example for Example { stencil_ops: None, }), }); - rpass.set_pipeline(&self.terrain_pipeline); - rpass.set_bind_group(0, &self.terrain_flipped_bind_group, &[]); - rpass.set_vertex_buffer(0, self.terrain_vertex_buf.slice(..)); - rpass.draw(0..self.terrain_vertex_count as u32, 0..1); + + rpass.execute_bundles([&self.terrain_bundle]); } // Terrain right side up. This time we need to use the // depth values, so we must use StoreOp::Store. From eb260ba7a62de42de2ff73e2083c366cba3b096f Mon Sep 17 00:00:00 2001 From: Jinlei Li Date: Sun, 22 May 2022 23:31:15 +0800 Subject: [PATCH 05/11] metal: fix Depth24Plus | Depth24PlusStencil8 capabilities (#2686) --- wgpu-hal/src/metal/adapter.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 28ae85ac08..2f60f50718 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -181,20 +181,24 @@ impl crate::Adapter for super::Adapter { flags } Tf::Depth32Float | Tf::Depth32FloatStencil8 => { - let mut flats = + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | Tfc::MULTISAMPLE | msaa_resolve_apple3x_if; if pc.format_depth32float_filter { - flats |= Tfc::SAMPLED_LINEAR + flags |= Tfc::SAMPLED_LINEAR } - flats + flags } - Tf::Depth24Plus => Tfc::empty(), - Tf::Depth24PlusStencil8 => { - if pc.msaa_desktop { - Tfc::DEPTH_STENCIL_ATTACHMENT | Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE + Tf::Depth24Plus | Tf::Depth24PlusStencil8 => { + let mut flags = Tfc::DEPTH_STENCIL_ATTACHMENT | Tfc::MULTISAMPLE; + if pc.format_depth24_stencil8 { + flags |= Tfc::SAMPLED_LINEAR | Tfc::MULTISAMPLE_RESOLVE } else { - Tfc::empty() + flags |= msaa_resolve_apple3x_if; + if pc.format_depth32float_filter { + flags |= Tfc::SAMPLED_LINEAR + } } + flags } Tf::Rgb9e5Ufloat => { if pc.msaa_apple3 { From 26a7c8c2a54acc1660ef48bfd388e4e13ed77995 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Sun, 22 May 2022 08:34:33 -0700 Subject: [PATCH 06/11] Document `VertexStepMode`. (#2685) --- wgpu-types/src/lib.rs | 55 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 986f8397bd..7a74f71334 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -2515,10 +2515,63 @@ impl CompareFunction { } } -/// Rate that determines when vertex data is advanced. +/// Whether a vertex buffer is indexed by vertex or by instance. +/// +/// Consider a call to [`RenderPass::draw`] like this: +/// +/// ```ignore +/// render_pass.draw(vertices, instances) +/// ``` +/// +/// where `vertices` is a `Range` of vertex indices, and +/// `instances` is a `Range` of instance indices. +/// +/// For this call, `wgpu` invokes the vertex shader entry point once +/// for every possible `(v, i)` pair, where `v` is drawn from +/// `vertices` and `i` is drawn from `instances`. These invocations +/// may happen in any order, and will usually run in parallel. +/// +/// Each vertex buffer has a step mode, established by the +/// [`step_mode`] field of its [`VertexBufferLayout`], given when the +/// pipeline was created. Buffers whose step mode is [`Vertex`] use +/// `v` as the index into their contents, whereas buffers whose step +/// mode is [`Instance`] use `i`. The indicated buffer element then +/// contributes zero or more attribute values for the `(v, i)` vertex +/// shader invocation to use, based on the [`VertexBufferLayout`]'s +/// [`attributes`] list. +/// +/// You can visualize the results from all these vertex shader +/// invocations as a matrix with a row for each `i` from `instances`, +/// and with a column for each `v` from `vertices`. In one sense, `v` +/// and `i` are symmetrical: both are used to index vertex buffers and +/// provide attribute values. But the key difference between `v` and +/// `i` is that line and triangle primitives are built from the values +/// of each row, along which `i` is constant and `v` varies, not the +/// columns. +/// +/// An indexed draw call works similarly: +/// +/// ```ignore +/// render_pass.draw_indexed(indices, base_vertex, instances) +/// ``` +/// +/// The only difference is that `v` values are drawn from the contents +/// of the index buffer—specifically, the subrange of the index +/// buffer given by `indices`—instead of simply being sequential +/// integers, as they are in a `draw` call. +/// +/// A non-instanced call, where `instances` is `0..1`, is simply a +/// matrix with only one row. /// /// Corresponds to [WebGPU `GPUVertexStepMode`]( /// https://gpuweb.github.io/gpuweb/#enumdef-gpuvertexstepmode). +/// +/// [`RenderPass::draw`]: ../wgpu/struct.RenderPass.html#method.draw +/// [`VertexBufferLayout`]: ../wgpu/struct.VertexBufferLayout.html +/// [`step_mode`]: ../wgpu/struct.VertexBufferLayout.html#structfield.step_mode +/// [`attributes`]: ../wgpu/struct.VertexBufferLayout.html#structfield.attributes +/// [`Vertex`]: VertexStepMode::Vertex +/// [`Instance`]: VertexStepMode::Instance #[repr(C)] #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] #[cfg_attr(feature = "trace", derive(Serialize))] From d3235484f9b6fea24132ce673ed7ca31e534359c Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Fri, 20 May 2022 22:27:12 -0700 Subject: [PATCH 07/11] Document some parts of wgpu_core's render bundle code. --- wgpu-core/src/command/bundle.rs | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index fc51927858..4d8eccd538 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -167,6 +167,16 @@ impl RenderBundleEncoder { self.parent_id } + /// Convert this encoder's commands into a [`RenderBundle`]. + /// + /// We want executing a [`RenderBundle`] to be quick, so we take + /// this opportunity to clean up the [`RenderBundleEncoder`]'s + /// command stream and gather metadata about it that will help + /// keep [`ExecuteBundle`] simple and fast. We remove redundant + /// commands (along with their side data), note resource usage, + /// and accumulate buffer and texture initialization actions. + /// + /// [`ExecuteBundle`]: RenderCommand::ExecuteBundle pub(crate) fn finish( self, desc: &RenderBundleDescriptor, @@ -823,6 +833,15 @@ impl Resource for RenderBundle { } } +/// A render bundle's current index buffer state. +/// +/// [`RenderBundleEncoder::finish`] uses this to drop redundant +/// `SetIndexBuffer` commands from the final [`RenderBundle`]. It +/// records index buffer state changes here, and then calls this +/// type's [`flush`] method before any indexed draw command to produce +/// a `SetIndexBuffer` command if one is necessary. +/// +/// [`flush`]: IndexState::flush #[derive(Debug)] struct IndexState { buffer: Option, @@ -833,6 +852,7 @@ struct IndexState { } impl IndexState { + /// Return a fresh state: no index buffer has been set yet. fn new() -> Self { Self { buffer: None, @@ -843,6 +863,9 @@ impl IndexState { } } + /// Return the number of entries in the current index buffer. + /// + /// Panic if no index buffer has been set. fn limit(&self) -> u32 { assert!(self.buffer.is_some()); let bytes_per_index = match self.format { @@ -852,6 +875,8 @@ impl IndexState { ((self.range.end - self.range.start) / bytes_per_index) as u32 } + /// Prepare for an indexed draw, producing a `SetIndexBuffer` + /// command if necessary. fn flush(&mut self) -> Option { if self.is_dirty { self.is_dirty = false; @@ -866,6 +891,7 @@ impl IndexState { } } + /// Set the current index buffer's format. fn set_format(&mut self, format: wgt::IndexFormat) { if self.format != format { self.format = format; @@ -873,6 +899,7 @@ impl IndexState { } } + /// Set the current index buffer. fn set_buffer(&mut self, id: id::BufferId, range: Range) { self.buffer = Some(id); self.range = range; @@ -880,6 +907,15 @@ impl IndexState { } } +/// The state of a single vertex buffer slot during render bundle encoding. +/// +/// [`RenderBundleEncoder::finish`] uses this to drop redundant +/// `SetVertexBuffer` commands from the final [`RenderBundle`]. It +/// records one vertex buffer slot's state changes here, and then +/// calls this type's [`flush`] method just before any draw command to +/// produce a `SetVertexBuffer` commands if one is necessary. +/// +/// [`flush`]: IndexState::flush #[derive(Debug)] struct VertexState { buffer: Option, @@ -890,6 +926,8 @@ struct VertexState { } impl VertexState { + /// Construct a fresh `VertexState`: no buffer has been set for + /// this slot. fn new() -> Self { Self { buffer: None, @@ -900,12 +938,16 @@ impl VertexState { } } + /// Set this slot's vertex buffer. fn set_buffer(&mut self, buffer_id: id::BufferId, range: Range) { self.buffer = Some(buffer_id); self.range = range; self.is_dirty = true; } + /// Generate a `SetVertexBuffer` command for this slot, if necessary. + /// + /// `slot` is the index of the vertex buffer slot that `self` tracks. fn flush(&mut self, slot: u32) -> Option { if self.is_dirty { self.is_dirty = false; @@ -992,14 +1034,33 @@ struct VertexLimitState { instance_limit_slot: u32, } +/// State for analyzing and cleaning up bundle command streams. +/// +/// To minimize state updates, [`RenderBundleEncoder::finish`] +/// actually just applies commands like [`SetBindGroup`] and +/// [`SetIndexBuffer`] to the simulated state stored here, and then +/// calls the `flush_foo` methods before draw calls to produce the +/// update commands we actually need. #[derive(Debug)] struct State { + /// Resources used by this bundle. This will become `RenderBundle::used`. trackers: TrackerSet, + + /// The current index buffer. We flush this state before indexed + /// draw commands. index: IndexState, + + /// The state of each vertex buffer slot. vertex: ArrayVec, bind: ArrayVec, push_constant_ranges: PushConstantState, + + /// The accumulated dynamic offsets for all `SetBindGroup` commands seen. + /// + /// Each occupied entry of `bind` has a `dynamic_offsets` range that says + /// which elements of this vector it owns. raw_dynamic_offsets: Vec, + flat_dynamic_offsets: Vec, used_bind_groups: usize, pipeline: Option, From afed25fa5c839ba02a2f8887ddfc53cf2c292ec4 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Fri, 20 May 2022 23:18:53 -0700 Subject: [PATCH 08/11] Clean up render bundle bind group tracking. The `dynamic_offsets` and `is_dirty` flags only make sense when the slot is occupied, so they should be inside the `Option`. This makes `State::bind` into an `ArrayVec>`, and cleans up various other bits. --- wgpu-core/src/command/bundle.rs | 141 +++++++++++++++++--------------- 1 file changed, 77 insertions(+), 64 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 4d8eccd538..4ac8e2c3fe 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -195,9 +195,7 @@ impl RenderBundleEncoder { vertex: (0..hal::MAX_VERTEX_BUFFERS) .map(|_| VertexState::new()) .collect(), - bind: (0..hal::MAX_BIND_GROUPS) - .map(|_| BindState::new()) - .collect(), + bind: (0..hal::MAX_BIND_GROUPS).map(|_| None).collect(), push_constant_ranges: PushConstantState::new(), raw_dynamic_offsets: Vec::new(), flat_dynamic_offsets: Vec::new(), @@ -228,6 +226,8 @@ impl RenderBundleEncoder { .map_pass_err(scope); } + // Peel off the front `num_dynamic_offsets` entries from + // `base.dynamic_offsets`. let offsets = &base.dynamic_offsets[..num_dynamic_offsets as usize]; base.dynamic_offsets = &base.dynamic_offsets[num_dynamic_offsets as usize..]; @@ -963,39 +963,22 @@ impl VertexState { } } +/// A bind group that has been set at a particular index during render bundle encoding. #[derive(Debug)] struct BindState { - bind_group: Option<(id::BindGroupId, id::BindGroupLayoutId)>, - dynamic_offsets: Range, - is_dirty: bool, -} + /// The id of the bind group set at this index. + bind_group_id: id::BindGroupId, -impl BindState { - fn new() -> Self { - Self { - bind_group: None, - dynamic_offsets: 0..0, - is_dirty: false, - } - } + /// The layout of `group`. + layout_id: id::Valid, - fn set_group( - &mut self, - bind_group_id: id::BindGroupId, - layout_id: id::BindGroupLayoutId, - dyn_offset: usize, - dyn_count: usize, - ) -> bool { - match self.bind_group { - Some((bg_id, _)) if bg_id == bind_group_id && dyn_count == 0 => false, - _ => { - self.bind_group = Some((bind_group_id, layout_id)); - self.dynamic_offsets = dyn_offset..dyn_offset + dyn_count; - self.is_dirty = true; - true - } - } - } + /// The range of dynamic offsets in `State::raw_dynamic_offsets` + /// for this bind group. + dynamic_offsets: Range, + + /// True if this index's contents have been changed since the last time we + /// generated a `SetBindGroup` command. + is_dirty: bool, } #[derive(Debug)] @@ -1052,16 +1035,24 @@ struct State { /// The state of each vertex buffer slot. vertex: ArrayVec, - bind: ArrayVec, + + /// The bind group set at each index, if any. + bind: ArrayVec, { hal::MAX_BIND_GROUPS }>, + push_constant_ranges: PushConstantState, - /// The accumulated dynamic offsets for all `SetBindGroup` commands seen. + /// The dynamic offsets for all `SetBindGroup` commands we've seen so far. /// /// Each occupied entry of `bind` has a `dynamic_offsets` range that says /// which elements of this vector it owns. raw_dynamic_offsets: Vec, + /// Dynamic offset values used by the cleaned-up command sequence. + /// + /// These end up in the final `RenderBundle`. Each `SetBindGroup` command + /// consumes the next `num_dynamic_offsets` entries off the front. flat_dynamic_offsets: Vec, + used_bind_groups: usize, pipeline: Option, } @@ -1097,11 +1088,10 @@ impl State { vert_state } - fn invalidate_group_from(&mut self, slot: usize) { - for bind in self.bind[slot..].iter_mut() { - if bind.bind_group.is_some() { - bind.is_dirty = true; - } + /// Mark all non-empty bind group table entries from `index` onwards as dirty. + fn invalidate_group_from(&mut self, index: usize) { + for contents in self.bind[index..].iter_mut().flatten() { + contents.is_dirty = true; } } @@ -1112,15 +1102,33 @@ impl State { layout_id: id::Valid, offsets: &[wgt::DynamicOffset], ) { - if self.bind[slot as usize].set_group( - bind_group_id, - layout_id.0, - self.raw_dynamic_offsets.len(), - offsets.len(), - ) { - self.invalidate_group_from(slot as usize + 1); + // If this call wouldn't actually change this index's state, we can + // return early. (If there are dynamic offsets, the range will always + // be different.) + if offsets.is_empty() { + if let Some(ref contents) = self.bind[slot as usize] { + if contents.bind_group_id == bind_group_id { + return; + } + } } + + // Save `offsets` in the side array, and note where they landed. + let raw_start = self.raw_dynamic_offsets.len(); self.raw_dynamic_offsets.extend(offsets); + let raw_end = self.raw_dynamic_offsets.len(); + + // Record the index's new state. + self.bind[slot as usize] = Some(BindState { + bind_group_id, + layout_id, + dynamic_offsets: raw_start..raw_end, + is_dirty: true, + }); + + // Once we've changed the bind group at a particular index, all + // subsequent indices need to be rewritten. + self.invalidate_group_from(slot as usize + 1); } fn set_pipeline( @@ -1151,8 +1159,8 @@ impl State { self.bind .iter() .zip(layout_ids) - .position(|(bs, layout_id)| match bs.bind_group { - Some((_, bgl_id)) => bgl_id != layout_id.0, + .position(|(entry, &layout_id)| match *entry { + Some(ref contents) => contents.layout_id != layout_id, None => false, }) }; @@ -1190,29 +1198,34 @@ impl State { .flat_map(|(i, vs)| vs.flush(i as u32)) } + /// Generate `SetBindGroup` commands for any bind groups that need to be updated. fn flush_binds(&mut self) -> impl Iterator + '_ { - for bs in self.bind[..self.used_bind_groups].iter() { - if bs.is_dirty { + // Append each dirty bind group's dynamic offsets to `flat_dynamic_offsets`. + for contents in self.bind[..self.used_bind_groups].iter().flatten() { + if contents.is_dirty { self.flat_dynamic_offsets - .extend_from_slice(&self.raw_dynamic_offsets[bs.dynamic_offsets.clone()]); + .extend_from_slice(&self.raw_dynamic_offsets[contents.dynamic_offsets.clone()]); } } - self.bind + + // Then, generate `SetBindGroup` commands to update the dirty bind + // groups. After this, all entries are clean. + self.bind[..self.used_bind_groups] .iter_mut() - .take(self.used_bind_groups) .enumerate() - .flat_map(|(i, bs)| { - if bs.is_dirty { - bs.is_dirty = false; - Some(RenderCommand::SetBindGroup { - index: i as u8, - bind_group_id: bs.bind_group.unwrap().0, - num_dynamic_offsets: (bs.dynamic_offsets.end - bs.dynamic_offsets.start) - as u8, - }) - } else { - None + .flat_map(|(i, entry)| { + if let Some(ref mut contents) = *entry { + if contents.is_dirty { + contents.is_dirty = false; + let offsets = &contents.dynamic_offsets; + return Some(RenderCommand::SetBindGroup { + index: i as u8, + bind_group_id: contents.bind_group_id, + num_dynamic_offsets: (offsets.end - offsets.start) as u8, + }); + } } + None }) } } From 87d75d9c7bd75b60854bd5b8d7d6e5497040b759 Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Sat, 21 May 2022 15:11:27 -0700 Subject: [PATCH 09/11] Document command::BasePass. --- wgpu-core/src/command/mod.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index a4e7967307..f16704c53f 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -219,6 +219,17 @@ pub struct BasePassRef<'a, C> { pub push_constant_data: &'a [u32], } +/// A stream of commands for a render pass or compute pass. +/// +/// This also contains side tables referred to by certain commands, +/// like dynamic offsets for [`SetBindGroup`] or string data for +/// [`InsertDebugMarker`]. +/// +/// Render passes use `BasePass`, whereas compute +/// passes use `BasePass`. +/// +/// [`SetBindGroup`]: RenderCommand::SetBindGroup +/// [`InsertDebugMarker`]: RenderCommand::InsertDebugMarker #[doc(hidden)] #[derive(Debug)] #[cfg_attr( @@ -231,9 +242,22 @@ pub struct BasePassRef<'a, C> { )] pub struct BasePass { pub label: Option, + + /// The stream of commands. pub commands: Vec, + + /// Dynamic offsets consumed by [`SetBindGroup`] commands in `commands`. + /// + /// Each successive `SetBindGroup` consumes the next + /// [`num_dynamic_offsets`] values from this list. pub dynamic_offsets: Vec, + + /// Strings used by debug instructions. + /// + /// Each successive [`PushDebugGroup`] or [`InsertDebugMarker`] + /// instruction consumes the next `len` bytes from this vector. pub string_data: Vec, + pub push_constant_data: Vec, } From c7872ab8b543a0ca0d682851e16f88a478334d8f Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Sat, 21 May 2022 15:13:08 -0700 Subject: [PATCH 10/11] Eliminate wgpu_core::commands::bundle::State::raw_dynamic_offsets. This vector's contents always ended up identical to the `RenderBundleEncoder`'s `BasePass`'s `dynamic_offsets` vector, so we can just take values from there instead of copying them. --- wgpu-core/src/command/bundle.rs | 63 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 4ac8e2c3fe..dc0eaf733e 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -197,17 +197,18 @@ impl RenderBundleEncoder { .collect(), bind: (0..hal::MAX_BIND_GROUPS).map(|_| None).collect(), push_constant_ranges: PushConstantState::new(), - raw_dynamic_offsets: Vec::new(), flat_dynamic_offsets: Vec::new(), used_bind_groups: 0, pipeline: None, }; let mut commands = Vec::new(); - let mut base = self.base.as_ref(); let mut pipeline_layout_id = None::>; let mut buffer_memory_init_actions = Vec::new(); let mut texture_memory_init_actions = Vec::new(); + let base = self.base.as_ref(); + let mut next_dynamic_offset = 0; + for &command in base.commands { match command { RenderCommand::SetBindGroup { @@ -226,10 +227,12 @@ impl RenderBundleEncoder { .map_pass_err(scope); } - // Peel off the front `num_dynamic_offsets` entries from - // `base.dynamic_offsets`. - let offsets = &base.dynamic_offsets[..num_dynamic_offsets as usize]; - base.dynamic_offsets = &base.dynamic_offsets[num_dynamic_offsets as usize..]; + // Identify the next `num_dynamic_offsets` entries from `base.dynamic_offsets`. + let num_dynamic_offsets = num_dynamic_offsets as usize; + let offsets_range = + next_dynamic_offset..next_dynamic_offset + num_dynamic_offsets; + next_dynamic_offset = offsets_range.end; + let offsets = &base.dynamic_offsets[offsets_range.clone()]; let bind_group = state .trackers @@ -264,7 +267,7 @@ impl RenderBundleEncoder { buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); texture_memory_init_actions.extend_from_slice(&bind_group.used_texture_ranges); - state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); + state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets_range); state .trackers .merge_extend_stateful(&bind_group.used) @@ -414,7 +417,7 @@ impl RenderBundleEncoder { .map_pass_err(scope); } commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds()); + commands.extend(state.flush_binds(base.dynamic_offsets)); commands.push(command); } RenderCommand::DrawIndexed { @@ -451,7 +454,7 @@ impl RenderBundleEncoder { } commands.extend(state.index.flush()); commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds()); + commands.extend(state.flush_binds(base.dynamic_offsets)); commands.push(command); } RenderCommand::MultiDrawIndirect { @@ -484,7 +487,7 @@ impl RenderBundleEncoder { )); commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds()); + commands.extend(state.flush_binds(base.dynamic_offsets)); commands.push(command); } RenderCommand::MultiDrawIndirect { @@ -519,7 +522,7 @@ impl RenderBundleEncoder { commands.extend(state.index.flush()); commands.extend(state.flush_vertices()); - commands.extend(state.flush_binds()); + commands.extend(state.flush_binds(base.dynamic_offsets)); commands.push(command); } RenderCommand::MultiDrawIndirect { .. } @@ -972,8 +975,8 @@ struct BindState { /// The layout of `group`. layout_id: id::Valid, - /// The range of dynamic offsets in `State::raw_dynamic_offsets` - /// for this bind group. + /// The range of dynamic offsets for this bind group, in the original + /// command stream's `BassPass::dynamic_offsets` array. dynamic_offsets: Range, /// True if this index's contents have been changed since the last time we @@ -1026,7 +1029,7 @@ struct VertexLimitState { /// update commands we actually need. #[derive(Debug)] struct State { - /// Resources used by this bundle. This will become `RenderBundle::used`. + /// Resources used by this bundle. This will become [`RenderBundle::used`]. trackers: TrackerSet, /// The current index buffer. We flush this state before indexed @@ -1041,16 +1044,12 @@ struct State { push_constant_ranges: PushConstantState, - /// The dynamic offsets for all `SetBindGroup` commands we've seen so far. - /// - /// Each occupied entry of `bind` has a `dynamic_offsets` range that says - /// which elements of this vector it owns. - raw_dynamic_offsets: Vec, - /// Dynamic offset values used by the cleaned-up command sequence. /// - /// These end up in the final `RenderBundle`. Each `SetBindGroup` command - /// consumes the next `num_dynamic_offsets` entries off the front. + /// This becomes the final [`RenderBundle`]'s [`BasePass`]'s + /// [`dynamic_offsets`] list. + /// + /// [`dynamic_offsets`]: BasePass::dynamic_offsets flat_dynamic_offsets: Vec, used_bind_groups: usize, @@ -1100,12 +1099,12 @@ impl State { slot: u8, bind_group_id: id::BindGroupId, layout_id: id::Valid, - offsets: &[wgt::DynamicOffset], + dynamic_offsets: Range, ) { // If this call wouldn't actually change this index's state, we can // return early. (If there are dynamic offsets, the range will always // be different.) - if offsets.is_empty() { + if dynamic_offsets.is_empty() { if let Some(ref contents) = self.bind[slot as usize] { if contents.bind_group_id == bind_group_id { return; @@ -1113,16 +1112,11 @@ impl State { } } - // Save `offsets` in the side array, and note where they landed. - let raw_start = self.raw_dynamic_offsets.len(); - self.raw_dynamic_offsets.extend(offsets); - let raw_end = self.raw_dynamic_offsets.len(); - // Record the index's new state. self.bind[slot as usize] = Some(BindState { bind_group_id, layout_id, - dynamic_offsets: raw_start..raw_end, + dynamic_offsets, is_dirty: true, }); @@ -1199,17 +1193,20 @@ impl State { } /// Generate `SetBindGroup` commands for any bind groups that need to be updated. - fn flush_binds(&mut self) -> impl Iterator + '_ { + fn flush_binds( + &mut self, + dynamic_offsets: &[wgt::DynamicOffset], + ) -> impl Iterator + '_ { // Append each dirty bind group's dynamic offsets to `flat_dynamic_offsets`. for contents in self.bind[..self.used_bind_groups].iter().flatten() { if contents.is_dirty { self.flat_dynamic_offsets - .extend_from_slice(&self.raw_dynamic_offsets[contents.dynamic_offsets.clone()]); + .extend_from_slice(&dynamic_offsets[contents.dynamic_offsets.clone()]); } } // Then, generate `SetBindGroup` commands to update the dirty bind - // groups. After this, all entries are clean. + // groups. After this, all bind groups are clean. self.bind[..self.used_bind_groups] .iter_mut() .enumerate() From 435188cb895ce1826b7366cd46e1c20ce9155ee4 Mon Sep 17 00:00:00 2001 From: i509VCB Date: Tue, 17 May 2022 01:28:05 -0500 Subject: [PATCH 11/11] expose some underlying types in Vulkan hal --- wgpu-hal/src/vulkan/adapter.rs | 4 ++++ wgpu-hal/src/vulkan/instance.rs | 17 +++++++++++++++++ wgpu-hal/src/vulkan/mod.rs | 1 + 3 files changed, 22 insertions(+) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 1d2fb08a1f..4a016714cd 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1113,6 +1113,10 @@ impl super::Instance { } impl super::Adapter { + pub fn raw_physical_device(&self) -> ash::vk::PhysicalDevice { + self.raw + } + pub fn required_device_extensions(&self, features: wgt::Features) -> Vec<&'static CStr> { let (supported_extensions, unsupported_extensions) = self .phd_capabilities diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index 6dff2fd485..4a814fc16c 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -133,6 +133,22 @@ impl super::Swapchain { } impl super::Instance { + pub fn entry(&self) -> &ash::Entry { + &self.shared.entry + } + + pub fn raw_instance(&self) -> &ash::Instance { + &self.shared.raw + } + + pub fn driver_api_version(&self) -> u32 { + self.shared.driver_api_version + } + + pub fn extensions(&self) -> &[&'static CStr] { + &self.extensions[..] + } + pub fn required_extensions( entry: &ash::Entry, flags: crate::InstanceFlags, @@ -266,6 +282,7 @@ impl super::Instance { get_physical_device_properties, entry, has_nv_optimus, + driver_api_version, }), extensions, }) diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 0b7e116437..b3e1a0aa7f 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -87,6 +87,7 @@ struct InstanceShared { get_physical_device_properties: Option, entry: ash::Entry, has_nv_optimus: bool, + driver_api_version: u32, } pub struct Instance {