diff --git a/Cargo.lock b/Cargo.lock index f6e1073a54..29f2149f22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1558,8 +1558,7 @@ checksum = "b5418c17512bdf42730f9032c74e1ae39afc408745ebb2acf72fbc4691c17945" [[package]] name = "glow" version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "886c2a30b160c4c6fec8f987430c26b526b7988ca71f664e6a699ddf6f9601e4" +source = "git+https://github.com/grovesNL/glow.git?rev=29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e#29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e" dependencies = [ "js-sys", "slotmap", diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 592c72a9a5..b33c904f30 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -309,6 +309,8 @@ pub struct ReflectionInfo { pub uniforms: crate::FastHashMap, String>, /// Mapping between names and attribute locations. pub varying: crate::FastHashMap, + /// List of push constant items in the shader. + pub push_constant_items: Vec, } /// Mapping between a texture and its sampler, if it exists. @@ -328,6 +330,50 @@ pub struct TextureMapping { pub sampler: Option>, } +/// All information to bind a single uniform value to the shader. +/// +/// Push constants are emulated using traditional uniforms in OpenGL. +/// +/// These are composed of a set of primatives (scalar, vector, matrix) that +/// are given names. Because they are not backed by the concept of a buffer, +/// we must do the work of calculating the offset of each primative in the +/// push constant block. +#[derive(Debug, Clone)] +pub struct PushConstantItem { + /// GL uniform name for the item. This name is the same as if you were + /// to access it directly from a GLSL shader. + /// + /// The with the following example, the following names will be generated, + /// one name per GLSL uniform. + /// + /// ```glsl + /// struct InnerStruct { + /// value: f32, + /// } + /// + /// struct PushConstant { + /// InnerStruct inner; + /// vec4 array[2]; + /// } + /// + /// uniform PushConstants _push_constant_binding_cs; + /// ``` + /// + /// ```text + /// - _push_constant_binding_cs.inner.value + /// - _push_constant_binding_cs.array[0] + /// - _push_constant_binding_cs.array[1] + /// ``` + /// + pub access_path: String, + /// Type of the uniform. This will only ever be a scalar, vector, or matrix. + pub ty: Handle, + /// The offset in the push constant memory block this uniform maps to. + /// + /// The size of the uniform can be derived from the type. + pub offset: u32, +} + /// Helper structure that generates a number #[derive(Default)] struct IdGenerator(u32); @@ -1264,8 +1310,8 @@ impl<'a, W: Write> Writer<'a, W> { handle: Handle, global: &crate::GlobalVariable, ) -> String { - match global.binding { - Some(ref br) => { + match (&global.binding, global.space) { + (&Some(ref br), _) => { format!( "_group_{}_binding_{}_{}", br.group, @@ -1273,7 +1319,10 @@ impl<'a, W: Write> Writer<'a, W> { self.entry_point.stage.to_str() ) } - None => self.names[&NameKey::GlobalVariable(handle)].clone(), + (&None, crate::AddressSpace::PushConstant) => { + format!("_push_constant_binding_{}", self.entry_point.stage.to_str()) + } + (&None, _) => self.names[&NameKey::GlobalVariable(handle)].clone(), } } @@ -1283,15 +1332,20 @@ impl<'a, W: Write> Writer<'a, W> { handle: Handle, global: &crate::GlobalVariable, ) -> BackendResult { - match global.binding { - Some(ref br) => write!( + match (&global.binding, global.space) { + (&Some(ref br), _) => write!( self.out, "_group_{}_binding_{}_{}", br.group, br.binding, self.entry_point.stage.to_str() )?, - None => write!( + (&None, crate::AddressSpace::PushConstant) => write!( + self.out, + "_push_constant_binding_{}", + self.entry_point.stage.to_str() + )?, + (&None, _) => write!( self.out, "{}", &self.names[&NameKey::GlobalVariable(handle)] @@ -4069,6 +4123,7 @@ impl<'a, W: Write> Writer<'a, W> { } } + let mut push_constant_info = None; for (handle, var) in self.module.global_variables.iter() { if info[handle].is_empty() { continue; @@ -4093,17 +4148,105 @@ impl<'a, W: Write> Writer<'a, W> { let name = self.reflection_names_globals[&handle].clone(); uniforms.insert(handle, name); } + crate::AddressSpace::PushConstant => { + let name = self.reflection_names_globals[&handle].clone(); + push_constant_info = Some((name, var.ty)); + } _ => (), }, } } + let mut push_constant_segments = Vec::new(); + let mut push_constant_items = vec![]; + + if let Some((name, ty)) = push_constant_info { + // We don't have a layouter available to us, so we need to create one. + // + // This is potentially a bit wasteful, but the set of types in the program + // shouldn't be too large. + let mut layouter = crate::proc::Layouter::default(); + layouter.update(self.module.to_ctx()).unwrap(); + + // We start with the name of the binding itself. + push_constant_segments.push(name); + + // We then recursively collect all the uniform fields of the push constant. + self.collect_push_constant_items( + ty, + &mut push_constant_segments, + &layouter, + &mut 0, + &mut push_constant_items, + ); + } + Ok(ReflectionInfo { texture_mapping, uniforms, varying: mem::take(&mut self.varying), + push_constant_items, }) } + + fn collect_push_constant_items( + &mut self, + ty: Handle, + segments: &mut Vec, + layouter: &crate::proc::Layouter, + offset: &mut u32, + items: &mut Vec, + ) { + // At this point in the recursion, `segments` contains the path + // needed to access `ty` from the root. + + let layout = &layouter[ty]; + *offset = layout.alignment.round_up(*offset); + match self.module.types[ty].inner { + // All these types map directly to GL uniforms. + TypeInner::Scalar { .. } | TypeInner::Vector { .. } | TypeInner::Matrix { .. } => { + // Build the full name, by combining all current segments. + let name: String = segments.iter().map(String::as_str).collect(); + items.push(PushConstantItem { + access_path: name, + offset: *offset, + ty, + }); + *offset += layout.size; + } + // Arrays are recursed into. + TypeInner::Array { base, size, .. } => { + let crate::ArraySize::Constant(count) = size else { + unreachable!("Cannot have dynamic arrays in push constants"); + }; + + for i in 0..count.get() { + // Add the array accessor and recurse. + segments.push(format!("[{}]", i)); + self.collect_push_constant_items(base, segments, layouter, offset, items); + segments.pop(); + } + + // Ensure the stride is kept by rounding up to the alignment. + *offset = layout.alignment.round_up(*offset) + } + TypeInner::Struct { ref members, .. } => { + for (index, member) in members.iter().enumerate() { + // Add struct accessor and recurse. + segments.push(format!( + ".{}", + self.names[&NameKey::StructMember(ty, index as u32)] + )); + self.collect_push_constant_items(member.ty, segments, layouter, offset, items); + segments.pop(); + } + + // Ensure ending padding is kept by rounding up to the alignment. + *offset = layout.alignment.round_up(*offset) + } + _ => unreachable!(), + } + } } /// Structure returned by [`glsl_scalar`] diff --git a/naga/tests/out/glsl/push-constants.main.Fragment.glsl b/naga/tests/out/glsl/push-constants.main.Fragment.glsl index fa1be9f61f..8131e9e897 100644 --- a/naga/tests/out/glsl/push-constants.main.Fragment.glsl +++ b/naga/tests/out/glsl/push-constants.main.Fragment.glsl @@ -9,14 +9,14 @@ struct PushConstants { struct FragmentIn { vec4 color; }; -uniform PushConstants pc; +uniform PushConstants _push_constant_binding_fs; layout(location = 0) smooth in vec4 _vs2fs_location0; layout(location = 0) out vec4 _fs2p_location0; void main() { FragmentIn in_ = FragmentIn(_vs2fs_location0); - float _e4 = pc.multiplier; + float _e4 = _push_constant_binding_fs.multiplier; _fs2p_location0 = (in_.color * _e4); return; } diff --git a/naga/tests/out/glsl/push-constants.vert_main.Vertex.glsl b/naga/tests/out/glsl/push-constants.vert_main.Vertex.glsl index 27cd7037ab..4519dc4c6c 100644 --- a/naga/tests/out/glsl/push-constants.vert_main.Vertex.glsl +++ b/naga/tests/out/glsl/push-constants.vert_main.Vertex.glsl @@ -9,14 +9,14 @@ struct PushConstants { struct FragmentIn { vec4 color; }; -uniform PushConstants pc; +uniform PushConstants _push_constant_binding_vs; layout(location = 0) in vec2 _p2vs_location0; void main() { vec2 pos = _p2vs_location0; uint vi = uint(gl_VertexID); - float _e5 = pc.multiplier; + float _e5 = _push_constant_binding_vs.multiplier; gl_Position = vec4(((float(vi) * _e5) * pos), 0.0, 1.0); return; } diff --git a/tests/src/image.rs b/tests/src/image.rs index 0e3ea9ea8e..66f6abf16a 100644 --- a/tests/src/image.rs +++ b/tests/src/image.rs @@ -625,12 +625,16 @@ impl ReadbackBuffers { buffer_zero && stencil_buffer_zero } - pub fn check_buffer_contents(&self, device: &Device, expected_data: &[u8]) -> bool { - let result = self - .retrieve_buffer(device, &self.buffer, self.buffer_aspect()) - .iter() - .eq(expected_data.iter()); + pub fn assert_buffer_contents(&self, device: &Device, expected_data: &[u8]) { + let result_buffer = self.retrieve_buffer(device, &self.buffer, self.buffer_aspect()); + assert!( + result_buffer.len() >= expected_data.len(), + "Result buffer ({}) smaller than expected buffer ({})", + result_buffer.len(), + expected_data.len() + ); + let result_buffer = &result_buffer[..expected_data.len()]; + assert_eq!(result_buffer, expected_data); self.buffer.unmap(); - result } } diff --git a/tests/tests/gpu.rs b/tests/tests/gpu.rs index a5fbcde9da..c10df13ed7 100644 --- a/tests/tests/gpu.rs +++ b/tests/tests/gpu.rs @@ -1,4 +1,5 @@ mod regression { + mod issue_3349; mod issue_3457; mod issue_4024; mod issue_4122; @@ -19,6 +20,7 @@ mod occlusion_query; mod partially_bounded_arrays; mod pipeline; mod poll; +mod push_constants; mod query_set; mod queue_transfer; mod resource_descriptor_accessor; diff --git a/tests/tests/partially_bounded_arrays/mod.rs b/tests/tests/partially_bounded_arrays/mod.rs index acadaad67b..5a41ae8f29 100644 --- a/tests/tests/partially_bounded_arrays/mod.rs +++ b/tests/tests/partially_bounded_arrays/mod.rs @@ -97,9 +97,6 @@ static PARTIALLY_BOUNDED_ARRAY: GpuTestConfiguration = GpuTestConfiguration::new ctx.queue.submit(Some(encoder.finish())); - assert!( - readback_buffers - .check_buffer_contents(device, bytemuck::bytes_of(&[4.0f32, 3.0, 2.0, 1.0])), - "texture storage values are incorrect!" - ); + readback_buffers + .assert_buffer_contents(device, bytemuck::bytes_of(&[4.0f32, 3.0, 2.0, 1.0])); }); diff --git a/tests/tests/push_constants.rs b/tests/tests/push_constants.rs new file mode 100644 index 0000000000..e39000173c --- /dev/null +++ b/tests/tests/push_constants.rs @@ -0,0 +1,151 @@ +use std::num::NonZeroU64; + +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; + +/// We want to test that partial updates to push constants work as expected. +/// +/// As such, we dispatch two compute passes, one which writes the values +/// before a parital update, and one which writes the values after the partial update. +/// +/// If the update code is working correctly, the values not written to by the second update +/// will remain unchanged. +#[gpu_test] +static PARTIAL_UPDATE: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgpu::Features::PUSH_CONSTANTS) + .limits(wgpu::Limits { + max_push_constant_size: 32, + ..Default::default() + }), + ) + .run_sync(partial_update_test); + +const SHADER: &str = r#" + struct Pc { + offset: u32, + vector: vec4f, + } + + var pc: Pc; + + @group(0) @binding(0) + var output: array; + + @compute @workgroup_size(1) + fn main() { + output[pc.offset] = pc.vector; + } +"#; + +fn partial_update_test(ctx: TestingContext) { + let sm = ctx + .device + .create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("shader"), + source: wgpu::ShaderSource::Wgsl(SHADER.into()), + }); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("bind_group_layout"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(16), + }, + count: None, + }], + }); + + let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("gpu_buffer"), + size: 32, + usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + + let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("cpu_buffer"), + size: 32, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("bind_group"), + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: gpu_buffer.as_entire_binding(), + }], + }); + + let pipeline_layout = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("pipeline_layout"), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[wgpu::PushConstantRange { + stages: wgpu::ShaderStages::COMPUTE, + range: 0..32, + }], + }); + + let pipeline = ctx + .device + .create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pipeline_layout), + module: &sm, + entry_point: "main", + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("encoder"), + }); + + { + let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("compute_pass"), + timestamp_writes: None, + }); + cpass.set_pipeline(&pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + + // -- Dispatch 0 -- + + // Dispatch number + cpass.set_push_constants(0, bytemuck::bytes_of(&[0_u32])); + // Update the whole vector. + cpass.set_push_constants(16, bytemuck::bytes_of(&[1.0_f32, 2.0, 3.0, 4.0])); + cpass.dispatch_workgroups(1, 1, 1); + + // -- Dispatch 1 -- + + // Dispatch number + cpass.set_push_constants(0, bytemuck::bytes_of(&[1_u32])); + // Update just the y component of the vector. + cpass.set_push_constants(20, bytemuck::bytes_of(&[5.0_f32])); + cpass.dispatch_workgroups(1, 1, 1); + } + + encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, 32); + ctx.queue.submit([encoder.finish()]); + cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ()); + ctx.device.poll(wgpu::Maintain::Wait); + + let data = cpu_buffer.slice(..).get_mapped_range(); + + let floats: &[f32] = bytemuck::cast_slice(&data); + + // first 4 floats the initial value + // second 4 floats the first update + assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 1.0, 5.0, 3.0, 4.0]); +} diff --git a/tests/tests/regression/issue_3349.fs.wgsl b/tests/tests/regression/issue_3349.fs.wgsl new file mode 100644 index 0000000000..d6a5ea5ceb --- /dev/null +++ b/tests/tests/regression/issue_3349.fs.wgsl @@ -0,0 +1,46 @@ +struct ShaderData { + a: f32, + b: f32, + c: f32, + d: f32, +} + +@group(0) @binding(0) +var data1: ShaderData; + +var data2: ShaderData; + +struct FsIn { + @builtin(position) position: vec4f, + @location(0) data1: vec4f, + @location(1) data2: vec4f, +} + +@fragment +fn fs_main(fs_in: FsIn) -> @location(0) vec4f { + let floored = vec2u(floor(fs_in.position.xy)); + // We're outputting a 2x2 image, each pixel coming from a different source + let serial = floored.x + floored.y * 2u; + + switch serial { + // (0, 0) - uniform buffer from the vertex shader + case 0u: { + return fs_in.data1; + } + // (1, 0) - push constant from the vertex shader + case 1u: { + return fs_in.data2; + } + // (0, 1) - uniform buffer from the fragment shader + case 2u: { + return vec4f(data1.a, data1.b, data1.c, data1.d); + } + // (1, 1) - push constant from the fragment shader + case 3u: { + return vec4f(data2.a, data2.b, data2.c, data2.d); + } + default: { + return vec4f(0.0); + } + } +} diff --git a/tests/tests/regression/issue_3349.rs b/tests/tests/regression/issue_3349.rs new file mode 100644 index 0000000000..5db5575ddf --- /dev/null +++ b/tests/tests/regression/issue_3349.rs @@ -0,0 +1,178 @@ +use wgpu::util::DeviceExt; +use wgpu_test::{ + gpu_test, image::ReadbackBuffers, GpuTestConfiguration, TestParameters, TestingContext, +}; + +/// We thought we had an OpenGL bug that, when running without explicit in-shader locations, +/// we will not properly bind uniform buffers to both the vertex and fragment +/// shaders. This turned out to not reproduce at all with this test case. +/// +/// However, it also caught issues with the push constant implementation, +/// making sure that it works correctly with different definitions for the push constant +/// block in vertex and fragment shaders. +/// +/// This test needs to be able to run on GLES 3.0 +/// +/// What this test does is render a 2x2 texture. Each pixel corresponds to a different +/// data source. +/// +/// top left: Vertex Shader / Uniform Buffer +/// top right: Vertex Shader / Push Constant +/// bottom left: Fragment Shader / Uniform Buffer +/// bottom right: Fragment Shader / Push Constant +/// +/// We then validate the data is correct from every position. +#[gpu_test] +static MULTI_STAGE_DATA_BINDING: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgpu::Features::PUSH_CONSTANTS) + .limits(wgpu::Limits { + max_push_constant_size: 16, + ..Default::default() + }), + ) + .run_sync(multi_stage_data_binding_test); + +fn multi_stage_data_binding_test(ctx: TestingContext) { + // We use different shader modules to allow us to use different + // types for the uniform and push constant blocks between stages. + let vs_sm = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_3349.vs.wgsl")); + + let fs_sm = ctx + .device + .create_shader_module(wgpu::include_wgsl!("issue_3349.fs.wgsl")); + + // We start with u8s then convert to float, to make sure we don't have + // cross-vendor rounding issues unorm. + let input_as_unorm: [u8; 4] = [25_u8, 50, 75, 100]; + let input = input_as_unorm.map(|v| v as f32 / 255.0); + + let buffer = ctx + .device + .create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("buffer"), + contents: bytemuck::cast_slice(&input), + usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST, + }); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("bgl"), + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::VERTEX_FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + + let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("bg"), + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: buffer.as_entire_binding(), + }], + }); + + let pll = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("pll"), + bind_group_layouts: &[&bgl], + push_constant_ranges: &[wgpu::PushConstantRange { + stages: wgpu::ShaderStages::VERTEX_FRAGMENT, + range: 0..16, + }], + }); + + let pipeline = ctx + .device + .create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some("pipeline"), + layout: Some(&pll), + vertex: wgpu::VertexState { + module: &vs_sm, + entry_point: "vs_main", + buffers: &[], + }, + fragment: Some(wgpu::FragmentState { + module: &fs_sm, + entry_point: "fs_main", + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + }); + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("texture"), + size: wgpu::Extent3d { + width: 2, + height: 2, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + // Important: NOT srgb. + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }); + + let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("encoder"), + }); + + { + let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("rpass"), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, &bg, &[]); + rpass.set_push_constants( + wgpu::ShaderStages::VERTEX_FRAGMENT, + 0, + bytemuck::cast_slice(&input), + ); + rpass.draw(0..3, 0..1); + } + + let buffers = ReadbackBuffers::new(&ctx.device, &texture); + buffers.copy_from(&ctx.device, &mut encoder, &texture); + ctx.queue.submit([encoder.finish()]); + + let result = input_as_unorm.repeat(4); + buffers.assert_buffer_contents(&ctx.device, &result); +} diff --git a/tests/tests/regression/issue_3349.vs.wgsl b/tests/tests/regression/issue_3349.vs.wgsl new file mode 100644 index 0000000000..85992a756b --- /dev/null +++ b/tests/tests/regression/issue_3349.vs.wgsl @@ -0,0 +1,22 @@ +@group(0) @binding(0) +var data1: vec4f; + +// D3DCompile requires this to be a struct +struct Pc { + inner: vec4f, +} + +var data2: Pc; + +struct VsOut { + @builtin(position) position: vec4f, + @location(0) data1: vec4f, + @location(1) data2: vec4f, +} + +@vertex +fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VsOut { + let uv = vec2f(f32((vertexIndex << 1u) & 2u), f32(vertexIndex & 2u)); + let position = vec4f(uv * 2.0 - 1.0, 0.0, 1.0); + return VsOut(position, data1, data2.inner); +} diff --git a/tests/tests/scissor_tests/mod.rs b/tests/tests/scissor_tests/mod.rs index d53d31cdac..40801a343a 100644 --- a/tests/tests/scissor_tests/mod.rs +++ b/tests/tests/scissor_tests/mod.rs @@ -94,7 +94,7 @@ fn scissor_test_impl(ctx: &TestingContext, scissor_rect: Rect, expected_data: [u readback_buffer.copy_from(&ctx.device, &mut encoder, &texture); ctx.queue.submit(Some(encoder.finish())); } - assert!(readback_buffer.check_buffer_contents(&ctx.device, &expected_data)); + readback_buffer.assert_buffer_contents(&ctx.device, &expected_data); } #[gpu_test] diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index a8ca9a27bb..48800bfb35 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -40,6 +40,8 @@ impl InputStorageType { struct ShaderTest { /// Human readable name name: String, + /// Header text. This is arbitrary code injected at the top of the shader. Replaces {{header}} + header: String, /// This text will be the body of the `Input` struct. Replaces "{{input_members}}" /// in the shader_test shader. custom_struct_members: String, @@ -132,6 +134,7 @@ impl ShaderTest { ) -> Self { Self { name, + header: String::new(), custom_struct_members, body, input_type: String::from("CustomStruct"), @@ -144,6 +147,12 @@ impl ShaderTest { } } + fn header(mut self, header: String) -> Self { + self.header = header; + + self + } + /// Add another set of possible outputs. If any of the given /// output values are seen it's considered a success (i.e. this is OR, not AND). /// @@ -272,6 +281,7 @@ fn shader_input_output_test( // This isn't terribly efficient but the string is short and it's a test. // The body and input members are the longest part, so do them last. let mut processed = source + .replace("{{header}}", &test.header) .replace("{{storage_type}}", storage_type.as_str()) .replace("{{input_type}}", &test.input_type) .replace("{{output_type}}", &test.output_type) diff --git a/tests/tests/shader/shader_test.wgsl b/tests/tests/shader/shader_test.wgsl index efe8692bd5..91c8636574 100644 --- a/tests/tests/shader/shader_test.wgsl +++ b/tests/tests/shader/shader_test.wgsl @@ -1,3 +1,5 @@ +{{header}} + struct CustomStruct { {{input_members}} } diff --git a/tests/tests/shader/struct_layout.rs b/tests/tests/shader/struct_layout.rs index f17dceac08..a7460b9abd 100644 --- a/tests/tests/shader/struct_layout.rs +++ b/tests/tests/shader/struct_layout.rs @@ -99,7 +99,7 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec } } - // https://github.com/gfx-rs/naga/issues/1785 + // https://github.com/gfx-rs/wgpu/issues/4371 let failures = if storage_type == InputStorageType::Uniform && rows == 2 { Backends::GL } else { @@ -171,6 +171,51 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec } } + // Nested struct and array test. + // + // This tries to exploit all the weird edge cases of the struct layout algorithm. + { + let header = + String::from("struct Inner { scalar: f32, member: array, 2>, scalar2: f32 }"); + let members = String::from("inner: Inner, scalar3: f32, vector: vec3, scalar4: f32"); + let direct = String::from( + "\ + output[0] = bitcast(input.inner.scalar); + output[1] = bitcast(input.inner.member[0].x); + output[2] = bitcast(input.inner.member[0].y); + output[3] = bitcast(input.inner.member[0].z); + output[4] = bitcast(input.inner.member[1].x); + output[5] = bitcast(input.inner.member[1].y); + output[6] = bitcast(input.inner.member[1].z); + output[7] = bitcast(input.inner.scalar2); + output[8] = bitcast(input.scalar3); + output[9] = bitcast(input.vector.x); + output[10] = bitcast(input.vector.y); + output[11] = bitcast(input.vector.z); + output[12] = bitcast(input.scalar4); + ", + ); + + tests.push( + ShaderTest::new( + String::from("nested struct and array"), + members, + direct, + &input_values, + &[ + 0, // inner.scalar + 4, 5, 6, // inner.member[0] + 8, 9, 10, // inner.member[1] + 12, // scalar2 + 16, // scalar3 + 20, 21, 22, // vector + 23, // scalar4 + ], + ) + .header(header), + ); + } + tests } @@ -215,8 +260,7 @@ static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new() .limits(Limits { max_push_constant_size: MAX_BUFFER_SIZE as u32, ..Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(Backends::GL)), + }), ) .run_sync(|ctx| { shader_input_output_test( diff --git a/tests/tests/shader_primitive_index/mod.rs b/tests/tests/shader_primitive_index/mod.rs index e5157a7c93..13ba76a328 100644 --- a/tests/tests/shader_primitive_index/mod.rs +++ b/tests/tests/shader_primitive_index/mod.rs @@ -192,5 +192,5 @@ fn pulling_common( } readback_buffer.copy_from(&ctx.device, &mut encoder, &color_texture); ctx.queue.submit(Some(encoder.finish())); - assert!(readback_buffer.check_buffer_contents(&ctx.device, expected)); + readback_buffer.assert_buffer_contents(&ctx.device, expected); } diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 8286540991..765384c075 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -96,7 +96,7 @@ rustc-hash = "1.1" log = "0.4" # backend: Gles -glow = { version = "0.13", optional = true } +glow = { version = "0.13", git = "https://github.com/grovesNL/glow.git", rev = "29ff917a2b2ff7ce0a81b2cc5681de6d4735b36e", optional = true } [dependencies.wgt] package = "wgpu-types" @@ -180,7 +180,9 @@ features = ["wgsl-in"] [dev-dependencies] cfg-if = "1" env_logger = "0.10" -winit = { version = "0.29.2", features = [ "android-native-activity" ] } # for "halmark" example +winit = { version = "0.29.2", features = [ + "android-native-activity", +] } # for "halmark" example [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] glutin = "0.29.1" # for "gles" example diff --git a/wgpu-hal/src/dx11/command.rs b/wgpu-hal/src/dx11/command.rs index 17cd5a22d2..3bbdf0a7ee 100644 --- a/wgpu-hal/src/dx11/command.rs +++ b/wgpu-hal/src/dx11/command.rs @@ -96,7 +96,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, layout: &super::PipelineLayout, stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ) { todo!() diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 719e63a36f..2e3b78e522 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -911,15 +911,16 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, layout: &super::PipelineLayout, _stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ) { + let offset_words = offset_bytes as usize / 4; + let info = layout.shared.root_constant_info.as_ref().unwrap(); self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant; - self.pass.constant_data[(offset as usize)..(offset as usize + data.len())] - .copy_from_slice(data); + self.pass.constant_data[offset_words..(offset_words + data.len())].copy_from_slice(data); if self.pass.layout.signature == layout.shared.signature { self.pass.dirty_root_elements |= 1 << info.root_index; diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index d0f659f461..64bcf3109b 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -327,7 +327,7 @@ impl crate::CommandEncoder for Encoder { &mut self, layout: &Resource, stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ) { } diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 1234b97292..abbbe8d427 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -8,7 +8,6 @@ struct TextureSlotDesc { sampler_index: Option, } -#[derive(Default)] pub(super) struct State { topology: u32, primitive: super::PrimitiveState, @@ -30,10 +29,41 @@ pub(super) struct State { instance_vbuf_mask: usize, dirty_vbuf_mask: usize, active_first_instance: u32, - push_offset_to_uniform: ArrayVec, + push_constant_descs: ArrayVec, + // The current state of the push constant data block. + current_push_constant_data: [u32; super::MAX_PUSH_CONSTANTS], end_of_pass_timestamp: Option, } +impl Default for State { + fn default() -> Self { + Self { + topology: Default::default(), + primitive: Default::default(), + index_format: Default::default(), + index_offset: Default::default(), + vertex_buffers: Default::default(), + vertex_attributes: Default::default(), + color_targets: Default::default(), + stencil: Default::default(), + depth_bias: Default::default(), + alpha_to_coverage_enabled: Default::default(), + samplers: Default::default(), + texture_slots: Default::default(), + render_size: Default::default(), + resolve_attachments: Default::default(), + invalidate_attachments: Default::default(), + has_pass_label: Default::default(), + instance_vbuf_mask: Default::default(), + dirty_vbuf_mask: Default::default(), + active_first_instance: Default::default(), + push_constant_descs: Default::default(), + current_push_constant_data: [0; super::MAX_PUSH_CONSTANTS], + end_of_pass_timestamp: Default::default(), + } + } +} + impl super::CommandBuffer { fn clear(&mut self) { self.label = None; @@ -176,10 +206,7 @@ impl super::CommandEncoder { fn set_pipeline_inner(&mut self, inner: &super::PipelineInner) { self.cmd_buffer.commands.push(C::SetProgram(inner.program)); - self.state.push_offset_to_uniform.clear(); - self.state - .push_offset_to_uniform - .extend(inner.uniforms.iter().cloned()); + self.state.push_constant_descs = inner.push_constant_descs.clone(); // rebind textures, if needed let mut dirty_textures = 0u32; @@ -729,24 +756,46 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, _layout: &super::PipelineLayout, _stages: wgt::ShaderStages, - start_offset: u32, + offset_bytes: u32, data: &[u32], ) { - let range = self.cmd_buffer.add_push_constant_data(data); - - let end = start_offset + data.len() as u32 * 4; - let mut offset = start_offset; - while offset < end { - let uniform = self.state.push_offset_to_uniform[offset as usize / 4].clone(); - let size = uniform.size; - if uniform.location.is_none() { - panic!("No uniform for push constant"); + // There is nothing preventing the user from trying to update a single value within + // a vector or matrix in the set_push_constant call, as to the user, all of this is + // just memory. However OpenGL does not allow parital uniform updates. + // + // As such, we locally keep a copy of the current state of the push constant memory + // block. If the user tries to update a single value, we have the data to update the entirety + // of the uniform. + let start_words = offset_bytes / 4; + let end_words = start_words + data.len() as u32; + self.state.current_push_constant_data[start_words as usize..end_words as usize] + .copy_from_slice(data); + + // We iterate over the uniform list as there may be multiple uniforms that need + // updating from the same push constant memory (one for each shader stage). + // + // Additionally, any statically unused uniform descs will have been removed from this list + // by OpenGL, so the uniform list is not contiguous. + for uniform in self.state.push_constant_descs.iter().cloned() { + let uniform_size_words = uniform.size_bytes / 4; + let uniform_start_words = uniform.offset / 4; + let uniform_end_words = uniform_start_words + uniform_size_words; + + // Is true if any word within the uniform binding was updated + let needs_updating = + start_words < uniform_end_words || uniform_start_words <= end_words; + + if needs_updating { + let uniform_data = &self.state.current_push_constant_data + [uniform_start_words as usize..uniform_end_words as usize]; + + let range = self.cmd_buffer.add_push_constant_data(uniform_data); + + self.cmd_buffer.commands.push(C::SetPushConstants { + uniform, + offset: range.start, + }); } - self.cmd_buffer.commands.push(C::SetPushConstants { - uniform, - offset: range.start + offset, - }); - offset += size; } } diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index c0ad4054d7..3fb8383a51 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -417,108 +417,6 @@ pub(super) fn map_storage_access(access: wgt::StorageTextureAccess) -> u32 { } } -pub(super) fn is_sampler(glsl_uniform_type: u32) -> bool { - match glsl_uniform_type { - glow::INT_SAMPLER_1D - | glow::INT_SAMPLER_1D_ARRAY - | glow::INT_SAMPLER_2D - | glow::INT_SAMPLER_2D_ARRAY - | glow::INT_SAMPLER_2D_MULTISAMPLE - | glow::INT_SAMPLER_2D_MULTISAMPLE_ARRAY - | glow::INT_SAMPLER_2D_RECT - | glow::INT_SAMPLER_3D - | glow::INT_SAMPLER_CUBE - | glow::INT_SAMPLER_CUBE_MAP_ARRAY - | glow::UNSIGNED_INT_SAMPLER_1D - | glow::UNSIGNED_INT_SAMPLER_1D_ARRAY - | glow::UNSIGNED_INT_SAMPLER_2D - | glow::UNSIGNED_INT_SAMPLER_2D_ARRAY - | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE - | glow::UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY - | glow::UNSIGNED_INT_SAMPLER_2D_RECT - | glow::UNSIGNED_INT_SAMPLER_3D - | glow::UNSIGNED_INT_SAMPLER_CUBE - | glow::UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY - | glow::SAMPLER_1D - | glow::SAMPLER_1D_SHADOW - | glow::SAMPLER_1D_ARRAY - | glow::SAMPLER_1D_ARRAY_SHADOW - | glow::SAMPLER_2D - | glow::SAMPLER_2D_SHADOW - | glow::SAMPLER_2D_ARRAY - | glow::SAMPLER_2D_ARRAY_SHADOW - | glow::SAMPLER_2D_MULTISAMPLE - | glow::SAMPLER_2D_MULTISAMPLE_ARRAY - | glow::SAMPLER_2D_RECT - | glow::SAMPLER_2D_RECT_SHADOW - | glow::SAMPLER_3D - | glow::SAMPLER_CUBE - | glow::SAMPLER_CUBE_MAP_ARRAY - | glow::SAMPLER_CUBE_MAP_ARRAY_SHADOW - | glow::SAMPLER_CUBE_SHADOW => true, - _ => false, - } -} - -pub(super) fn is_image(glsl_uniform_type: u32) -> bool { - match glsl_uniform_type { - glow::INT_IMAGE_1D - | glow::INT_IMAGE_1D_ARRAY - | glow::INT_IMAGE_2D - | glow::INT_IMAGE_2D_ARRAY - | glow::INT_IMAGE_2D_MULTISAMPLE - | glow::INT_IMAGE_2D_MULTISAMPLE_ARRAY - | glow::INT_IMAGE_2D_RECT - | glow::INT_IMAGE_3D - | glow::INT_IMAGE_CUBE - | glow::INT_IMAGE_CUBE_MAP_ARRAY - | glow::UNSIGNED_INT_IMAGE_1D - | glow::UNSIGNED_INT_IMAGE_1D_ARRAY - | glow::UNSIGNED_INT_IMAGE_2D - | glow::UNSIGNED_INT_IMAGE_2D_ARRAY - | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE - | glow::UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY - | glow::UNSIGNED_INT_IMAGE_2D_RECT - | glow::UNSIGNED_INT_IMAGE_3D - | glow::UNSIGNED_INT_IMAGE_CUBE - | glow::UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY - | glow::IMAGE_1D - | glow::IMAGE_1D_ARRAY - | glow::IMAGE_2D - | glow::IMAGE_2D_ARRAY - | glow::IMAGE_2D_MULTISAMPLE - | glow::IMAGE_2D_MULTISAMPLE_ARRAY - | glow::IMAGE_2D_RECT - | glow::IMAGE_3D - | glow::IMAGE_CUBE - | glow::IMAGE_CUBE_MAP_ARRAY => true, - _ => false, - } -} - -pub(super) fn is_atomic_counter(glsl_uniform_type: u32) -> bool { - glsl_uniform_type == glow::UNSIGNED_INT_ATOMIC_COUNTER -} - -pub(super) fn is_opaque_type(glsl_uniform_type: u32) -> bool { - is_sampler(glsl_uniform_type) - || is_image(glsl_uniform_type) - || is_atomic_counter(glsl_uniform_type) -} - -pub(super) fn uniform_byte_size(glsl_uniform_type: u32) -> u32 { - match glsl_uniform_type { - glow::FLOAT | glow::INT => 4, - glow::FLOAT_VEC2 | glow::INT_VEC2 => 8, - glow::FLOAT_VEC3 | glow::INT_VEC3 => 12, - glow::FLOAT_VEC4 | glow::INT_VEC4 => 16, - glow::FLOAT_MAT2 => 16, - glow::FLOAT_MAT3 => 36, - glow::FLOAT_MAT4 => 64, - _ => panic!("Unsupported uniform datatype! {glsl_uniform_type:#X}"), - } -} - pub(super) fn is_layered_target(target: u32) -> bool { match target { glow::TEXTURE_2D | glow::TEXTURE_CUBE_MAP => false, diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index a0048c5ec2..7934c4be01 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -23,6 +23,7 @@ struct CompilationContext<'a> { layout: &'a super::PipelineLayout, sampler_map: &'a mut super::SamplerBindMap, name_binding_map: &'a mut NameBindingMap, + push_constant_items: &'a mut Vec, multiview: Option, } @@ -53,7 +54,7 @@ impl CompilationContext<'_> { Some(name) => name.clone(), None => continue, }; - log::debug!( + log::trace!( "Rebind buffer: {:?} -> {}, register={:?}, slot={}", var.name.as_ref(), &name, @@ -101,6 +102,8 @@ impl CompilationContext<'_> { naga::ShaderStage::Compute => {} } } + + *self.push_constant_items = reflection_info.push_constant_items; } } @@ -279,7 +282,7 @@ impl super::Device { unsafe fn create_pipeline<'a>( &self, gl: &glow::Context, - shaders: ArrayVec, 3>, + shaders: ArrayVec, { crate::MAX_CONCURRENT_SHADER_STAGES }>, layout: &super::PipelineLayout, #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, multiview: Option, @@ -327,7 +330,7 @@ impl super::Device { unsafe fn create_program<'a>( gl: &glow::Context, - shaders: ArrayVec, 3>, + shaders: ArrayVec, { crate::MAX_CONCURRENT_SHADER_STAGES }>, layout: &super::PipelineLayout, #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, multiview: Option, @@ -348,16 +351,22 @@ impl super::Device { } let mut name_binding_map = NameBindingMap::default(); + let mut push_constant_items = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); let mut sampler_map = [None; super::MAX_TEXTURE_SLOTS]; let mut has_stages = wgt::ShaderStages::empty(); - let mut shaders_to_delete = arrayvec::ArrayVec::<_, 3>::new(); + let mut shaders_to_delete = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); - for (naga_stage, stage) in shaders { + for &(naga_stage, stage) in &shaders { has_stages |= map_naga_stage(naga_stage); + let pc_item = { + push_constant_items.push(Vec::new()); + push_constant_items.last_mut().unwrap() + }; let context = CompilationContext { layout, sampler_map: &mut sampler_map, name_binding_map: &mut name_binding_map, + push_constant_items: pc_item, multiview, }; @@ -409,6 +418,7 @@ impl super::Device { match register { super::BindingRegister::UniformBuffers => { let index = unsafe { gl.get_uniform_block_index(program, name) }.unwrap(); + log::trace!("\tBinding slot {slot} to block index {index}"); unsafe { gl.uniform_block_binding(program, index, slot as _) }; } super::BindingRegister::StorageBuffers => { @@ -429,41 +439,38 @@ impl super::Device { } } - let mut uniforms: [super::UniformDesc; super::MAX_PUSH_CONSTANTS] = - [None; super::MAX_PUSH_CONSTANTS].map(|_: Option<()>| Default::default()); - let count = unsafe { gl.get_active_uniforms(program) }; - let mut offset = 0; - - for uniform in 0..count { - let glow::ActiveUniform { utype, name, .. } = - unsafe { gl.get_active_uniform(program, uniform) }.unwrap(); - - if conv::is_opaque_type(utype) { - continue; - } - - if let Some(location) = unsafe { gl.get_uniform_location(program, &name) } { - if uniforms[offset / 4].location.is_some() { - panic!("Offset already occupied") + let mut uniforms = ArrayVec::new(); + + for (stage_idx, stage_items) in push_constant_items.into_iter().enumerate() { + for item in stage_items { + let naga_module = &shaders[stage_idx].1.module.naga.module; + let type_inner = &naga_module.types[item.ty].inner; + + let location = unsafe { gl.get_uniform_location(program, &item.access_path) }; + + log::trace!( + "push constant item: name={}, ty={:?}, offset={}, location={:?}", + item.access_path, + type_inner, + item.offset, + location, + ); + + if let Some(location) = location { + uniforms.push(super::PushConstantDesc { + location, + offset: item.offset, + size_bytes: type_inner.size(naga_module.to_ctx()), + ty: type_inner.clone(), + }); } - - // `size` will always be 1 so we need to guess the real size from the type - let uniform_size = conv::uniform_byte_size(utype); - - uniforms[offset / 4] = super::UniformDesc { - location: Some(location), - size: uniform_size, - utype, - }; - - offset += uniform_size as usize; } } Ok(Arc::new(super::PipelineInner { program, sampler_map, - uniforms, + push_constant_descs: uniforms, })) } } diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index bfc55e634f..0af5ad4a6e 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -108,6 +108,8 @@ const MAX_SAMPLERS: usize = 16; const MAX_VERTEX_ATTRIBUTES: usize = 16; const ZERO_BUFFER_SIZE: usize = 256 << 10; const MAX_PUSH_CONSTANTS: usize = 64; +// We have to account for each push constant may need to be set for every shader. +const MAX_PUSH_CONSTANT_COMMANDS: usize = MAX_PUSH_CONSTANTS * crate::MAX_CONCURRENT_SHADER_STAGES; impl crate::Api for Api { type Instance = Instance; @@ -483,11 +485,12 @@ struct VertexBufferDesc { stride: u32, } -#[derive(Clone, Debug, Default)] -struct UniformDesc { - location: Option, - size: u32, - utype: u32, +#[derive(Clone, Debug)] +struct PushConstantDesc { + location: glow::UniformLocation, + ty: naga::TypeInner, + offset: u32, + size_bytes: u32, } #[cfg(all( @@ -495,13 +498,13 @@ struct UniformDesc { feature = "fragile-send-sync-non-atomic-wasm", not(target_feature = "atomics") ))] -unsafe impl Sync for UniformDesc {} +unsafe impl Sync for PushConstantDesc {} #[cfg(all( target_arch = "wasm32", feature = "fragile-send-sync-non-atomic-wasm", not(target_feature = "atomics") ))] -unsafe impl Send for UniformDesc {} +unsafe impl Send for PushConstantDesc {} /// For each texture in the pipeline layout, store the index of the only /// sampler (in this layout) that the texture is used with. @@ -510,7 +513,7 @@ type SamplerBindMap = [Option; MAX_TEXTURE_SLOTS]; struct PipelineInner { program: glow::Program, sampler_map: SamplerBindMap, - uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], + push_constant_descs: ArrayVec, } #[derive(Clone, Debug)] @@ -882,7 +885,7 @@ enum Command { PushDebugGroup(Range), PopDebugGroup, SetPushConstants { - uniform: UniformDesc, + uniform: PushConstantDesc, /// Offset from the start of the `data_bytes` offset: u32, }, diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 6125363aa7..c395a2004a 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1441,64 +1441,235 @@ impl super::Queue { ref uniform, offset, } => { - fn get_data(data: &[u8], offset: u32) -> &[T] { - let raw = &data[(offset as usize)..]; - unsafe { - slice::from_raw_parts( - raw.as_ptr() as *const _, - raw.len() / mem::size_of::(), - ) - } + // T must be POD + // + // This function is absolutely sketchy and we really should be using bytemuck. + unsafe fn get_data(data: &[u8], offset: u32) -> &[T; COUNT] { + let data_required = mem::size_of::() * COUNT; + + let raw = &data[(offset as usize)..][..data_required]; + + debug_assert_eq!(data_required, raw.len()); + + let slice: &[T] = + unsafe { slice::from_raw_parts(raw.as_ptr() as *const _, COUNT) }; + + slice.try_into().unwrap() } - let location = uniform.location.as_ref(); + let location = Some(&uniform.location); - match uniform.utype { - glow::FLOAT => { - let data = get_data::(data_bytes, offset)[0]; + match uniform.ty { + // + // --- Float 1-4 Component --- + // + naga::TypeInner::Scalar { + kind: naga::ScalarKind::Float, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset)[0] }; unsafe { gl.uniform_1_f32(location, data) }; } - glow::FLOAT_VEC2 => { - let data = get_data::<[f32; 2]>(data_bytes, offset)[0]; - unsafe { gl.uniform_2_f32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Float, + size: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_2_f32_slice(location, data) }; } - glow::FLOAT_VEC3 => { - let data = get_data::<[f32; 3]>(data_bytes, offset)[0]; - unsafe { gl.uniform_3_f32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Float, + size: naga::VectorSize::Tri, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_3_f32_slice(location, data) }; } - glow::FLOAT_VEC4 => { - let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; - unsafe { gl.uniform_4_f32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Float, + size: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_4_f32_slice(location, data) }; } - glow::INT => { - let data = get_data::(data_bytes, offset)[0]; + + // + // --- Int 1-4 Component --- + // + naga::TypeInner::Scalar { + kind: naga::ScalarKind::Sint, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset)[0] }; unsafe { gl.uniform_1_i32(location, data) }; } - glow::INT_VEC2 => { - let data = get_data::<[i32; 2]>(data_bytes, offset)[0]; - unsafe { gl.uniform_2_i32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Sint, + size: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_2_i32_slice(location, data) }; + } + naga::TypeInner::Vector { + kind: naga::ScalarKind::Sint, + size: naga::VectorSize::Tri, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_3_i32_slice(location, data) }; } - glow::INT_VEC3 => { - let data = get_data::<[i32; 3]>(data_bytes, offset)[0]; - unsafe { gl.uniform_3_i32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Sint, + size: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_4_i32_slice(location, data) }; + } + + // + // --- Uint 1-4 Component --- + // + naga::TypeInner::Scalar { + kind: naga::ScalarKind::Uint, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset)[0] }; + unsafe { gl.uniform_1_u32(location, data) }; } - glow::INT_VEC4 => { - let data = get_data::<[i32; 4]>(data_bytes, offset)[0]; - unsafe { gl.uniform_4_i32_slice(location, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Uint, + size: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_2_u32_slice(location, data) }; } - glow::FLOAT_MAT2 => { - let data = get_data::<[f32; 4]>(data_bytes, offset)[0]; - unsafe { gl.uniform_matrix_2_f32_slice(location, false, &data) }; + naga::TypeInner::Vector { + kind: naga::ScalarKind::Uint, + size: naga::VectorSize::Tri, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_3_u32_slice(location, data) }; + } + naga::TypeInner::Vector { + kind: naga::ScalarKind::Uint, + size: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_4_u32_slice(location, data) }; + } + + // + // --- Matrix 2xR --- + // + naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_2_f32_slice(location, false, data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Tri, + width: 4, + } => { + // repack 2 vec3s into 6 values. + let unpacked_data = unsafe { get_data::(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + ]; + unsafe { gl.uniform_matrix_2x3_f32_slice(location, false, &packed_data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Bi, + rows: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_2x4_f32_slice(location, false, data) }; + } + + // + // --- Matrix 3xR --- + // + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_3x2_f32_slice(location, false, data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Tri, + width: 4, + } => { + // repack 3 vec3s into 9 values. + let unpacked_data = unsafe { get_data::(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + unpacked_data[8], unpacked_data[9], unpacked_data[10], + ]; + unsafe { gl.uniform_matrix_3_f32_slice(location, false, &packed_data) }; + } + naga::TypeInner::Matrix { + columns: naga::VectorSize::Tri, + rows: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_3x4_f32_slice(location, false, data) }; + } + + // + // --- Matrix 4xR --- + // + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Bi, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_4x2_f32_slice(location, false, data) }; } - glow::FLOAT_MAT3 => { - let data = get_data::<[f32; 9]>(data_bytes, offset)[0]; - unsafe { gl.uniform_matrix_3_f32_slice(location, false, &data) }; + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Tri, + width: 4, + } => { + // repack 4 vec3s into 12 values. + let unpacked_data = unsafe { get_data::(data_bytes, offset) }; + #[rustfmt::skip] + let packed_data = [ + unpacked_data[0], unpacked_data[1], unpacked_data[2], + unpacked_data[4], unpacked_data[5], unpacked_data[6], + unpacked_data[8], unpacked_data[9], unpacked_data[10], + unpacked_data[12], unpacked_data[13], unpacked_data[14], + ]; + unsafe { gl.uniform_matrix_4x3_f32_slice(location, false, &packed_data) }; } - glow::FLOAT_MAT4 => { - let data = get_data::<[f32; 16]>(data_bytes, offset)[0]; - unsafe { gl.uniform_matrix_4_f32_slice(location, false, &data) }; + naga::TypeInner::Matrix { + columns: naga::VectorSize::Quad, + rows: naga::VectorSize::Quad, + width: 4, + } => { + let data = unsafe { get_data::(data_bytes, offset) }; + unsafe { gl.uniform_matrix_4_f32_slice(location, false, data) }; } - _ => panic!("Unsupported uniform datatype!"), + _ => panic!("Unsupported uniform datatype: {:?}!", uniform.ty), } } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 2e989499e4..6c8e36ab7c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -97,6 +97,9 @@ use bitflags::bitflags; use thiserror::Error; use wgt::{WasmNotSend, WasmNotSync}; +// - Vertex + Fragment +// - Compute +pub const MAX_CONCURRENT_SHADER_STAGES: usize = 2; pub const MAX_ANISOTROPY: u8 = 16; pub const MAX_BIND_GROUPS: usize = 8; pub const MAX_VERTEX_BUFFERS: usize = 16; @@ -500,11 +503,19 @@ pub trait CommandEncoder: WasmNotSend + WasmNotSync + fmt::Debug { dynamic_offsets: &[wgt::DynamicOffset], ); + /// Sets a range in push constant data. + /// + /// IMPORTANT: while the data is passed as words, the offset is in bytes! + /// + /// # Safety + /// + /// - `offset_bytes` must be a multiple of 4. + /// - The range of push constants written must be valid for the pipeline layout at draw time. unsafe fn set_push_constants( &mut self, layout: &A::PipelineLayout, stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ); diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index c4b37f9932..0fc8043fe4 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -798,17 +798,17 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, layout: &super::PipelineLayout, stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ) { let state_pc = &mut self.state.push_constants; if state_pc.len() < layout.total_push_constants as usize { state_pc.resize(layout.total_push_constants as usize, 0); } - assert_eq!(offset as usize % WORD_SIZE, 0); + debug_assert_eq!(offset_bytes as usize % WORD_SIZE, 0); - let offset = offset as usize / WORD_SIZE; - state_pc[offset..offset + data.len()].copy_from_slice(data); + let offset_words = offset_bytes as usize / WORD_SIZE; + state_pc[offset_words..offset_words + data.len()].copy_from_slice(data); if stages.contains(wgt::ShaderStages::COMPUTE) { self.state.compute.as_ref().unwrap().set_bytes( diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 391b754d33..dedc054e6b 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -600,7 +600,7 @@ impl crate::CommandEncoder for super::CommandEncoder { &mut self, layout: &super::PipelineLayout, stages: wgt::ShaderStages, - offset: u32, + offset_bytes: u32, data: &[u32], ) { unsafe { @@ -608,7 +608,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.active, layout.raw, conv::map_shader_stage(stages), - offset, + offset_bytes, slice::from_raw_parts(data.as_ptr() as _, data.len() * 4), ) }; diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index d88b48ef73..8eb2935a32 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -1588,7 +1588,7 @@ impl crate::Device for super::Device { multiview: desc.multiview, ..Default::default() }; - let mut stages = ArrayVec::<_, 2>::new(); + let mut stages = ArrayVec::<_, { crate::MAX_CONCURRENT_SHADER_STAGES }>::new(); let mut vertex_buffers = Vec::with_capacity(desc.vertex_buffers.len()); let mut vertex_attributes = Vec::new(); diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index e705d34e92..2804078068 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -1217,7 +1217,7 @@ impl crate::Context for Context { if let Some(cause) = error { if let wgc::pipeline::CreateRenderPipelineError::Internal { stage, ref error } = cause { log::error!("Shader translation error for stage {:?}: {}", stage, error); - log::error!("Please report it to https://github.com/gfx-rs/naga"); + log::error!("Please report it to https://github.com/gfx-rs/wgpu"); } self.handle_error( &device_data.error_sink, @@ -1262,12 +1262,12 @@ impl crate::Context for Context { )); if let Some(cause) = error { if let wgc::pipeline::CreateComputePipelineError::Internal(ref error) = cause { - log::warn!( + log::error!( "Shader translation error for stage {:?}: {}", wgt::ShaderStages::COMPUTE, error ); - log::warn!("Please report it to https://github.com/gfx-rs/naga"); + log::error!("Please report it to https://github.com/gfx-rs/wgpu"); } self.handle_error( &device_data.error_sink,